In [1]:
%pip install --upgrade caer canaro

Collecting caer
  Downloading caer-2.0.8-py3-none-any.whl.metadata (7.8 kB)
Collecting canaro
  Downloading canaro-1.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting mypy (from caer)
  Downloading mypy-1.17.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (2.2 kB)
Collecting pathspec>=0.9.0 (from mypy->caer)
  Downloading pathspec-0.12.1-py3-none-any.whl.metadata (21 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow->canaro)
  Downloading protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl.metadata (592 bytes)
Downloading caer-2.0.8-py3-none-any.whl (809 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.5/809.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading canaro-1.1.0-py3-none-any.whl (19 kB)
Downloading mypy-1.17.0-cp310-cp310-macosx_11_0_arm64.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m2.8 MB/s[0m eta [36m0:00:0

In [None]:
import os 
import caer 
import canaro 
import numpy as np 
import cv2 as cv 
import gc 

In [None]:
# Root folder of the dataset; every entry inside it is listed as one character.
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

# Image size and channel count forwarded to caer when the images are loaded.
channels = 1
IMG_SIZE = (80, 80)

In [None]:
# Count the entries (images) found in each character folder under `char_path`.
char_dict = {}
for char in os.listdir(char_path):
    char_dir = os.path.join(char_path, char)
    # Skip stray plain files (e.g. .DS_Store or a README): calling
    # os.listdir() on a non-directory raises NotADirectoryError.
    if not os.path.isdir(char_dir):
        continue
    char_dict[char] = len(os.listdir(char_dir))

# Order the characters by image count, largest first.
# NOTE(review): the next cell indexes each item with [0], so the result is
# expected to be a sequence of (name, count) pairs rather than a dict.
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

In [None]:
# Keep the names of the characters with the most images. `char_dict` is
# already sorted by image count in descending order, so these are simply its
# leading entries (fewer if the dataset has fewer characters).
NUM_CHARACTERS = 10
characters = [entry[0] for entry in char_dict[:NUM_CHARACTERS]]
characters

In [None]:
# Build the training data from the folders of the selected characters
train = caer.preprocess_from_dir(
    char_path,
    characters,
    channels=channels,
    IMG_SIZE=IMG_SIZE,
    isShuffle=True,
)

In [None]:
# Number of training samples returned by caer.preprocess_from_dir
len(train)

In [None]:
# Preview the first element of the first training sample with matplotlib
# (OpenCV's own windows don't display well inside Jupyter notebooks)
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(30, 30))
ax.imshow(train[0][0], cmap='gray')
plt.show()

In [None]:
# Split the training data into the feature array and its matching labels
featureSet, labels = caer.sep_train(
    train,
    IMG_SIZE=IMG_SIZE,
)

In [None]:
from tensorflow.keras.utils import to_categorical

# Rescale the feature set into the (0, 1) range
featureSet = caer.normalize(featureSet)
# Turn the numeric labels into binary class vectors, one slot per selected character
labels = to_categorical(labels, num_classes=len(characters))

In [None]:
# Split features and labels into training and validation sets (val_ratio = 0.2)
x_train, x_val, y_train, y_val = caer.train_val_split(
    featureSet,
    labels,
    val_ratio=0.2,
)

In [None]:
# Only the split arrays are used from here on: drop the pre-split names and
# run a garbage-collection pass.
del train, featureSet, labels
gc.collect()

In [None]:
# Training settings. BATCH_SIZE is the batch size handed to the data generator;
# EPOCHS is presumably the number of training epochs (used outside this section).
EPOCHS = 10
BATCH_SIZE = 32

In [None]:
# Batch generator over the training split, built with canaro's image data
# generator (intended to add randomness to what the network sees)
import canaro.generators

datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
)