In [15]:
!pip install caer canaro
    

In [16]:
import caer
import os 
import canaro
import numpy as np
import cv2 as cv
import gc

In [17]:
# Base folder that holds all of the image data.
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

# Number of colour channels; 1 means the images are handled as grayscale.
channels = 1

# All images must share the same size, so each one is brought to this size.
IMG_SIZE = (80, 80)

In [18]:
# Map every character folder under char_path to the number of entries
# (images) it contains.
char_dict={}
for char in os.listdir(char_path):
    char_dir=os.path.join(char_path,char)
    # Skip anything that is not a folder (e.g. a stray file sitting next to
    # the character folders): os.listdir() on it would raise NotADirectoryError.
    if not os.path.isdir(char_dir):
        continue
    char_dict[char]=len(os.listdir(char_dir))
# Sort by image count in descending order.
char_dict=caer.sort_dict(char_dict,descending=True)
char_dict

In [19]:
# Keep the names of the first 10 entries of the sorted listing, i.e. the
# characters that have the most images in the dataset.
# zip() against range(10) stops after ten entries (or earlier if there are fewer).
characters=[entry[0] for _,entry in zip(range(10),char_dict)]
characters

In [20]:
# Create the training data: the images found in the folders of the selected
# (top 10) characters under char_path are preprocessed with the configured
# channel count and image size, with shuffling enabled.
train=caer.preprocess_from_dir(
    char_path,
    characters,
    channels=channels,
    IMG_SIZE=IMG_SIZE,
    isShuffle=True,
)

In [21]:
len(train) #total number of images prepared for training

In [22]:
# Show the first training image (element 0 of the first sample) in grayscale.
import matplotlib.pyplot as plt
fig,ax=plt.subplots(figsize=(30,30))
ax.imshow(train[0][0],cmap='gray')
plt.show()

In [23]:
#train is a list of samples (13133 images in the recorded run), so we split it into a
#feature set (an array holding the image features used for recognition) and the matching labels
featureSet,labels=caer.sep_train(train,IMG_SIZE=IMG_SIZE)


In [24]:
from tensorflow.keras.utils import to_categorical

# Normalize the features to the (0,1) range so the network can learn faster.
featureSet = caer.normalize(featureSet)

# Labels are not normalized; instead they are converted from integer class
# ids to binary class vectors, one slot per selected character.
labels = to_categorical(labels, num_classes=len(characters))

In [25]:
#Split features and labels into a training part and a validation part.
#x_train,y_train: used as the training data set for the model
#x_val,y_val: used for validation, so the model can be checked while it trains
#val_ratio=.2 -> 80% of the data for training and 20% for validation
x_train,x_val,y_train,y_val=caer.train_val_split(featureSet,labels,val_ratio=.2)

In [26]:
# gc is Python's garbage-collection module (imported at the top).
# The unsplit data is no longer required once the train/validation split
# exists, so drop those names and trigger a collection to free the memory.
del train, featureSet, labels
gc.collect()

In [27]:
# Number of samples per batch drawn from the training generator.
BATCH_SIZE = 32
# Number of epochs the model is trained for.
EPOCHS = 10

In [28]:
# Image data generator: synthesizes new images from the already existing ones
# to introduce some randomness into what the network sees.
datagen = canaro.generators.imageDataGenerator()

# Batches of BATCH_SIZE samples drawn from the training split.
train_gen = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)

In [29]:
#creating model
#The labels are one-hot vectors over len(characters) mutually exclusive classes and the
#prediction is read with argmax, so this is single-label multi-class classification:
#use categorical cross-entropy. With 'binary_crossentropy' every output unit is scored as an
#independent yes/no problem and Keras reports binary accuracy, which overstates how often
#the predicted character is actually correct.
#NOTE(review): assumes createSimpsonsModel ends in a softmax layer with output_dim units - confirm against canaro.
model=canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE,channels=channels,output_dim=len(characters)
                                       ,loss='categorical_crossentropy',decay=1e-6,learning_rate=0.001,momentum=0.9
                                       ,nesterov=True)

In [30]:
#print an overview of the model that was just created
model.summary()

In [31]:
from tensorflow.keras.callbacks import LearningRateScheduler

# Callback list passed to training. It holds a learning-rate scheduler that
# adjusts the learning rate at specific intervals (driven by
# canaro.lr_schedule) so that the network can train better.
lr_scheduler = LearningRateScheduler(canaro.lr_schedule)
callbacks_list = [lr_scheduler]

In [32]:
# Fit the model on batches from the training generator and validate on the
# held-out split after every epoch. Integer division gives the number of whole
# batches that fit into each data set.
training = model.fit(
    train_gen,
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
    validation_steps=len(y_val) // BATCH_SIZE,
    callbacks=callbacks_list,
)


In [34]:
#show the class names again; the position of a name in this list is the index used to read predictions
characters


In [45]:
#testing our model
#NOTE(review): this picture sits inside the folder the training data was read from, so the model
#may already have seen it - use an image from outside the training data for an honest check.
test_path=r'../input/the-simpsons-characters-dataset/simpsons_dataset/bart_simpson/pic_0032.jpg'
img=cv.imread(test_path)
#cv.imread returns None instead of raising when the file is missing or unreadable
if img is None:
    raise FileNotFoundError('could not read test image: '+test_path)

#OpenCV loads colour images in BGR order while matplotlib expects RGB; convert for display only
plt.imshow(cv.cvtColor(img,cv.COLOR_BGR2RGB))
plt.show()
def prepare(img):
    """Turn a BGR image (as returned by cv.imread) into an input for model.predict.

    Steps: convert to grayscale, resize to IMG_SIZE, reshape with a single
    channel via caer.reshape, then normalize with caer.normalize.

    The normalization step mirrors what is done to the training features
    (featureSet=caer.normalize(featureSet)); without it the model would be fed
    values on a different scale than the one it was trained on.

    Parameters:
        img: colour image array in OpenCV's BGR channel order.

    Returns:
        The value returned by caer.normalize for the reshaped image.
    """
    img=cv.cvtColor(img,cv.COLOR_BGR2GRAY)
    img=cv.resize(img,IMG_SIZE)
    img=caer.reshape(img,IMG_SIZE,1)
    #same scaling as the training features
    img=caer.normalize(img)
    return img



In [46]:
#run the trained model on the preprocessed test image
predictions=model.predict(prepare(img))

In [47]:
#display the raw model output for the test image
predictions


In [48]:
# Position of the largest value in the first prediction row, used to look up
# the matching character name.
best_index=np.argmax(predictions[0])
print(characters[best_index])