In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install caer canaro

In [None]:
import os
import caer
import canaro
import numpy as np
import cv2 as cv
import gc

In [None]:
# Notebook-wide configuration.
# char_path: dataset root; every entry directly under it is treated as one character's folder.
char_path = r'/kaggle/input/the-simpsons-characters-dataset/simpsons_dataset'
# channels: number of colour channels the images are loaded with and the model expects.
channels = 1
# IMG_SIZE: size every image is resized to before it is fed to the model.
IMG_SIZE = (80, 80)

In [None]:
# Map every character folder name to the number of entries that folder contains.
char_dict = {
    name: len(os.listdir(os.path.join(char_path, name)))
    for name in os.listdir(char_path)
}

# Sort by count in descending order, so the characters with the most entries come first.
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict


In [None]:
# Keep the names of (at most) the first ten entries of the sorted char_dict;
# element [0] of each entry is taken as the character name.
characters = [entry[0] for _, entry in zip(range(10), char_dict)]
characters

In [None]:
# Build the (shuffled) training data from the folders of the selected characters.
# NOTE(review): train[0][0] is displayed as an image further down, so each element is
# presumably an (image, label) pair — confirm against the caer documentation.
train = caer.preprocess_from_dir(
    char_path,
    characters,
    channels=channels,
    IMG_SIZE=IMG_SIZE,
    isShuffle=True,
)


In [None]:
# Display how many samples were loaded into the training data.
len(train)

In [None]:
import matplotlib.pyplot as plt

# Preview the first training sample with a gray colormap on a 30x30-inch figure.
fig, ax = plt.subplots(figsize=(30, 30))
ax.imshow(train[0][0], cmap='gray')
plt.show()

In [None]:
# Separate the training data into a feature set and the matching labels.
# Per the original author's note, the feature set comes back as a 4D tensor so that it can
# be fed into the model — TODO confirm the exact shape against the caer documentation.
featureSet,labels = caer.sep_train(train, IMG_SIZE = IMG_SIZE)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the labels, with one column per selected character.
labels = to_categorical(labels, num_classes=len(characters))

# Normalize the feature set (to the 0-1 range, per the original note) so the model learns faster.
# Caution: this cell overwrites its own inputs, so run it only once per kernel session.
featureSet = caer.normalize(featureSet)



In [None]:
# Split features and labels into training and validation parts (val_ratio=0.2,
# i.e. presumably 20% of the samples are held out for validation).
x_train, x_val, y_train, y_val = caer.train_val_split(
    featureSet,
    labels,
    val_ratio=0.2,
)

In [None]:
# Training hyper-parameters: samples per batch and number of training epochs.
BATCH_SIZE, EPOCHS = 32, 10

In [None]:
# The raw training data and the unsplit arrays are not used by any later cell;
# drop the references and run the garbage collector before training.
del train, featureSet, labels
gc.collect()

In [None]:
# Image data generator: it produces new, randomized images from the existing ones,
# which is intended to make the model perform better.
# NOTE(review): the concrete augmentations are whatever canaro's generator applies by
# default — confirm against the canaro documentation.
datagen = canaro.generators.imageDataGenerator()

# Batches of BATCH_SIZE samples drawn from the training split.
train_gen = datagen.flow(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
)

In [None]:
# Create our model (it is only built here; compilation happens in a later cell).
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D

# One output unit per selected character. Deriving this from `characters` (instead of a
# hard-coded 10) keeps the softmax layer in step with the one-hot labels, which are built
# with len(characters) columns, even when fewer than ten character folders are available.
output_dim = len(characters)

w, h = IMG_SIZE[:2]

model = Sequential()

# Convolutional block 1: two 32-filter 3x3 convolutions, 2x2 max-pooling, light dropout.
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(w, h, channels)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Convolutional block 2: same layout with 64 filters.
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Convolutional block 3: same layout with 256 filters.
model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Classifier head: flatten the feature maps, apply stronger dropout, then a dense layer.
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))

# Output Layer: softmax scores, one per character.
model.add(Dense(output_dim, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
from tensorflow.keras.optimizers import SGD

# Define the optimizer without decay: SGD with Nesterov momentum and an initial
# learning rate of 0.001.
optimizer = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

# Compile the model. The network ends in a softmax layer and the labels are one-hot
# encoded over several mutually exclusive classes, so the matching loss is categorical
# cross-entropy. Binary cross-entropy would instead score every output unit as an
# independent yes/no problem, which does not fit a softmax output.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [None]:
# Display the selected character names; their positions in this list are used to map a
# predicted class index back to a name at the end of the notebook.
characters

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

# Let canaro's schedule function set the learning rate during training.
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

# Train on the generated batches. steps_per_epoch is passed explicitly so that one epoch
# consists of len(x_train)//BATCH_SIZE batches drawn from the generator.
# validation_steps is intentionally not passed: the validation data is a pair of
# in-memory arrays, so Keras evaluates all of it after every epoch, whereas a step count
# of len(y_val)//BATCH_SIZE would silently skip the final partial batch.
training = model.fit(train_gen,
                    steps_per_epoch=len(x_train)//BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(x_val, y_val),
                    callbacks=callbacks_list)

In [None]:
test_path = r'/kaggle/input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/bart_simpson_19.jpg'

# cv.imread does not raise when the file is missing or unreadable; it returns None.
# Fail here with a clear message instead of crashing later inside imshow/cvtColor.
img = cv.imread(test_path)
if img is None:
    raise FileNotFoundError(f'Could not read test image: {test_path}')

# OpenCV loads colour images in BGR channel order while matplotlib expects RGB.
# Convert a copy for display only, so `img` itself stays in BGR for prepare().
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()

In [None]:
def prepare(image):
    """Convert a BGR image into the input format the model was trained on.

    The image is converted to grayscale, resized to IMG_SIZE, reshaped with
    caer.reshape to a single channel and finally scaled with caer.normalize,
    the same scaling that was applied to the training features.

    Args:
        image: Image array in BGR channel order, as returned by cv.imread.

    Returns:
        The reshaped and normalized array, ready to be passed to model.predict.
        NOTE(review): presumably a batch containing this single image —
        confirm the exact shape against the caer.reshape documentation.

    Raises:
        ValueError: If ``image`` is None (e.g. cv.imread could not read the file).
    """
    if image is None:
        raise ValueError('prepare() received None instead of an image')

    image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    image = cv.resize(image, IMG_SIZE)
    image = caer.reshape(image, IMG_SIZE, 1)
    # The training features went through caer.normalize before fitting; without the same
    # step here the model would be fed pixel values on a different scale than it was
    # trained on.
    image = caer.normalize(image)
    return image

In [None]:
# Run the model on the preprocessed test image; predictions[0] is read below as the
# per-class scores for that image.
predictions = model.predict(prepare(img))

In [None]:
# Pick the index of the highest score and map it back to the character name.
best_index = np.argmax(predictions[0])
print(characters[best_index])