# 🧠 Simpsons Character Classifier

This notebook implements a deep learning image classifier that identifies characters from *The Simpsons* using a Convolutional Neural Network (CNN). It is trained on the 10 characters that have the most images in the Kaggle Simpsons characters dataset.

## 📁 1. Dataset and Setup

We begin by setting up the image size, reading the dataset, and filtering the top 10 characters with the most images.

## 🧼 2. Preprocessing

Images are normalized and reshaped, and labels are one-hot encoded. We also split the dataset into training and validation sets.

## 🧱 3. Model Architecture

We define a multi-layer CNN with increasing depth, max-pooling, dropout, and a final softmax layer for 10-class prediction.

## 🏋️ 4. Training the Model

We compile the model with SGD, use a custom learning rate schedule, and train it for 20 epochs using a data generator.

## 🧪 5. Testing and Predictions

We test the trained model on a sample image and visualize the predicted result.

## ✅ Conclusion

This model demonstrates effective character recognition from images using CNNs. You can further expand it to include more characters, add a GUI for predictions, or deploy it using Streamlit or Flask.

!pip install caer canaro

import os
import caer
import canaro
import numpy as np
import cv2 as cv
import gc
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD

# Input geometry used throughout: 80x80 images with a single channel.
IMG_SIZE = (80, 80)
channels = 1
char_path = r'/kaggle/input/the-simpsons-characters-dataset/simpsons_dataset'

# Map every entry under char_path (one folder per character) to the number
# of items it contains, i.e. how many images exist for that character.
char_dict = {
    name: len(os.listdir(os.path.join(char_path, name)))
    for name in os.listdir(char_path)
}

# Order the entries by image count, largest first.
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

# Keep the names of the first 10 entries of the sorted counts, i.e. the
# 10 characters with the most images. zip() against range(10) caps the
# selection at 10 and simply stops early when fewer entries exist.
characters = [entry[0] for _, entry in zip(range(10), char_dict)]
characters
    

# Load the images of the selected characters from disk as shuffled
# training data.
train = caer.preprocess_from_dir(
    char_path,
    characters,
    channels=channels,
    IMG_SIZE=IMG_SIZE,
    isShuffle=True,
)
len(train)

# Separate the training set into its features and its labels.
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

from tensorflow.keras.utils import to_categorical

# Normalize the feature set, then turn the integer class labels into
# one-hot vectors with one slot per selected character.
featureSet = caer.normalize(featureSet)
labels = to_categorical(labels, len(characters))

# Hold out 20% of the samples for validation; the remaining 80% are used
# for training.
x_train, x_val, y_train, y_val = caer.train_val_split(featureSet, labels, val_ratio=0.2)

# The intermediate arrays are no longer needed; drop the references and
# run a garbage-collection pass to release their memory.
del train, featureSet, labels
gc.collect()

# Training hyperparameters.
BATCH_SIZE = 32
EPOCHS = 20

# Image data generator that serves the training set in batches.
# The batch size comes from BATCH_SIZE (not a literal) so it always agrees
# with the steps_per_epoch computation, which is derived from BATCH_SIZE.
datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)

# Build the CNN: three convolutional stages of increasing width, each
# followed by max-pooling and dropout, then a dense classifier head.
w, h = IMG_SIZE[:2]

# One output unit per selected character. Derived from `characters` (the
# same source used to one-hot encode the labels) instead of a literal 10,
# so the output layer cannot get out of sync with the label width.
output_dim = len(characters)

model = Sequential([
    # Stage 1: 32 filters.
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(w, h, channels)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Stage 3: 256 filters.
    Conv2D(256, (3, 3), padding='same', activation='relu'),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Classifier head.
    Flatten(),
    Dropout(0.5),
    Dense(1024, activation='relu'),

    # Output layer: softmax over the character classes.
    Dense(output_dim, activation='softmax'),
])

model.summary()


# Training the model.
# NOTE: the former `decay=1e-7` argument is intentionally not passed.
# Current Keras optimizers no longer accept `decay` (it is rejected or
# ignored depending on the version), and the learning rate is driven by
# the LearningRateScheduler callback below anyway.
optimizer = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Let canaro's schedule function set the learning rate for each epoch.
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

# Fit on the batch generator and evaluate on the held-out validation arrays.
# Both step counts use floor division by BATCH_SIZE.
training = model.fit(
    train_gen,
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
    validation_steps=len(y_val) // BATCH_SIZE,
    callbacks=callbacks_list,
)

# Testing: load a single image from the Kaggle test set.
test_path = r'/kaggle/input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/milhouse_van_houten_15.jpg'

img = cv.imread(test_path)
# cv.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than further down.
if img is None:
    raise FileNotFoundError("Could not read test image: " + test_path)

# OpenCV loads colour images in BGR channel order while matplotlib expects
# RGB; convert a copy for display only so `img` itself stays in BGR.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()


def prepare(img):
    """Turn a BGR image into a model-ready input.

    The image is converted to grayscale, resized to IMG_SIZE, reshaped with
    ``caer.reshape`` for a single channel, and finally normalized with
    ``caer.normalize``, the same call applied to the training features.

    Args:
        img: Image in OpenCV's BGR channel order (e.g. from ``cv.imread``).

    Returns:
        The preprocessed array to pass to ``model.predict``.
    """
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    img = cv.resize(img, IMG_SIZE)
    # NOTE(review): caer.reshape presumably adds the leading batch axis that
    # model.predict needs -- confirm against the caer documentation.
    img = caer.reshape(img, IMG_SIZE, 1)
    # The training features were passed through caer.normalize; without the
    # same scaling here the model would see inputs in a different value range
    # than it was trained on.
    # NOTE(review): assumes caer.normalize is a stateless per-pixel scaling,
    # so applying it to one image matches the training transform -- confirm.
    return caer.normalize(img)

# Run the trained network on the preprocessed test image.
model_input = prepare(img)
predictions = model.predict(model_input)

predictions

# Look up the character whose class received the highest predicted
# probability for the first (and only) sample.
best_class = np.argmax(predictions[0])
print(characters[best_class])