In [None]:
import os
import numpy as np
import cv2 as cv
import gc
from tqdm.notebook import tqdm

In [None]:
# Keras/TensorFlow Imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical

In [None]:
# -------------------------------------------------------------------
# 1. SETUP VARIABLES AND DATA PATHS
# -------------------------------------------------------------------
IMG_SIZE = (80, 80)  # target image size; used as (IMG_SIZE[0], IMG_SIZE[1]) for the model input
channels = 1  # 1 for grayscale, 3 for color (RGB)
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

# Seed NumPy's global RNG so the data shuffle done during preprocessing is
# repeatable across "Restart & Run All". This does not seed TensorFlow, so
# weight initialisation and dropout still vary between runs.
SEED = 42
np.random.seed(SEED)

In [None]:
# Creating a character dictionary (folder name -> number of entries) and sorting it
char_dict = {}
# Walk the folders in name order: os.listdir returns entries in arbitrary
# order, and the stable sort below keeps insertion order for equal counts, so
# this makes the resulting class order reproducible.
for char in sorted(os.listdir(char_path)):
    char_dir = os.path.join(char_path, char)
    # Only process directories
    if not os.path.isdir(char_dir):
        continue
    try:
        char_dict[char] = len(os.listdir(char_dir))
    except OSError as err:
        # Best effort: an unreadable folder is reported and left out rather
        # than being dropped without a trace.
        print(f"[WARN] Skipping '{char}': {err}")

# Sort in descending order (Standard Python Replacement for caer.sort_dict)
sorted_char_dict_list = sorted(char_dict.items(), key=lambda item: item[1], reverse=True)

In [None]:
# Keep the ten most populated character folders as the classes (a common choice for this dataset)
characters = [name for name, _ in sorted_char_dict_list[:10]]
num_classes = len(characters)

In [None]:
# Report how many classes were picked and which ones, followed by a separator rule
print(
    f"Number of classes selected: {num_classes}",
    f"Classes: {characters}",
    "-" * 30,
    sep="\n",
)

In [None]:
# -------------------------------------------------------------------
# 2. CUSTOM PREPROCESSING FUNCTION (Replacement for caer.preprocess_from_dir)
# -------------------------------------------------------------------

def preprocess_images_custom(DIR, classes, IMG_SIZE, channels):
    """Load, resize and normalise every readable image of the given classes.

    Args:
        DIR: Root directory containing one sub-directory per class.
        classes: Ordered class (sub-directory) names; a class's position in
            this sequence becomes its integer label.
        IMG_SIZE: Target size, interpreted as (height, width) to match the
            (N, IMG_SIZE[0], IMG_SIZE[1], channels) layout of the returned
            features.
        channels: 1 loads images as grayscale; any other value loads them in
            colour and converts BGR -> RGB.

    Returns:
        Tuple ``(X, y)``. ``X`` is a float32 array of shape
        ``(N, IMG_SIZE[0], IMG_SIZE[1], channels)`` scaled by 1/255. ``y`` is
        the one-hot label matrix of shape ``(N, len(classes))``. Rows of both
        are shuffled with the same random order.
    """
    images = []
    labels = []
    skipped = 0

    # Create a mapping from class name to numerical index (0 to num_classes-1)
    class_to_index = {class_name: i for i, class_name in enumerate(classes)}

    target_h, target_w = IMG_SIZE[0], IMG_SIZE[1]
    # cv.imread's default flag is IMREAD_COLOR; spell it out for the colour path
    read_flag = cv.IMREAD_GRAYSCALE if channels == 1 else cv.IMREAD_COLOR

    print("[INFO] Starting preprocessing...")

    for class_name in tqdm(classes, desc="Processing Classes"):
        class_path = os.path.join(DIR, class_name)
        class_index = class_to_index[class_name]

        # A missing entry or a plain file cannot be listed; skip it
        if not os.path.isdir(class_path):
            continue

        # Iterate over all entries in the class directory
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            try:
                img = cv.imread(img_path, read_flag)

                # imread returns None for anything it cannot decode; test
                # this before any further OpenCV call touches the image
                if img is None:
                    skipped += 1
                    continue

                if channels != 1:
                    # OpenCV loads BGR; the rest of the pipeline expects RGB
                    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

                # cv.resize takes dsize as (width, height), the reverse of
                # the (height, width) order used for the reshape below
                resized_img = cv.resize(img, (target_w, target_h))
            except Exception:
                # Best effort: one bad file must not abort the whole run, but
                # it is counted so the loss is visible in the summary
                skipped += 1
                continue

            images.append(resized_img)
            labels.append(class_index)

    if skipped:
        print(f"[WARN] Skipped {skipped} unreadable or non-image entries.")

    print("[INFO] Preprocessing complete. Shuffling data...")

    # Reshape features to (N, H, W, C); -1 infers the number of samples (N)
    X = np.array(images).reshape(-1, target_h, target_w, channels)
    y = np.array(labels, dtype='int64')

    # Shuffle features and labels with one shared random order
    order = np.arange(len(labels))
    np.random.shuffle(order)
    X = X[order]
    y = y[order]

    # Normalize features
    X = X.astype('float32') / 255.0

    # Convert labels to categorical (one-hot encoding)
    y = to_categorical(y, num_classes=len(classes))

    print(f"[INFO] Features shape: {X.shape}")
    print(f"[INFO] Labels shape: {y.shape}")

    return X, y

In [None]:
# -------------------------------------------------------------------
# 3. CUSTOM MODEL FUNCTION (Replacement for canaro.createDefaultModel)
# -------------------------------------------------------------------

def create_simpsons_classifier_model(IMG_SIZE, channels, num_classes):
    """Build and compile the small CNN used to classify the characters.

    The network is three Conv2D -> MaxPooling2D -> Dropout(0.25) stages with
    32, 64 and 128 filters, followed by Flatten, Dense(512), Dropout(0.5) and
    a softmax layer with ``num_classes`` outputs.

    Args:
        IMG_SIZE: Pair whose two entries give the first two input dimensions.
        channels: Size of the last input dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    input_dims = (IMG_SIZE[0], IMG_SIZE[1], channels)

    # First convolutional stage carries the input shape
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_dims),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]

    # Remaining convolutional stages differ only in their filter count
    for n_filters in (64, 128):
        stack.extend([
            Conv2D(n_filters, (3, 3), activation='relu'),
            MaxPooling2D(pool_size=(2, 2)),
            Dropout(0.25),
        ])

    # Classification head
    stack.extend([
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# -------------------------------------------------------------------
# 4. EXECUTION
# -------------------------------------------------------------------

# 4.1 Build the training features and one-hot labels for the selected classes
X_train, y_train = preprocess_images_custom(
    DIR=char_path,
    classes=characters,
    IMG_SIZE=IMG_SIZE,
    channels=channels,
)

# Run a garbage-collection pass after preprocessing
gc.collect()

In [None]:
# 4.2 Build the classifier, then fit it on the prepared data
BATCH_SIZE = 32
EPOCHS = 10

model = create_simpsons_classifier_model(
    IMG_SIZE=IMG_SIZE,
    channels=channels,
    num_classes=num_classes,
)

print("-" * 30)
print(f"Starting model training for {EPOCHS} epochs...")
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# Drop the training arrays now that fitting is done, then collect garbage
del X_train, y_train
gc.collect()

In [None]:
# 4.3 Prediction Utility (Replacement for 'prepare' function using caer)
def prepare_image(img_path, IMG_SIZE, channels):
    """Load one image and shape it as a single-sample batch for the model.

    Args:
        img_path: Path of the image file to load.
        IMG_SIZE: Target size, interpreted as (height, width) to match the
            (1, IMG_SIZE[0], IMG_SIZE[1], channels) shape returned.
        channels: 1 loads the image as grayscale; any other value loads it in
            colour and converts BGR -> RGB.

    Returns:
        A float32 array of shape ``(1, IMG_SIZE[0], IMG_SIZE[1], channels)``
        scaled by 1/255, or ``None`` if anything goes wrong (the error is
        printed rather than raised).
    """
    try:
        # cv.imread's default flag is IMREAD_COLOR; spell it out for the colour path
        read_flag = cv.IMREAD_GRAYSCALE if channels == 1 else cv.IMREAD_COLOR
        img = cv.imread(img_path, read_flag)

        # imread returns None when the file cannot be read. Check before
        # cvtColor so the caller sees this message, not an OpenCV assertion.
        if img is None:
            raise FileNotFoundError(f"Image not found at {img_path}")

        if channels != 1:
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # Convert BGR to RGB

        # cv.resize takes dsize as (width, height), the reverse of the
        # (height, width) order used for the reshape below
        resized_img = cv.resize(img, (IMG_SIZE[1], IMG_SIZE[0]))

        # Reshape for Keras (1, H, W, C) and normalize; float32 matches the
        # dtype of the training features
        reshaped_img = resized_img.reshape(1, IMG_SIZE[0], IMG_SIZE[1], channels)
        return reshaped_img.astype('float32') / 255.0
    except Exception as e:
        # Callers test the result against None, so report and return None
        print(f"Error in prepare_image: {e}")
        return None

In [None]:
# 4.4 Example Prediction and Saving
# Example test image: the first entry (by name) in the first character class.
# Sorting makes the choice repeatable, since os.listdir order is arbitrary.
sample_dir = os.path.join(char_path, characters[0])
test_img_path = os.path.join(sample_dir, sorted(os.listdir(sample_dir))[0])

# Prepare the image
prepared_img = prepare_image(test_img_path, IMG_SIZE, channels)

if prepared_img is not None:
    # Make prediction
    predictions = model.predict(prepared_img)

    # Get class with the highest probability
    predicted_index = np.argmax(predictions[0])
    predicted_class = characters[predicted_index]

    print("-" * 30)
    print(f"Predicted class index: {predicted_index}")
    print(f"Predicted character: {predicted_class}")
else:
    print("Skipping prediction due to image loading error.")

# Save the model whether or not the sample image loaded: a failed demo
# prediction should not discard the trained model.
model.save('simpsons_classifier.h5')
print("Model saved to 'simpsons_classifier.h5'")