In [1]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

IMAGE_SIZE = 64  # Side length in pixels: every image is resized to IMAGE_SIZE x IMAGE_SIZE so all samples share one shape
NUM_CLASSES = 10  # Width of the one-hot label vectors built from the integer class labels


In [2]:
def load_data(folder_path):
    """Load a folder-per-class image dataset as flattened grayscale vectors.

    Every immediate sub-directory of ``folder_path`` is treated as one class.
    Class names are sorted and numbered from 0, so the integer assigned to a
    class depends on which sub-directories are present in this folder.

    Args:
        folder_path: Directory whose sub-directories each hold one class.

    Returns:
        A tuple ``(X, y, label_map)`` where ``X`` has one row per loaded image
        (grayscale, resized to IMAGE_SIZE x IMAGE_SIZE, scaled to [0, 1] and
        flattened), ``y`` holds the matching integer labels and ``label_map``
        maps class name -> integer label.

    Files that cannot be opened or decoded are reported on stdout and skipped.
    """
    X, y = [], []
    class_names = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isdir(os.path.join(folder_path, entry))
    )
    label_map = {name: idx for idx, name in enumerate(class_names)}

    for class_name in class_names:
        class_dir = os.path.join(folder_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order identical from one run to the next.
        img_files = sorted(os.listdir(class_dir))
        if not img_files:
            print(f"Warning: No images found in {class_dir}")
        for img_file in img_files:
            # Built outside the try block so the handler below always reports
            # the file that actually failed.
            img_path = os.path.join(class_dir, img_file)
            try:
                # The context manager releases the file handle even when
                # decoding fails part-way through.
                with Image.open(img_path) as img:
                    gray = img.convert('L').resize((IMAGE_SIZE, IMAGE_SIZE))  # Grayscale conversion
                pixels = np.asarray(gray, dtype=np.float32) / 255.0  # Normalize to [0, 1]
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Skipping {img_path} due to {e}")
                continue
            X.append(pixels.flatten())  # Flatten to 1D
            y.append(label_map[class_name])

    return np.array(X), np.array(y), label_map




In [3]:
def one_hot_encode(y, num_classes):
    """Convert integer class labels into a one-hot matrix.

    Args:
        y: Sequence or array of integer labels, one entry per sample.
        num_classes: Number of columns of the result.

    Returns:
        Float array of shape ``(len(y), num_classes)`` holding 1 in the column
        named by each label and 0 elsewhere.

    Raises:
        IndexError: If a label is not an integer or does not fit in
            ``num_classes`` columns.
    """
    n_samples = len(y)
    one_hot = np.zeros((n_samples, num_classes))
    # An empty label list becomes a float array, which cannot be used as an
    # index, so the assignment is skipped when there is nothing to encode.
    if n_samples:
        # One row of label(s) per sample; indexing with a column of row numbers
        # sets every (row, label) pair in a single vectorised assignment.
        labels = np.asarray(y).reshape(n_samples, -1)
        one_hot[np.arange(n_samples)[:, None], labels] = 1
    return one_hot


In [4]:
def shuffle_data(X, y, seed=None):
    """Shuffle samples and labels with one shared random permutation.

    Args:
        X: Array of samples, indexed along its first axis.
        y: Array of labels with the same length as ``X``.
        seed: Optional seed for a reproducible shuffle. When omitted, the
            permutation is drawn from NumPy's global random state, exactly as
            before this parameter existed.

    Returns:
        ``(X_shuffled, y_shuffled)`` — reordered copies; the inputs are left
        untouched and sample/label pairs stay aligned.
    """
    assert len(X) == len(y), "X and y must contain the same number of samples"
    if seed is None:
        permutation = np.random.permutation(len(X))
    else:
        # A dedicated generator keeps the seeded shuffle independent of any
        # other code that draws from the global random state.
        permutation = np.random.default_rng(seed).permutation(len(X))
    X_shuffled = X[permutation]
    y_shuffled = y[permutation]
    return X_shuffled, y_shuffled



In [5]:

# Seed NumPy's global random state before shuffling so the sample order (and
# therefore the mini-batch composition during training) is the same on every run.
np.random.seed(42)

# Load and shuffle training data
X_train, y_train, label_map = load_data("simpson_train_augmented")
X_train, y_train = shuffle_data(X_train, y_train)

# One-hot encode training labels
y_train_encoded = one_hot_encode(y_train, NUM_CLASSES)

# Load and shuffle test data
X_test, y_test, test_label_map = load_data("test (1)/test")

# load_data numbers the classes of each folder independently, so the integer
# labels only mean the same thing if both folders contain the same class names.
assert test_label_map == label_map, (
    f"Train/test class folders differ: {sorted(label_map)} vs {sorted(test_label_map)}"
)
assert len(label_map) == NUM_CLASSES, (
    f"Found {len(label_map)} classes but NUM_CLASSES is {NUM_CLASSES}"
)

X_test, y_test = shuffle_data(X_test, y_test)

# One-hot encode test labels
y_test_encoded = one_hot_encode(y_test, NUM_CLASSES)

def get_batches(X, Y, batch_size):
    """Yield consecutive ``(X_batch, Y_batch)`` slices of at most ``batch_size`` rows.

    Batches are taken in order from the start of the arrays; the last one is
    shorter when the number of rows is not a multiple of ``batch_size``.
    """
    for start in range(0, X.shape[0], batch_size):
        window = slice(start, start + batch_size)
        yield X[window], Y[window]

In [6]:
# Sanity check of the loaded data: array shapes, the configuration constants
# and the class-name -> integer-label mapping built from the training folder.
print("Training samples:", X_train.shape)
print("Test samples:", X_test.shape)
print("Image size:", IMAGE_SIZE, "x", IMAGE_SIZE)
print("Number of classes:", NUM_CLASSES)
print("Label map:", label_map)
# NumPy abbreviates large arrays with "..." when printed, so these two lines
# only show the corner rows/values of each array.
print(f"X_train: {X_train}")
print(f"y_train_encoded: {y_train_encoded}")

Training samples: (50000, 4096)
Test samples: (2000, 4096)
Image size: 64 x 64
Number of classes: 10
Label map: {'bart_simpson': 0, 'charles_montgomery_burns': 1, 'homer_simpson': 2, 'krusty_the_clown': 3, 'lisa_simpson': 4, 'marge_simpson': 5, 'milhouse_van_houten': 6, 'moe_szyslak': 7, 'ned_flanders': 8, 'principal_skinner': 9}
X_train: [[0.08235294 0.0627451  0.03137255 ... 0.         0.         0.        ]
 [0.49411765 0.5137255  0.5568628  ... 0.29803923 0.30588236 0.3019608 ]
 [0.43529412 0.43529412 0.43529412 ... 0.34117648 0.34509805 0.34509805]
 ...
 [0.32156864 0.3137255  0.29411766 ... 0.29411766 0.29411766 0.29411766]
 [0.47058824 0.47058824 0.46666667 ... 0.52156866 0.52156866 0.52156866]
 [0.10588235 0.09411765 0.08627451 ... 0.15686275 0.15686275 0.15686275]]
y_train_encoded: [[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [7]:
# Network architecture: input -> hidden layer 1 -> hidden layer 2 -> output
input_size = IMAGE_SIZE * IMAGE_SIZE  # one input unit per pixel of a flattened image
hidden_size1 = 512
hidden_size2 = 128
# One output unit per class. Tied to NUM_CLASSES so the network output always
# has the same width as the one-hot encoded labels.
output_size = NUM_CLASSES
        


def leaky_relu(Z, alpha=0.01):
    """Leaky ReLU: keep positive entries of ``Z``, scale all others by ``alpha``."""
    is_positive = Z > 0
    leaked = alpha * Z
    return np.where(is_positive, Z, leaked)

def leaky_relu_deriv(Z, alpha=0.01):
    """Element-wise slope of leaky ReLU: 1.0 where ``Z > 0``, ``alpha`` elsewhere."""
    on_positive_side = Z > 0
    return np.where(on_positive_side, 1.0, alpha)


def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that large
    scores do not overflow; this does not change the result.
    """
    row_peak = np.max(Z, axis=1, keepdims=True)
    unnormalised = np.exp(Z - row_peak)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total


In [8]:
# Initialise the parameters of the three layers.
# The seed is fixed first, and the weight matrices are drawn in the order
# W1, W2, W3 from the same global random state, so reordering these statements
# would change the initial weights.
np.random.seed(42)

# Each weight matrix is standard-normal noise scaled by sqrt(2 / fan_in)
# (He-style scaling), where fan_in is the number of inputs to the layer.
# Biases start at zero and are stored as row vectors of shape (1, units).
W1 = np.random.randn(input_size, hidden_size1) * np.sqrt(2. / input_size)
b1 = np.zeros((1, hidden_size1))

W2 = np.random.randn(hidden_size1, hidden_size2) * np.sqrt(2. / hidden_size1)
b2 = np.zeros((1, hidden_size2))

W3 = np.random.randn(hidden_size2, output_size) * np.sqrt(2. / hidden_size2)
b3 = np.zeros((1, output_size))




In [9]:
def forward_propagation(X, W1, b1, W2, b2, W3, b3):
    """Run the three-layer network forward on a batch.

    Two leaky-ReLU hidden layers are followed by a softmax output layer.

    Returns:
        ``(A3, cache)`` where ``A3`` is the softmax output and ``cache`` maps
        "Z1", "A1", "Z2", "A2", "Z3", "A3" to the pre-activation and
        activation of each layer, for use in the backward pass.
    """
    cache = {}

    # First hidden layer
    cache["Z1"] = np.dot(X, W1) + b1
    cache["A1"] = leaky_relu(cache["Z1"])

    # Second hidden layer
    cache["Z2"] = np.dot(cache["A1"], W2) + b2
    cache["A2"] = leaky_relu(cache["Z2"])

    # Output layer: class probabilities
    cache["Z3"] = np.dot(cache["A2"], W3) + b3
    cache["A3"] = softmax(cache["Z3"])

    return cache["A3"], cache




In [10]:
def compute_loss(A3, Y):
    """Mean cross-entropy between predicted probabilities and one-hot targets.

    A small constant (1e-9) is added inside the logarithm so a predicted
    probability of exactly zero does not produce ``-inf``.
    """
    log_probs = np.log(A3 + 1e-9)
    summed = np.sum(Y * log_probs)
    return -summed / Y.shape[0]


In [11]:
def backward_propagation(X, Y, W1, b1, W2, b2, W3, b3, cache):
    """Compute batch-averaged gradients of the loss for all weights and biases.

    Args:
        X: Input batch, one sample per row.
        Y: One-hot targets for the batch.
        W1, b1, W2, b2, W3, b3: Current parameters (only W2 and W3 are read).
        cache: Dictionary produced by the forward pass with keys
            "Z1", "A1", "Z2", "A2", "Z3", "A3".

    Returns:
        ``(dW1, db1, dW2, db2, dW3, db3)``.
    """
    Z1, A1, Z2, A2, Z3, A3 = (
        cache[key] for key in ("Z1", "A1", "Z2", "A2", "Z3", "A3")
    )
    m = X.shape[0]

    # Output layer: for softmax combined with cross-entropy the gradient with
    # respect to the pre-activation is simply (prediction - target).
    delta3 = A3 - Y
    dW3 = A2.T @ delta3 / m
    db3 = delta3.sum(axis=0, keepdims=True) / m

    # Second hidden layer: push the error back through W3, then through the
    # leaky-ReLU slope.
    delta2 = (delta3 @ W3.T) * leaky_relu_deriv(Z2)
    dW2 = A1.T @ delta2 / m
    db2 = delta2.sum(axis=0, keepdims=True) / m

    # First hidden layer: same pattern, one layer further back.
    delta1 = (delta2 @ W2.T) * leaky_relu_deriv(Z1)
    dW1 = X.T @ delta1 / m
    db1 = delta1.sum(axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2, dW3, db3



In [12]:
def update_parameters(W1, b1, W2, b2, W3, b3,
                      dW1, db1, dW2, db2, dW3, db3,
                      learning_rate):
    """Apply one gradient-descent step to every parameter.

    Each parameter is moved against its gradient by ``learning_rate``. The
    inputs are not modified; the updated values are returned in the order
    ``(W1, b1, W2, b2, W3, b3)``.
    """
    params = (W1, b1, W2, b2, W3, b3)
    grads = (dW1, db1, dW2, db2, dW3, db3)
    return tuple(
        param - learning_rate * grad
        for param, grad in zip(params, grads)
    )



In [13]:
num_epochs = 50
learning_rate = 0.01
batch_size = 64

# Mini-batch gradient descent over the training set.
for epoch in range(num_epochs):
    # Running sum of per-sample losses for this epoch. Each batch loss is a
    # mean over the batch, so it is weighted by the batch size; this keeps the
    # epoch average exact even when the final batch is smaller than batch_size.
    total_loss = 0.0
    for X_batch, y_batch in get_batches(X_train, y_train_encoded, batch_size):
        # Forward pass
        A3, cache = forward_propagation(X_batch, W1, b1, W2, b2, W3, b3)

        # Compute batch loss
        loss = compute_loss(A3, y_batch)
        total_loss += loss * X_batch.shape[0]

        # Backward pass
        dW1, db1, dW2, db2, dW3, db3 = backward_propagation(
            X_batch, y_batch, W1, b1, W2, b2, W3, b3, cache
        )

        # Update parameters
        W1, b1, W2, b2, W3, b3 = update_parameters(
            W1, b1, W2, b2, W3, b3,
            dW1, db1, dW2, db2, dW3, db3,
            learning_rate
        )

    # Print average loss every 10th epoch and after the last one
    if epoch % 10 == 0 or epoch == num_epochs - 1:
        # Divide by the number of samples, not by n // batch_size: the latter
        # undercounts the batches whenever n is not a multiple of batch_size.
        avg_loss = total_loss / X_train.shape[0]
        print(f"Epoch {epoch + 1}/{num_epochs} - Loss: {avg_loss:.4f}")


Epoch 1/50 - Loss: 2.1305
Epoch 11/50 - Loss: 1.4322
Epoch 21/50 - Loss: 0.9083
Epoch 31/50 - Loss: 0.5691
Epoch 41/50 - Loss: 0.3674
Epoch 50/50 - Loss: 0.2551


In [14]:
def predict(X, W1, b1, W2, b2, W3, b3):
    """Return the predicted class index for every row of ``X``.

    Runs the forward pass and picks, for each sample, the class with the
    highest output probability.
    """
    class_probs, _ = forward_propagation(X, W1, b1, W2, b2, W3, b3)
    return np.argmax(class_probs, axis=1)

def compute_accuracy(predictions, true_labels):
    """Fraction of predictions that equal the corresponding true label.

    Args:
        predictions: Predicted class indices (array or plain sequence).
        true_labels: True class indices with the same shape.

    Returns:
        Accuracy as a float in [0, 1].

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Plain Python lists compare as a single bool with ==, which would yield
    # an accuracy of exactly 0.0 or 1.0; convert so the comparison is
    # element-wise.
    predictions = np.asarray(predictions)
    true_labels = np.asarray(true_labels)
    # Without this check, shapes such as (n,) and (n, 1) broadcast to (n, n)
    # and produce a meaningless accuracy.
    if predictions.shape != true_labels.shape:
        raise ValueError(
            f"Shape mismatch: predictions {predictions.shape} vs "
            f"true_labels {true_labels.shape}"
        )
    return np.mean(predictions == true_labels)

# After training: classify the test images with the trained parameters and
# compare against the integer labels (y_test, not the one-hot y_test_encoded,
# because predict returns class indices).
test_predictions = predict(X_test, W1, b1, W2, b2, W3, b3)
accuracy = compute_accuracy(test_predictions, y_test)
print(f"Test set accuracy: {accuracy * 100:.2f}%")


Test set accuracy: 47.70%
