# CSC2042S 2025
## Assignment 2
## Perceptron Image Classification
### Maryam Abrahams (ABRMAR043)

In [1]:
# Imports and Setup

import numpy as np
import os
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler


## Task 1: Data Processing

We start by loading the Simpsons-MNIST dataset from the directory structure and handling it so that we can create train/validation splits and prepare the data for the perceptron model.

We'll do this by creating a function, `load`, which takes the parameters `base_path` (the path to the main dataset directory), `mode` (`"grayscale"` or `"rgb"`) and `size` (the target image size). It returns the images and their numeric labels as NumPy arrays, together with `label_map`, a dictionary mapping the folder names to numeric labels.



In [2]:
# Loading the dataset 

def load(base_path, mode="grayscale", size = (28, 28)):
    """Load the Simpsons-MNIST training images for one colour mode.

    Images are read from ``<base_path>/dataset/<mode>/train/<character>/`` for
    each of the ten character folders listed below.

    Parameters
    ----------
    base_path : str
        Directory that contains the ``dataset`` folder.
    mode : str
        Sub-folder to read. ``"grayscale"`` converts every image to single-channel
        ("L"); any other value converts to three-channel RGB.
    size : tuple of int
        Target ``(width, height)`` passed to ``Image.resize``; images that already
        have this size are left untouched.

    Returns
    -------
    X : np.ndarray
        Stacked image arrays (empty if nothing was loaded).
    y : np.ndarray
        Integer label of each image, aligned with ``X``.
    label_map : dict
        Maps character folder name -> integer label.

    Notes
    -----
    Missing character folders and unreadable images are reported with a printed
    message and skipped; they do not raise.
    """
    path = os.path.join(base_path, 'dataset', mode, 'train')

    characters = ['bart_simpson', 'charles_montgomery_burns', 'homer_simpson',
                  'krusty_the_clown', 'lisa_simpson', 'marge_simpson',
                  'milhouse_van_houten', 'moe_szyslak', 'ned_flanders',
                  'principal_skinner']

    label_map = {char: idx for idx, char in enumerate(characters)}
    images = []
    labels = []

    print(f"Loading {mode} images from: {path}\n")

    # PIL mode string: "L" is 8-bit grayscale, "RGB" is 3x8-bit colour.
    pil_mode = "L" if mode == "grayscale" else "RGB"

    for character in characters:
        char_path = os.path.join(path, character)

        # isdir (not exists) so that a stray *file* with a character's name is
        # reported here instead of crashing os.listdir below.
        if not os.path.isdir(char_path):
            print(f"Warning: Directory {char_path} does not exist\n")
            continue

        # os.listdir returns entries in arbitrary, platform-dependent order.
        # Sorting makes the sample order (and therefore any seeded split done
        # on the result) identical on every machine.
        for file in sorted(os.listdir(char_path)):
            # Case-insensitive so that e.g. "pic_0001.JPG" is not silently dropped.
            if not file.lower().endswith(".jpg"):
                continue

            img_path = os.path.join(char_path, file)

            try:
                with Image.open(img_path) as img:
                    img = img.convert(pil_mode)

                    if img.size != size:
                        img = img.resize(size)

                    images.append(np.array(img))
                    labels.append(label_map[character])

            except Exception as e:
                # Best effort: report the bad file and keep loading the rest.
                print(f"Error loading {img_path}: {e}\n")

    # Converting the images to numpy arrays
    X = np.array(images)
    # dtype=int keeps labels integral even when nothing was loaded.
    y = np.array(labels, dtype=int)

    print(f"Loaded {X.shape[0]} {mode} images with shape {X.shape[1:]}\n")
    return X, y, label_map

In [3]:
# Loading both the gray and colored datasets: 

# Folder that contains the `dataset/` directory. The original local path is kept
# as the default; set the SIMPSONS_MNIST_DIR environment variable to run this
# notebook on another machine without editing the cell.
base_path = os.environ.get(
    "SIMPSONS_MNIST_DIR",
    r"C:\Users\Yello\OneDrive - University of Cape Town\2025 Second Year\Second Semester\CSC2042S\Assignment 2",
)

try:
    X_gray, y_gray, label_map = load(base_path, mode='grayscale')
    X_rgb, y_rgb, _ = load(base_path, mode='rgb')

    # load() only prints a warning for missing folders, so a wrong path yields
    # empty arrays rather than an exception. Detect that here.
    if len(X_gray) == 0 or len(X_rgb) == 0:
        raise FileNotFoundError(f"No images found under {base_path}")

    print(f"Grayscale data shape: {X_gray.shape}")
    print(f"RGB data shape: {X_rgb.shape}")
    print(f"Label mapping: {label_map}")

except Exception as e:
    print(f"Error loading dataset: {e}")
    print("Please check that the dataset path is correct.")
    # Every later cell needs these arrays; stop here with the real cause instead
    # of letting a later cell fail with an unrelated NameError.
    raise

Loading grayscale images from: C:\Users\Yello\OneDrive - University of Cape Town\2025 Second Year\Second Semester\CSC2042S\Assignment 2\dataset\grayscale\train

Loaded 8000 grayscale images with shape (28, 28)

Loading rgb images from: C:\Users\Yello\OneDrive - University of Cape Town\2025 Second Year\Second Semester\CSC2042S\Assignment 2\dataset\rgb\train

Loaded 8000 rgb images with shape (28, 28, 3)

Grayscale data shape: (8000, 28, 28)
RGB data shape: (8000, 28, 28, 3)
Label mapping: {'bart_simpson': 0, 'charles_montgomery_burns': 1, 'homer_simpson': 2, 'krusty_the_clown': 3, 'lisa_simpson': 4, 'marge_simpson': 5, 'milhouse_van_houten': 6, 'moe_szyslak': 7, 'ned_flanders': 8, 'principal_skinner': 9}


Next I'll create a function, `splits`, that produces stratified training and validation splits from the loaded data, so that every class keeps the same proportion in both sets. In preparation for the perceptron implementation I'll also normalize the data, providing several normalization options for hyperparameter tuning later on.

In [4]:
# Training and validation splits

def splits(X, y, test_size = 0.2, random_state = 42, flatten = True): 
    """Create a stratified train/validation split, optionally flattening images.

    Parameters
    ----------
    X, y : array-like
        Samples and their labels; ``y`` is also used as the stratification key.
    test_size : float
        Passed to ``train_test_split`` as the validation share.
    random_state : int
        Seed passed to ``train_test_split``.
    flatten : bool
        When true, arrays with more than two dimensions are reshaped to
        ``(n_samples, -1)``.

    Returns
    -------
    X_train, X_val, y_train, y_val
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )

    # Only image-shaped data (3-D or 4-D) needs collapsing to one row per sample.
    needs_flattening = flatten and X_train.ndim > 2
    if needs_flattening:
        X_train = X_train.reshape(len(X_train), -1)
        X_val = X_val.reshape(len(X_val), -1)

    print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}")
    return X_train, X_val, y_train, y_val
    
# Split each colour mode separately; both calls use the defaults of splits()
# (20% validation, seed 42, flattened to one row per image).
(X_gray_train, X_gray_val,
 y_gray_train, y_gray_val) = splits(X_gray, y_gray)
(X_rgb_train, X_rgb_val,
 y_rgb_train, y_rgb_val) = splits(X_rgb, y_rgb)
    

Training set: (6400, 784), Validation set: (1600, 784)
Training set: (6400, 2352), Validation set: (1600, 2352)


In [5]:
# Normalization options

def normalize(X_train, X_val, method = "none"): 
    """Return float32 copies of the train/validation features, optionally scaled.

    Parameters
    ----------
    X_train, X_val : np.ndarray
        Feature matrices to convert.
    method : str
        ``"minmax"`` divides by 255.0 (a fixed rescale that maps 8-bit pixel
        values to [0, 1]; it does not look at the data's actual min/max).
        ``"zscore"`` standardises with a ``StandardScaler`` fitted on
        ``X_train`` only and then applied to ``X_val``.
        ``"none"`` only casts to float32.

    Returns
    -------
    X_train, X_val : np.ndarray

    Raises
    ------
    ValueError
        If ``method`` is not one of the names above. Previously a misspelt
        method silently returned the inputs untouched.
    """
    if method == "minmax": # to [0, 1]
        X_train = X_train.astype('float32') / 255.0
        X_val = X_val.astype('float32') / 255.0

    elif method == 'zscore':
        # Fit on the training set only so no validation statistics leak in.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train.astype('float32'))
        X_val = scaler.transform(X_val.astype('float32'))

    elif method == 'none':
        X_train = X_train.astype('float32')
        X_val = X_val.astype('float32')

    else:
        raise ValueError(
            f"Unknown normalization method {method!r}; "
            "expected 'minmax', 'zscore' or 'none'"
        )

    return X_train, X_val

# Rescale the flattened grayscale pixels to [0, 1] and report the training range.
X_gray_train_norm, X_gray_val_norm = normalize(
    X_gray_train, X_gray_val, method='minmax'
)
print(f"Normalized grayscale - Train range: [{X_gray_train_norm.min():.3f}, {X_gray_train_norm.max():.2f}]")


Normalized grayscale - Train range: [0.000, 1.00]


## Task 2: Multi-class Perceptron Implementation

For our multi-class perceptron implementation, we create both a binary perceptron class and a multiclass perceptron class, using the outline provided to us in the tutorials. The binary version implements the perceptron learning rule, plus a predict function that returns a binary label. The multiclass version is built from scratch using a one-vs-rest approach: it trains one binary perceptron per class and predicts by selecting the class with the highest score.

In [6]:
# Binary perceptron class

class BinaryPerceptron:
    """Single-output perceptron with a step activation (outputs 0 or 1).

    Attributes
    ----------
    weights : np.ndarray
        One weight per feature, initialised to 1.0.
    bias : float
        Initialised to 0.0.
    lr : float
        Learning rate used by the update rule.
    errors : list
        Number of misclassified samples in each epoch of the last ``fit`` call.
    random_state
        Stored for callers; this class does not use it (initialisation and
        sample order are deterministic).
    """

    def __init__(self, n_features,learning_rate = 0.1, random_state = None):
        self.weights = np.ones(n_features, dtype=float)
        self.bias = 0.0
        self.lr = learning_rate
        self.errors = []
        self.random_state = random_state

    def predict(self, x):
        """Return 1 if ``w.x + b >= 0`` for the single sample ``x``, else 0."""
        x = np.asarray(x, dtype=float)
        net_input = np.dot(x, self.weights) + self.bias
        return 1 if net_input >= 0 else 0

    def apply_learning_rule(self, x, y):
        """Apply one perceptron update for sample ``x`` with target ``y`` (0 or 1).

        Returns the absolute error (0 when the sample was classified
        correctly, 1 otherwise).
        """
        # Coerce here as well as in predict(): a plain list would otherwise
        # fail in the float * list weight update below.
        x = np.asarray(x, dtype=float)
        # int(y) guards against unsigned NumPy labels, where 0 - 1 can wrap
        # around instead of giving -1.
        error = int(y) - self.predict(x)
        if error != 0:
            step = self.lr * error
            self.weights += step * x
            self.bias += step
        return abs(error)

    def fit(self, X, y, max_epochs = 1000): 
        """Train in the given sample order until an error-free epoch or ``max_epochs``.

        ``self.errors`` is reset and receives one entry per epoch. Returns ``self``.
        """
        self.errors = []
        n_samples = len(X)

        for epoch in range(max_epochs):
            total_error = 0

            for i in range(n_samples):
                total_error += self.apply_learning_rule(X[i], y[i])

            self.errors.append(total_error)
            # A full pass without a single update means training has converged.
            if total_error == 0:
                print(f"Converged after {epoch + 1} epochs")
                break

        return self

    def __repr__(self):
        return f"BinaryPerceptron(weights={self.weights}, bias={self.bias:.3f}, learning rate={self.lr})\n"

In [7]:
# Multiclass perceptron

class MulticlassPerceptron:
    """One-vs-rest multiclass classifier built from ``BinaryPerceptron`` units.

    One binary perceptron is created per class. Class ``i``'s perceptron is
    trained to output 1 for label ``i`` and 0 for every other label;
    prediction picks the class whose perceptron has the highest raw score.
    """

    def __init__(self, n_features, n_classes =10, learning_rate =0.1, random_state = 42):
        self.n_classes = n_classes
        # Each unit gets its own seed (random_state + i). None is passed through
        # unchanged, since BinaryPerceptron accepts it and None + i would raise.
        self.perceptrons = [
            BinaryPerceptron(
                n_features,
                learning_rate,
                None if random_state is None else random_state + i,
            )
            for i in range(n_classes)
        ]

    def fit(self, X, y, max_epochs=1000):
        """Train every per-class perceptron on its one-vs-rest labels. Returns ``self``."""
        # Accept plain lists: list == int would compare the whole list at once.
        y = np.asarray(y)
        for i in range(self.n_classes):
            print(f"Training perceptron for class {i}...")
            y_binary = np.where(y == i, 1, 0)
            self.perceptrons[i].fit(X, y_binary, max_epochs)
        return self

    def predict(self, X):
        """Return the predicted class index for every row of ``X``."""
        scores = np.zeros((len(X), self.n_classes))
        for i, perceptron in enumerate(self.perceptrons):
            # Raw (pre-threshold) score, so classes can be ranked against each other.
            scores[:, i] = np.dot(X, perceptron.weights) + perceptron.bias
        return np.argmax(scores, axis=1)
        

## Task 3: Training 

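We seek to implement a training loop that finds good weights for a given set of hyperparameters (such as the learning rate). We do this by investigating two stopping criteria: stopping once the training error falls to a threshold (convergence), and early stopping once the validation accuracy has stopped improving for a number of epochs (patience).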

In [32]:
# Training loop

class EnhancedBinaryPerceptron(BinaryPerceptron): 
    """BinaryPerceptron with shuffled epochs and two stopping criteria.

    Training stops at the first of:
      * validation-based early stopping - validation accuracy has not improved
        for ``patience`` consecutive epochs (the best weights are restored);
      * convergence - the epoch's training error is <= ``error_threshold``;
      * ``max_epochs`` epochs have been run.

    Note that ``fit`` takes ``X_val`` as its third positional parameter, unlike
    ``BinaryPerceptron.fit`` where the third positional parameter is
    ``max_epochs``; pass ``max_epochs`` by keyword.
    """

    def __init__(self, n_features, learning_rate=0.1, random_state=None): 
        super().__init__(n_features, learning_rate, random_state)
        self.val_accuracies = []

    def fit(self, X, y, X_val=None, y_val =None, max_epochs = 1000, error_threshold=0.0, patience=5, verbose=True): 
        """Train with per-epoch shuffling, optional early stopping and a convergence check.

        Parameters
        ----------
        X, y : np.ndarray
            Training samples and 0/1 targets (indexed with integer arrays).
        X_val, y_val : np.ndarray, optional
            When both are given, validation accuracy is recorded every epoch
            in ``self.val_accuracies`` and drives early stopping.
        max_epochs : int
            Upper bound on the number of epochs.
        error_threshold : float
            Stop once an epoch's misclassification count is <= this value.
        patience : int
            Number of consecutive non-improving validation epochs tolerated.
        verbose : bool
            Controls printing only; it never changes when training stops.

        Returns
        -------
        self
        """
        self.errors = []
        self.val_accuracies = []

        has_validation = X_val is not None and y_val is not None

        # Seeded from random_state so a given seed reproduces the same shuffles;
        # None draws fresh entropy.
        rng = np.random.default_rng(self.random_state)

        # Collecting for early stopping: 
        best_weights = self.weights.copy()
        best_bias = self.bias
        best_val_acc = 0
        patience_counter = 0

        for epoch in range(max_epochs):

            # Visit the samples in a new random order each epoch.
            indices = rng.permutation(len(X))

            total_error = 0
            for idx in indices:
                total_error += self.apply_learning_rule(X[idx], y[idx])
            self.errors.append(total_error)

            if has_validation:
                val_acc = self.accuracy(X_val, y_val)
                self.val_accuracies.append(val_acc)

                # Early stop logic: 
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_weights = self.weights.copy()
                    best_bias = self.bias
                    patience_counter = 0
                else:
                    patience_counter += 1

                if patience_counter >= patience:
                    # Restore and stop regardless of `verbose`; previously this
                    # only happened when verbose=True.
                    self.weights = best_weights
                    self.bias = best_bias
                    if verbose:
                        print(f"Early stopping at epoch {epoch+1}, best accuracy value: {best_val_acc:.4f}")
                    break

            if total_error <= error_threshold:
                if verbose:
                    print(f"Converged after {epoch + 1} epochs")
                break
            if verbose and (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch+1}, Training error: {total_error}")

        else:
            # Reached only when the loop was not ended by `break`, so this is
            # never printed together with a convergence or early-stop message,
            # and it is safe when max_epochs == 0.
            if verbose:
                print(f"Reached maximum epochs ({max_epochs})")

        return self

    def accuracy(self, X, y): 
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        # Same rule as predict() (score >= 0 -> 1), applied to all rows at once.
        scores = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        predictions = (scores >= 0).astype(int)
        return np.mean(predictions == y)

    def predict_score(self, x): 
        """Return the raw, pre-threshold score ``w.x + b``."""
        return np.dot(x, self.weights) + self.bias


In [34]:
# Investigating stop conditions

# Human-readable name for each numeric label.
# NOTE(review): class_names is not referenced anywhere else in this cell; the
# loop below reports classes by index only.
class_names = {
    0: 'Bart Simpson',
    1: 'Charles Montgomery Burns', 
    2: 'Homer Simpson',
    3: 'Krusty the Clown',
    4: 'Lisa Simpson',
    5: 'Marge Simpson',
    6: 'Milhouse Van Houten',
    7: 'Moe Szyslak',
    8: 'Ned Flanders',
    9: 'Principal Skinner' 
}

# Per-class results, filled in parallel (one entry per loop iteration).
class_index = []
convergence_epochs = []
final_accuracy = []

# Train one one-vs-rest binary perceptron per class on the min-max scaled
# grayscale data and record how long it trained and how well it validated.
for i in range(0, 10): 
    
    # One-vs-rest targets: 1 for class i, 0 for every other class.
    y_binary_train = np.where(y_gray_train == i, 1, 0)
    y_binary_val = np.where(y_gray_val == i, 1, 0)
    
    print(f"Training binary labels for class {i}:")
    class_index.append(i)
    print(f"Class {i} samples: {np.sum(y_binary_train == 1)}")
    print(f"Other classes samples: {np.sum(y_binary_train == 0)}")
    print(f"Validation - Class {i}: {np.sum(y_binary_val == 1)}, Others: {np.sum(y_binary_val == 0)}")
    
    # A fresh model per class, so no weights carry over between classes.
    enhanced_perceptron = EnhancedBinaryPerceptron( n_features = X_gray_train_norm.shape[1], learning_rate = 0.1, random_state = 42 )

    # Supplying the validation set enables the patience-based early stopping.
    enhanced_perceptron.fit( X_gray_train_norm, y_binary_train, X_val = X_gray_val_norm, y_val=y_binary_val, max_epochs = 1000, patience = 10, verbose =True ) 

    # NOTE(review): in the recorded output the validation set has 1440 "other"
    # samples out of 1600, so always predicting 0 would already score 0.90;
    # read the accuracies below against that baseline.
    acc = enhanced_perceptron.accuracy(X_gray_val_norm, y_binary_val) 
    final_accuracy.append(acc) 
    # fit() appends one entry to .errors per epoch, so its length is the
    # number of epochs actually run.
    convergence_epochs.append(len(enhanced_perceptron.errors))
    
    print(f"Final validation accuracy: {acc:.4f}")
    print(f"Epochs trained: {len(enhanced_perceptron.errors)}\n")
    

Training binary labels for class 0:
Class 0 samples: 640
Other classes samples: 5760
Validation - Class 0: 160, Others: 1440
Early stopping at epoch 19, best accuracy value: 0.8994
Final validation accuracy: 0.8994
Epochs trained: 19

Training binary labels for class 1:
Class 1 samples: 640
Other classes samples: 5760
Validation - Class 1: 160, Others: 1440
Early stopping at epoch 12, best accuracy value: 0.9012
Final validation accuracy: 0.9012
Epochs trained: 12

Training binary labels for class 2:
Class 2 samples: 640
Other classes samples: 5760
Validation - Class 2: 160, Others: 1440
Early stopping at epoch 11, best accuracy value: 0.8981
Final validation accuracy: 0.8981
Epochs trained: 11

Training binary labels for class 3:
Class 3 samples: 640
Other classes samples: 5760
Validation - Class 3: 160, Others: 1440
Early stopping at epoch 14, best accuracy value: 0.9000
Final validation accuracy: 0.9000
Epochs trained: 14

Training binary labels for class 4:
Class 4 samples: 640
Oth

## Task 4: Hyperparameter tuning 

## Task 5: Evaluation

## Task 6: RGB vs grayscale analysis