# Data Loading

In [None]:
import os
import numpy as np
from PIL import Image
import kagglehub
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# 1. Fetch the FER-2013 dataset from Kaggle; the returned value is used below
#    as the local root directory that contains the "train" and "test" folders.
fer_path = kagglehub.dataset_download("msambare/fer2013")
print(f"FER-2013 downloaded to: {fer_path}")

# 2. Define image loader
def load_images_from_dir(root_dir, target_size=(48, 48)):
    """Load every image under ``root_dir/<class>/`` as a grayscale array.

    Each immediate sub-directory of ``root_dir`` is treated as one class, and
    its lower-cased name becomes the label of every image inside it. Entries
    of ``root_dir`` that are not directories are skipped.

    Args:
        root_dir: Directory whose sub-directories each hold one class.
        target_size: Size tuple passed to ``Image.resize`` for every image.

    Returns:
        A tuple ``(X, y)`` where ``X`` is ``np.array`` of the per-image pixel
        arrays and ``y`` is ``np.array`` of the matching label strings, in the
        same order. Files that cannot be read are reported and left out.
    """
    X, y = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order reproducible across runs and machines.
    for class_folder in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        label = class_folder.lower()
        for file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, file)
            try:
                # The context manager closes the underlying file handle as
                # soon as the pixels are copied out, instead of leaving one
                # open handle per image for the garbage collector.
                with Image.open(img_path) as img:
                    pixels = np.array(img.convert("L").resize(target_size))
            except Exception as e:
                # Best-effort loading: report the unreadable file and go on.
                print(f"Error reading {img_path}: {e}")
                continue
            X.append(pixels)
            y.append(label)
    return np.array(X), np.array(y)

# 3. Load training and testing sets
train_dir = os.path.join(fer_path, "train")
test_dir = os.path.join(fer_path, "test")

X_train, y_train = load_images_from_dir(train_dir)
X_test, y_test = load_images_from_dir(test_dir)

# 4. Preprocess data: scale 8-bit pixel values to [0, 1] and append a trailing
#    channel axis, giving tensors of shape (samples, 48, 48, 1).
X_train = np.expand_dims(X_train.astype("float32") / 255.0, -1)
X_test = np.expand_dims(X_test.astype("float32") / 255.0, -1)

# 5. Encode response class as one-hot labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
num_classes = len(label_encoder.classes_)
# num_classes is passed explicitly: to_categorical otherwise infers the width
# from the largest label present, so a split that happens to lack the last
# class would silently produce a narrower one-hot matrix.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(label_encoder.transform(y_test), num_classes=num_classes)

# 6. Output summary
print("✅ FER-2013 Loaded")
print(f"Classes: {label_encoder.classes_}")
print(f"Train: {X_train.shape}, {y_train.shape}")
print(f"Test: {X_test.shape}, {y_test.shape}")

FER-2013 downloaded to: /kaggle/input/fer2013
✅ FER-2013 Loaded
Classes: ['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
Train: (28709, 48, 48, 1), (28709, 7)
Test: (7178, 48, 48, 1), (7178, 7)


# MLP Example (2 Hidden Dense Layers + Softmax Output)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.layers import Input

# Flatten input shape from 48x48x1 to 2304
input_shape = X_train.shape[1:]  # (48, 48, 1)
flattened_input_dim = np.prod(input_shape)

# Build the MLP (Feedforward Network)

# Input shape - 2304
# First layer - 512 Neurons
# Second layer - 256 Neurons
# Output layer - 7 Neurons (Softmax)

# Relu activation after each hidden layer

# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model makes Keras emit a
# UserWarning.
mlp_model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile
mlp_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train
# NOTE(review): the test split doubles as validation data here, so the
# val_* metrics are not an independent estimate if they are used for tuning.
# The History object is kept so the training curves can be inspected later.
mlp_history = mlp_model.fit(X_train, y_train, epochs=30, batch_size=64, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.2406 - loss: 1.9651 - val_accuracy: 0.3142 - val_loss: 1.7083
Epoch 2/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3186 - loss: 1.7070 - val_accuracy: 0.3629 - val_loss: 1.6596
Epoch 3/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3470 - loss: 1.6687 - val_accuracy: 0.3466 - val_loss: 1.6538
Epoch 4/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3492 - loss: 1.6479 - val_accuracy: 0.3689 - val_loss: 1.6228
Epoch 5/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3676 - loss: 1.6090 - val_accuracy: 0.3640 - val_loss: 1.6083
Epoch 6/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3754 - loss: 1.5972 - val_accuracy: 0.3572 - val_loss: 1.6247
Epoch 7/30
[1m449/449[0m 

<keras.src.callbacks.history.History at 0x78d0bb1886d0>

# RNN Architecture (Recurrent Layer into Dense Layer)



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.math import confusion_matrix

from tensorflow.keras.layers import Input

# Reshape: (samples, 48 time steps, 48 features)
X_train_rnn = X_train.reshape(-1, 48, 48)
X_test_rnn = X_test.reshape(-1, 48, 48)

# Build the RNN (Recurrent Neural Network)

# We feed in each row of the data (48 pixels) into the recurrent layer, which
# updates the hidden state of dimension 128. Then, after all 48 rows have been
# fed through the recurrent layer, the output is fed into a dense layer of dim
# 64. Finally, this output is fed into the last dense layer of dim 7 (softmax).

# Input shape - 48 rows x 48 pixels in each row
# Recurrent Layer - Hidden State dim 128, 48 time steps (one for each row)
# Dense Layer 1 - 64 Neurons
# Output layer - 7 Neurons (Softmax)

# Relu activation after the dense layer

# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model makes Keras emit a
# UserWarning.
rnn_model = Sequential([
    Input(shape=(48, 48)),
    SimpleRNN(128, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile
rnn_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train
# NOTE(review): the test split doubles as validation data here, so the
# val_* metrics are not an independent estimate if they are used for tuning.
# The History object is kept so the training curves can be inspected later.
rnn_history = rnn_model.fit(X_train_rnn, y_train, epochs=30, batch_size=64, validation_data=(X_test_rnn, y_test))

  super().__init__(**kwargs)


Epoch 1/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.2541 - loss: 1.8002 - val_accuracy: 0.2952 - val_loss: 1.7401
Epoch 2/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3045 - loss: 1.7255 - val_accuracy: 0.3374 - val_loss: 1.6833
Epoch 3/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3184 - loss: 1.6997 - val_accuracy: 0.3376 - val_loss: 1.6684
Epoch 4/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3306 - loss: 1.6739 - val_accuracy: 0.3431 - val_loss: 1.6580
Epoch 5/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3378 - loss: 1.6634 - val_accuracy: 0.3419 - val_loss: 1.6553
Epoch 6/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3375 - loss: 1.6604 - val_accuracy: 0.3390 - val_loss: 1.6571
Epoch 7/30
[1m449/449[0m 

<keras.src.callbacks.history.History at 0x78d1686eb850>

# Confusion Matrix


In [None]:
"""
The Standard Confusion Matrix
"""
def get_max_idx(arr):
  """Return the index of the largest element of ``arr``.

  On ties the earliest index wins, and an empty ``arr`` yields 0.
  """
  # max() keeps the first candidate and only replaces it on a strict ">",
  # which is the same first-occurrence rule as a manual scan from index 0.
  return max(range(len(arr)), key=lambda idx: arr[idx], default=0)

def create_cmatrix(predictions, labels, num_classes=7):
  """Build a confusion matrix from per-class scores and one-hot labels.

  Args:
    predictions: Sequence of per-sample score rows; the arg-max of each row
      is taken as the predicted class index.
    labels: Sequence of per-sample label rows (e.g. one-hot vectors); the
      arg-max of each row is taken as the actual class index.
    num_classes: Side length of the square matrix. Defaults to 7, the size
      that was previously hard-coded.

  Returns:
    A list of ``num_classes`` lists of ``num_classes`` ints where
    ``cmatrix[actual][predicted]`` is the number of samples with that pair.

  Raises:
    ValueError: If ``predictions`` and ``labels`` differ in length.
  """
  if len(predictions) != len(labels):
    raise ValueError(
        f"predictions ({len(predictions)}) and labels ({len(labels)}) "
        "must have the same number of rows")

  # Plain nested lists (not an ndarray) so that str(row) keeps printing as
  # "[1, 2, 3]" for callers that display the rows directly.
  cmatrix = [[0] * num_classes for _ in range(num_classes)]

  for scores, onehot in zip(predictions, labels):
    # np.argmax returns the first index of the maximum, i.e. ties resolve to
    # the lowest class index.
    actual = int(np.argmax(onehot))
    predicted = int(np.argmax(scores))
    cmatrix[actual][predicted] += 1
  return cmatrix

def display_cmatrix(cmatrix, class_names=None):
  """Print a confusion matrix as a labelled text table.

  Rows are the actual classes and columns the predicted classes. Each row is
  printed via ``str(row)`` and followed by its class name; the letters of
  "ACTUAL" run down the left margin, one per row.

  Args:
    cmatrix: Sequence of rows, one per actual class.
    class_names: Class name for each row/column, in index order. Defaults to
      the seven emotion names used elsewhere in this notebook.
  """
  if class_names is None:
    class_names = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

  print("\t\tPREDICTED\t\t")
  # Same layout as printing a NumPy array of strings: quoted names separated
  # by single spaces inside square brackets.
  print("[" + " ".join(repr(name) for name in class_names) + "]")
  print("-----------------")
  # Pad "ACTUAL" with spaces so rows beyond the sixth get a blank margin.
  margin = "ACTUAL".ljust(len(cmatrix))
  for letter, row, name in zip(margin, cmatrix, class_names):
    print(letter + " |" + str(row) + "| " + name)


# Evaluate each model on the same tensor layout it was trained on:
# X_test (samples, 48, 48, 1) for the MLP and X_test_rnn (samples, 48, 48)
# for the RNN.
mlp_predictions = mlp_model.predict(X_test)
rnn_predictions = rnn_model.predict(X_test_rnn)

mlp_cmatrix = create_cmatrix(predictions=mlp_predictions, labels=y_test)
rnn_cmatrix = create_cmatrix(predictions=rnn_predictions, labels=y_test)

# Label each table so the two matrices can be told apart in the output.
print("MLP CONFUSION MATRIX")
display_cmatrix(mlp_cmatrix)
print("RNN CONFUSION MATRIX")
display_cmatrix(rnn_cmatrix)

# Raw nested lists, convenient for copying the numbers elsewhere.
print(mlp_cmatrix)
print(rnn_cmatrix)

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
		PREDICTED		
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
-----------------
A |[218, 2, 110, 202, 256, 109, 61]| angry
C |[31, 2, 17, 19, 30, 9, 3]| disgust
T |[113, 0, 187, 167, 330, 129, 98]| fear
U |[129, 0, 87, 1095, 314, 92, 57]| happy
A |[78, 0, 72, 219, 692, 131, 41]| neutral
L |[135, 1, 152, 234, 417, 261, 47]| sad
  |[36, 2, 97, 98, 190, 28, 380]| suprise
		PREDICTED		
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
-----------------
A |[0, 0, 65, 346, 241, 173, 133]| angry
C |[0, 0, 9, 49, 24, 7, 22]| disgust
T |[0, 0, 91, 303, 262, 148, 220]| fear
U |[1, 0, 58, 1161, 298, 139, 117]| happy
A |[0, 0, 83, 390, 403, 193, 164]| neutral
L |[2, 0, 78, 392, 380, 259, 136]| sad
  |[0, 0, 63, 154, 113, 62, 439]| suprise
[[218, 2, 110, 202, 256, 109, 61], [31, 2, 17, 19, 30, 9, 3], [113, 0, 187, 167, 330, 1

**Sample Tests for Comparison with LLM**

In [None]:
# The data-loading cell at the top of the notebook must be run before this one.
# Five hand-picked test-set file names per emotion class, keyed by class folder.
files_to_test = dict(
    angry=[
        "PublicTest_26506006.jpg",
        "PrivateTest_18347688.jpg",
        "PrivateTest_22419133.jpg",
        "PublicTest_47452496.jpg",
        "PublicTest_47804687.jpg",
    ],
    disgust=[
        "PrivateTest_60490187.jpg",
        "PublicTest_99162116.jpg",
        "PublicTest_75786377.jpg",
        "PrivateTest_30523217.jpg",
        "PrivateTest_92933222.jpg",
    ],
    fear=[
        "PrivateTest_7261364.jpg",
        "PublicTest_89131102.jpg",
        "PrivateTest_31388255.jpg",
        "PublicTest_4506555.jpg",
        "PublicTest_54050404.jpg",
    ],
    happy=[
        "PublicTest_83097075.jpg",
        "PrivateTest_4014756.jpg",
        "PublicTest_46945921.jpg",
        "PublicTest_15499192.jpg",
        "PublicTest_40541412.jpg",
    ],
    neutral=[
        "PublicTest_10726845.jpg",
        "PublicTest_78125500.jpg",
        "PublicTest_65439988.jpg",
        "PublicTest_80317721.jpg",
        "PublicTest_67747988.jpg",
    ],
    sad=[
        "PrivateTest_60103853.jpg",
        "PrivateTest_86106478.jpg",
        "PublicTest_38013120.jpg",
        "PublicTest_69378300.jpg",
        "PrivateTest_55277524.jpg",
    ],
    surprise=[
        "PrivateTest_25288007.jpg",
        "PrivateTest_51290776.jpg",
        "PublicTest_49049109.jpg",
        "PublicTest_64740817.jpg",
        "PublicTest_64532931.jpg",
    ],
)


def load_samples_for_test(root_dir, samples, target_size=(48, 48)):
    """Load a hand-picked subset of images from ``root_dir/<class>/``.

    Works like a directory walk over the class folders, except that only the
    file names listed for a folder in ``samples`` are read from it.

    Args:
        root_dir: Directory whose sub-directories each hold one class.
        samples: Mapping from class-folder name to the list of file names to
            load from that folder. Folders without an entry contribute
            nothing.
        target_size: Size tuple passed to ``Image.resize`` for every image.

    Returns:
        A tuple ``(X, y)`` where ``X`` is ``np.array`` of the per-image pixel
        arrays and ``y`` is ``np.array`` of the matching lower-cased label
        strings. Files that cannot be read are reported and left out.
    """
    X, y = [], []
    # Sorted so the sample order does not depend on the file system.
    for class_folder in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        label = class_folder.lower()
        # Look the folder up by its own name rather than by splitting the
        # joined path on "/", which only works with POSIX separators. Folders
        # that have no requested samples are skipped instead of raising
        # KeyError.
        for file in samples.get(class_folder, []):
            img_path = os.path.join(class_path, file)
            try:
                # Close the file handle as soon as the pixels are copied out.
                with Image.open(img_path) as img:
                    pixels = np.array(img.convert("L").resize(target_size))
            except Exception as e:
                # Best-effort loading: report the unreadable file and go on.
                print(f"Error reading {img_path}: {e}")
                continue
            X.append(pixels)
            y.append(label)
    return np.array(X), np.array(y)

X_sample_test, y_sample_test = load_samples_for_test(test_dir, files_to_test)
# Apply the same preprocessing the models were trained with: scale the raw
# 0-255 pixel values to [0, 1] and append the trailing channel axis. Without
# this the models are fed inputs 255x larger than anything seen in training.
X_sample_test = np.expand_dims(X_sample_test.astype("float32") / 255.0, -1)
# Fix the one-hot width to the encoder's class count so it cannot shrink if
# the sample set ever lacks the highest-indexed class.
y_sample_test = to_categorical(
    label_encoder.transform(y_sample_test),
    num_classes=len(label_encoder.classes_),
)
print(f"Samples: {X_sample_test.shape}, {y_sample_test.shape}")

Samples: (35, 48, 48), (35, 7)


In [None]:
# Give each model the exact layout it declares as input instead of relying on
# Keras to adjust the rank implicitly: (samples, 48, 48, 1) for the MLP and
# (samples, 48, 48) for the RNN. reshape only changes the view, so it is valid
# whether or not X_sample_test carries a trailing channel axis.
mlp_sample_predictions = mlp_model.predict(X_sample_test.reshape(-1, 48, 48, 1))
rnn_sample_predictions = rnn_model.predict(X_sample_test.reshape(-1, 48, 48))

mlp_sample_cmatrix = create_cmatrix(predictions=mlp_sample_predictions, labels=y_sample_test)
rnn_sample_cmatrix = create_cmatrix(predictions=rnn_sample_predictions, labels=y_sample_test)

# Formatted tables first (MLP, then RNN), followed by the raw nested lists.
display_cmatrix(mlp_sample_cmatrix)
display_cmatrix(rnn_sample_cmatrix)
print("-"*30)
print("MLP SAMPLE MATRIX")
print(mlp_sample_cmatrix)
print("-"*30)
print("RNN SAMPLE MATRIX")
print(rnn_sample_cmatrix)
print("-"*30)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
		PREDICTED		
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
-----------------
A |[0, 0, 1, 0, 1, 0, 3]| angry
C |[0, 0, 3, 0, 1, 0, 1]| disgust
T |[0, 0, 1, 0, 0, 0, 4]| fear
U |[0, 0, 0, 1, 1, 0, 3]| happy
A |[0, 0, 0, 0, 1, 1, 3]| neutral
L |[0, 0, 2, 2, 1, 0, 0]| sad
  |[0, 0, 1, 0, 0, 0, 4]| suprise
		PREDICTED		
['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
-----------------
A |[0, 0, 0, 2, 0, 0, 3]| angry
C |[0, 0, 1, 2, 1, 0, 1]| disgust
T |[0, 0, 0, 4, 0, 0, 1]| fear
U |[0, 0, 0, 4, 0, 0, 1]| happy
A |[0, 0, 1, 3, 0, 0, 1]| neutral
L |[0, 0, 0, 4, 1, 0, 0]| sad
  |[0, 0, 2, 2, 0, 0, 1]| suprise
------------------------------
MLP SAMPLE MATRIX
[[0, 0, 1, 0, 1, 0, 3], [0, 0, 3, 0, 1, 0, 1], [0, 0, 1, 0, 0, 0, 4], [0, 0, 0, 1, 1, 0, 3], [0, 0, 0, 0, 1, 1, 3], [0, 0, 2, 2, 1, 0, 0], [0, 0, 1, 0, 0, 0, 4]]
---