<a href="https://colab.research.google.com/github/TheRufael/CS770-Assignments/blob/main/Assignment_Three_Q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================
# Table of Contents
# 1. Reproducibility and versions
# 2. Imports and GPU check
# 3. Load Fashion MNIST and preprocess
# 4. Train and validation split with fixed seed
# 5. Utility functions for model building and evaluation
# 6. Define 10 configurations for systematic tuning
# 7. Train loop across configurations
# 8. Results table and quick summary



In [1]:
# ============================================================
# 1. Reproducibility and versions
#     Sets fixed seeds and reports library versions
# ============================================================

# Seed values used for reproducibility across executions
SEED = 42

import os, random, time
import numpy as np

random.seed(SEED)
np.random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)

import tensorflow as tf
tf.random.set_seed(SEED)

# Version logging for transparency and reproducibility
print("TensorFlow version", tf.__version__)

try:
    import pandas as pd
    print("Pandas version", pd.__version__)
except Exception:
    print("Pandas not found")

try:
    import sklearn
    print("Scikit-learn version", sklearn.__version__)
except Exception:
    print("Scikit-learn not found")


TensorFlow version 2.19.0
Pandas version 2.2.2
Scikit-learn version 1.6.1


In [2]:
# ============================================================
# 2. Imports and GPU check
#     Loads required modules and confirms hardware availability
# ============================================================

from tensorflow import keras
from tensorflow.keras import layers, regularizers, initializers, optimizers, utils
from sklearn.model_selection import train_test_split

# Displays available GPU devices for acceleration
print("GPU available", tf.config.list_physical_devices('GPU'))


GPU available [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# ============================================================
# 3. Load Fashion MNIST and preprocess
#     Normalizes image data and applies one-hot encoding
# ============================================================

# Fashion MNIST dataset loading
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Normalization of pixel intensity to [0, 1]
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encoding of class labels
num_classes = 10
y_train_full_oh = utils.to_categorical(y_train_full, num_classes)
y_test_oh = utils.to_categorical(y_test, num_classes)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
# ============================================================
# 3. Load Fashion MNIST and preprocess
#     Normalizes image data and applies one-hot encoding
# ============================================================

# Fashion MNIST dataset loading
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Normalization of pixel intensity to [0, 1]
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encoding of class labels
num_classes = 10
y_train_full_oh = utils.to_categorical(y_train_full, num_classes)
y_test_oh = utils.to_categorical(y_test, num_classes)


In [5]:
# ============================================================
# 4. Train and validation split with fixed seed
#     Creates stratified 80/20 division for model evaluation
# ============================================================

x_train, x_val, y_train, y_val = train_test_split(
    x_train_full,
    y_train_full_oh,
    test_size=0.20,
    random_state=SEED,
    stratify=y_train_full  # Preserves label distribution
)

# Shape reporting for verification
print("Train shape", x_train.shape, "Val shape", x_val.shape, "Test shape", x_test.shape)


Train shape (48000, 28, 28) Val shape (12000, 28, 28) Test shape (10000, 28, 28)


In [6]:
# ============================================================
# 5. Utility functions
#     Includes model builder, compiler, parameter counter,
#     and evaluation helper
# ============================================================

# MLP architecture generator with configurable depth and regularization
def build_mlp(
    input_shape=(28, 28),
    hidden_units=(128, 64),
    activation="relu",
    kernel_init="he_normal",
    l2_reg=0.0,
    dropout_rate=0.0
):
    # Optional L2 regularizer
    l2 = regularizers.l2(l2_reg) if l2_reg and l2_reg > 0 else None
    init = initializers.get(kernel_init)

    model = keras.Sequential(name="mlp_fashion_mnist")
    model.add(layers.Input(shape=input_shape))
    model.add(layers.Flatten())  # Converts 2D image to vector form

    # Hidden dense layers
    for units in hidden_units:
        model.add(layers.Dense(units, activation=activation, kernel_initializer=init, kernel_regularizer=l2))
        if dropout_rate and dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    # Output layer using softmax for multi-class prediction
    model.add(layers.Dense(10, activation="softmax", kernel_initializer="glorot_uniform"))
    return model

# Model compilation helper with selectable optimizer
def compile_model(model, optimizer_name="adam", lr=1e-3):
    if optimizer_name == "adam":
        opt = optimizers.Adam(learning_rate=lr)
    elif optimizer_name == "sgd":
        opt = optimizers.SGD(learning_rate=lr, momentum=0.0)
    elif optimizer_name == "rmsprop":
        opt = optimizers.RMSprop(learning_rate=lr)
    else:
        opt = optimizers.Adam(learning_rate=lr)

    model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
    return model

# Trainable parameter counter for model complexity analysis
def count_trainable_params(model):
    return int(np.sum([np.prod(v.shape) for v in model.trainable_variables]))

# Test-set evaluation helper returning accuracy
def evaluate_model(model, x_test, y_test_oh, batch_size=64):
    test_loss, test_acc = model.evaluate(x_test, y_test_oh, batch_size=batch_size, verbose=0)
    return test_acc


In [7]:
# ============================================================
# 6. Configurations for systematic hyperparameter tuning
#     Defines baseline model and nine variations
# ============================================================

BASELINE = {
    "name": "baseline",
    "hidden_units": (128, 64),
    "activation": "relu",
    "optimizer": "adam",
    "lr": 1e-3,
    "dropout": 0.0,
    "l2_reg": 0.0,
    "kernel_init": "he_normal",
    "batch_size": 64,
    "epochs": 30
}

# List of structured configurations used for controlled comparisons
CONFIGS = [
    BASELINE,
    {
        "name": "depth_3_layers",
        "hidden_units": (128, 64, 64),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "depth_4_layers",
        "hidden_units": (128, 128, 64, 32),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "units_larger",
        "hidden_units": (256, 128),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "units_smaller",
        "hidden_units": (64, 32),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "activation_tanh",
        "hidden_units": (128, 64),
        "activation": "tanh",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "optimizer_sgd",
        "hidden_units": (128, 64),
        "activation": "relu",
        "optimizer": "sgd",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "lr_0_01",
        "hidden_units": (128, 64),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-2,
        "dropout": 0.0,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "l2_1e_4",
        "hidden_units": (128, 64),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.0,
        "l2_reg": 1e-4,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
    {
        "name": "dropout_0_3",
        "hidden_units": (128, 64),
        "activation": "relu",
        "optimizer": "adam",
        "lr": 1e-3,
        "dropout": 0.3,
        "l2_reg": 0.0,
        "kernel_init": "he_normal",
        "batch_size": 64,
        "epochs": 30
    },
]


In [8]:
# ============================================================
# 7. Training loop
#     Executes models sequentially and records metrics
# ============================================================

results = []

for cfg in CONFIGS:
    print("\nRunning", cfg["name"])

    # Clears backend state before each training run
    tf.keras.backend.clear_session()

    # Model construction and compilation for current configuration
    model = build_mlp(
        input_shape=(28, 28),
        hidden_units=cfg["hidden_units"],
        activation=cfg["activation"],
        kernel_init=cfg["kernel_init"],
        l2_reg=cfg["l2_reg"],
        dropout_rate=cfg["dropout"]
    )
    model = compile_model(model, optimizer_name=cfg["optimizer"], lr=cfg["lr"])

    # Parameter count for complexity assessment
    params = count_trainable_params(model)

    # Training timer start
    start = time.time()

    # Model training with validation tracking
    hist = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=cfg["epochs"],
        batch_size=cfg["batch_size"],
        verbose=2
    )

    elapsed = time.time() - start

    # Accuracy extraction from history logs
    train_acc = float(hist.history["accuracy"][-1])
    val_acc = float(hist.history["val_accuracy"][-1])

    # Test-set performance measurement
    test_acc = float(evaluate_model(model, x_test, y_test_oh, batch_size=cfg["batch_size"]))

    # Results storage
    results.append({
        "config": cfg["name"],
        "hidden_units": str(cfg["hidden_units"]),
        "activation": cfg["activation"],
        "optimizer": cfg["optimizer"],
        "lr": cfg["lr"],
        "dropout": cfg["dropout"],
        "l2_reg": cfg["l2_reg"],
        "batch_size": cfg["batch_size"],
        "epochs": cfg["epochs"],
        "train_acc": round(train_acc, 4),
        "val_acc": round(val_acc, 4),
        "test_acc": round(test_acc, 4),
        "train_time_sec": int(elapsed),
        "trainable_params": params
    })



Running baseline
Epoch 1/30
750/750 - 5s - 7ms/step - accuracy: 0.8121 - loss: 0.5351 - val_accuracy: 0.8558 - val_loss: 0.4080
Epoch 2/30
750/750 - 2s - 3ms/step - accuracy: 0.8602 - loss: 0.3881 - val_accuracy: 0.8657 - val_loss: 0.3760
Epoch 3/30
750/750 - 2s - 3ms/step - accuracy: 0.8727 - loss: 0.3476 - val_accuracy: 0.8767 - val_loss: 0.3431
Epoch 4/30
750/750 - 2s - 3ms/step - accuracy: 0.8803 - loss: 0.3232 - val_accuracy: 0.8776 - val_loss: 0.3365
Epoch 5/30
750/750 - 2s - 3ms/step - accuracy: 0.8872 - loss: 0.3027 - val_accuracy: 0.8826 - val_loss: 0.3276
Epoch 6/30
750/750 - 2s - 3ms/step - accuracy: 0.8930 - loss: 0.2876 - val_accuracy: 0.8826 - val_loss: 0.3274
Epoch 7/30
750/750 - 2s - 3ms/step - accuracy: 0.8997 - loss: 0.2716 - val_accuracy: 0.8851 - val_loss: 0.3215
Epoch 8/30
750/750 - 2s - 3ms/step - accuracy: 0.9036 - loss: 0.2594 - val_accuracy: 0.8839 - val_loss: 0.3266
Epoch 9/30
750/750 - 3s - 3ms/step - accuracy: 0.9076 - loss: 0.2493 - val_accuracy: 0.8828 - 

In [9]:
# ============================================================
# 8. Results table and summary
#     Displays validation ranking and performance metrics
# ============================================================

try:
    import pandas as pd

    df_results = pd.DataFrame(results)
    df_display = df_results.sort_values("val_acc", ascending=False).reset_index(drop=True)

    print("\nTop results by validation accuracy")

    from IPython.display import display
    display(df_display)

except Exception:
    print("Install pandas for tabular display")
    for r in sorted(results, key=lambda x: x["val_acc"], reverse=True):
        print(r)



Top results by validation accuracy


Unnamed: 0,config,hidden_units,activation,optimizer,lr,dropout,l2_reg,batch_size,epochs,train_acc,val_acc,test_acc,train_time_sec,trainable_params
0,depth_3_layers,"(128, 64, 64)",relu,adam,0.001,0.0,0.0,64,30,0.9465,0.8921,0.8769,66,113546
1,activation_tanh,"(128, 64)",tanh,adam,0.001,0.0,0.0,64,30,0.9523,0.8887,0.875,66,109386
2,dropout_0_3,"(128, 64)",relu,adam,0.001,0.3,0.0,64,30,0.8964,0.8883,0.8779,66,109386
3,baseline,"(128, 64)",relu,adam,0.001,0.0,0.0,64,30,0.9499,0.8872,0.8793,72,109386
4,depth_4_layers,"(128, 128, 64, 32)",relu,adam,0.001,0.0,0.0,64,30,0.9447,0.8856,0.8737,70,127658
5,l2_1e_4,"(128, 64)",relu,adam,0.001,0.0,0.0001,64,30,0.9339,0.885,0.8711,67,109386
6,units_larger,"(256, 128)",relu,adam,0.001,0.0,0.0,64,30,0.9596,0.8837,0.8731,66,235146
7,units_smaller,"(64, 32)",relu,adam,0.001,0.0,0.0,64,30,0.9372,0.8781,0.8688,66,52650
8,lr_0_01,"(128, 64)",relu,adam,0.01,0.0,0.0,64,30,0.8914,0.8729,0.857,67,109386
9,optimizer_sgd,"(128, 64)",relu,sgd,0.001,0.0,0.0,64,30,0.8387,0.8431,0.829,63,109386
