<a href="https://colab.research.google.com/github/TheRufael/CS770-Assignments/blob/main/Assignment_Three_Q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================
# Table of Contents
# 1. Reproducibility and versions
# 2. Imports and GPU check
# 3. Load Fashion MNIST and preprocess
# 4. Train and validation split with fixed seed
# 5. Utility functions for model building and evaluation
# 6. Define 10 configurations for systematic tuning
# 7. Train loop across configurations
# 8. Results table and quick summary


# 1. Reproducibility and versions
#     Set seeds and print library versions

# Single seed shared by every random number generator used in this notebook
SEED = 42

import os
import random
import time

import numpy as np

# Seed Python's and NumPy's global generators
random.seed(SEED)
np.random.seed(SEED)
# NOTE: hash randomization is fixed when the interpreter starts, so this
# assignment can only influence child processes, not the running kernel.
os.environ["PYTHONHASHSEED"] = str(SEED)

import tensorflow as tf

tf.random.set_seed(SEED)

# Print versions of key libraries used
print("TensorFlow version", tf.__version__)

# pandas and scikit-learn are probed separately. Only a failed import is
# reported as "not found", so unrelated errors are not hidden behind that
# message.
try:
    import pandas as pd
except ImportError:
    print("Pandas not found")
else:
    print("Pandas version", pd.__version__)

try:
    import sklearn
except ImportError:
    print("Scikit-learn not found")
else:
    print("Scikit-learn version", sklearn.__version__)


# 2. Imports and GPU check
#     Confirm we are on GPU if available

# Import necessary modules from TensorFlow and scikit-learn
from tensorflow import keras
from tensorflow.keras import layers, regularizers, initializers, optimizers, utils
from sklearn.model_selection import train_test_split

# Print the list of GPU devices that TensorFlow reports
print("GPU available", tf.config.list_physical_devices("GPU"))


# 3. Load Fashion MNIST and preprocess
#     Normalize to 0 to 1 and one hot encode labels

# Fetch the dataset as (train images, train labels), (test images, test labels)
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Cast the images to float32 and divide by 255 so pixels lie in [0, 1]
x_train_full = x_train_full.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Number of target classes, used as the width of the one-hot vectors
num_classes = 10
# One-hot encode the integer labels of the training and test sets
y_train_full_oh = utils.to_categorical(y_train_full, num_classes=num_classes)
y_test_oh = utils.to_categorical(y_test, num_classes=num_classes)


# 4. Train and validation split with fixed seed
#     Use 80 by 20 split of the original training set

# Hold out 20% of the original training set for validation.
# Stratification uses the integer labels, while the one-hot labels are the
# ones that get split alongside the images.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full,
    y_train_full_oh,
    stratify=y_train_full,
    test_size=0.20,
    random_state=SEED,
)

# Report the shapes of the three image arrays
print("Train shape", x_train.shape, "Val shape", x_val.shape, "Test shape", x_test.shape)


# 5. Utility functions for model building and evaluation
#     Build MLP with variable depth, units, activation, regularization, dropout

# Function to build a Multi-Layer Perceptron (MLP) model
def build_mlp(
    input_shape=(28, 28),
    hidden_units=(128, 64),
    activation="relu",
    kernel_init="he_normal",
    l2_reg=0.0,
    dropout_rate=0.0,
    num_classes=10,
):
    """Build an uncompiled fully connected softmax classifier (no convolution).

    Args:
        input_shape: Shape of a single input sample; the input is flattened
            before the first Dense layer.
        hidden_units: Iterable giving the width of each hidden Dense layer,
            in order.
        activation: Activation used by every hidden layer.
        kernel_init: Kernel initializer identifier (or instance) for the
            hidden layers.
        l2_reg: L2 penalty coefficient for the hidden-layer kernels. A falsy
            value or a value not greater than 0 disables regularization.
        dropout_rate: Dropout rate applied after every hidden layer. A falsy
            value or a value not greater than 0 adds no Dropout layers.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``keras.Sequential`` model named ``"mlp_fashion_mnist"``.
    """
    # Only create a regularizer when a positive coefficient was requested
    l2 = regularizers.l2(l2_reg) if l2_reg and l2_reg > 0 else None

    model = keras.Sequential(name="mlp_fashion_mnist")
    model.add(layers.Input(shape=input_shape))
    # Flatten each sample to 1D so it can feed the Dense layers
    model.add(layers.Flatten())

    for units in hidden_units:
        # Resolve the initializer separately for every layer. Keras documents
        # that an unseeded or integer-seeded initializer instance yields the
        # same random values on repeated calls, so sharing one instance would
        # tie the starting weights of different layers together. An
        # initializer instance passed by the caller is still used as given.
        model.add(
            layers.Dense(
                units,
                activation=activation,
                kernel_initializer=initializers.get(kernel_init),
                kernel_regularizer=l2,
            )
        )
        if dropout_rate and dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    # Output layer: one softmax unit per class
    model.add(layers.Dense(num_classes, activation="softmax", kernel_initializer="glorot_uniform"))
    return model

# Function to compile the model with a specified optimizer and learning rate
def compile_model(model, optimizer_name="adam", lr=1e-3):
    """Compile ``model`` for one-hot multi-class classification.

    Args:
        model: Model exposing ``compile``; it is compiled in place.
        optimizer_name: One of ``"adam"``, ``"sgd"`` (momentum 0.0) or
            ``"rmsprop"``. Any other value falls back to Adam and emits a
            ``UserWarning`` so a misspelled name is not silently recorded as
            a different optimizer.
        lr: Learning rate passed to the optimizer.

    Returns:
        The same ``model`` object, compiled with categorical cross-entropy
        loss and the ``"accuracy"`` metric.
    """
    import warnings

    # Optimizers use their default settings apart from the learning rate
    # (and an explicit zero momentum for SGD) to keep comparisons simple.
    if optimizer_name == "adam":
        opt = optimizers.Adam(learning_rate=lr)
    elif optimizer_name == "sgd":
        opt = optimizers.SGD(learning_rate=lr, momentum=0.0)
    elif optimizer_name == "rmsprop":
        opt = optimizers.RMSprop(learning_rate=lr)
    else:
        # Keep the original best-effort fallback, but make it visible
        warnings.warn(
            f"Unknown optimizer {optimizer_name!r}; falling back to Adam",
            stacklevel=2,
        )
        opt = optimizers.Adam(learning_rate=lr)

    model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
    return model

# Function to count the number of trainable parameters in the model
def count_trainable_params(model):
    """Return the total number of scalar entries in ``model.trainable_variables``.

    Each variable contributes the product of its shape dimensions; the
    result is a plain Python ``int``.
    """
    total = 0
    for variable in model.trainable_variables:
        total += int(np.prod(variable.shape))
    return total

# Function to evaluate the model on the test set
def evaluate_model(model, x_test, y_test_oh, batch_size=64):
    """Return the accuracy reported by ``model.evaluate`` on the given data.

    Args:
        model: Object whose ``evaluate`` returns a two-item result, unpacked
            here as (loss, accuracy).
        x_test: Inputs forwarded to ``model.evaluate``.
        y_test_oh: Targets forwarded to ``model.evaluate``.
        batch_size: Batch size used during evaluation.

    Returns:
        The second item of the evaluation result; the loss is discarded.
    """
    _, accuracy = model.evaluate(
        x_test,
        y_test_oh,
        batch_size=batch_size,
        verbose=0,  # no progress output
    )
    return accuracy


# 6. Define 10 configurations for systematic tuning
#     Start with a baseline and change one factor at a time

# Reference configuration that every other entry is derived from
BASELINE = dict(
    name="baseline",
    hidden_units=(128, 64),
    activation="relu",
    optimizer="adam",
    lr=1e-3,
    dropout=0.0,
    l2_reg=0.0,
    kernel_init="he_normal",
    batch_size=64,
    epochs=30,
)

# Ten configurations: the baseline itself plus nine variants. Each variant
# copies the baseline and overrides its name and exactly one hyperparameter.
CONFIGS = [
    BASELINE,
    # Depth: 3 and 4 hidden layers
    {**BASELINE, "name": "depth_3_layers", "hidden_units": (128, 64, 64)},
    {**BASELINE, "name": "depth_4_layers", "hidden_units": (128, 128, 64, 32)},
    # Width: wider and narrower layers
    {**BASELINE, "name": "units_larger", "hidden_units": (256, 128)},
    {**BASELINE, "name": "units_smaller", "hidden_units": (64, 32)},
    # Activation function
    {**BASELINE, "name": "activation_tanh", "activation": "tanh"},
    # Optimizer
    {**BASELINE, "name": "optimizer_sgd", "optimizer": "sgd"},
    # Higher learning rate
    {**BASELINE, "name": "lr_0_01", "lr": 1e-2},
    # Regularization: L2 penalty and dropout
    {**BASELINE, "name": "l2_1e_4", "l2_reg": 1e-4},
    {**BASELINE, "name": "dropout_0_3", "dropout": 0.3},
]


# 7. Train loop across configurations
#     Record training accuracy, validation accuracy, test accuracy, time, params

# One result row (a dict) is collected per configuration
results = []

# Train and evaluate every configuration in CONFIGS, in order
for cfg in CONFIGS:
    print("\nRunning", cfg["name"])
    # Clear the Keras backend session to avoid interference between runs
    tf.keras.backend.clear_session()
    # Restart the Python, NumPy and backend random generators from the same
    # seed for every configuration, so a run's initial weights and shuffling
    # do not depend on which configurations were trained before it.
    utils.set_random_seed(SEED)

    # Build the MLP model with the current configuration
    model = build_mlp(
        input_shape=(28, 28),
        hidden_units=cfg["hidden_units"],
        activation=cfg["activation"],
        kernel_init=cfg["kernel_init"],
        l2_reg=cfg["l2_reg"],
        dropout_rate=cfg["dropout"]
    )
    # Compile the model with the specified optimizer and learning rate
    model = compile_model(model, optimizer_name=cfg["optimizer"], lr=cfg["lr"])

    # Count the number of trainable parameters in the built model
    params = count_trainable_params(model)

    # Time only the call to fit(); perf_counter is monotonic, so the
    # measured duration is unaffected by wall-clock adjustments.
    start = time.perf_counter()
    hist = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=cfg["epochs"],
        batch_size=cfg["batch_size"],
        verbose=2 # Set verbose to 2 to show progress per epoch
    )
    elapsed = time.perf_counter() - start

    # Training and validation accuracy are taken from the last epoch of the
    # history, not from the best epoch.
    train_acc = float(hist.history["accuracy"][-1])
    val_acc = float(hist.history["val_accuracy"][-1])
    # Evaluate the model on the test set and get the test accuracy
    test_acc = float(evaluate_model(model, x_test, y_test_oh, batch_size=cfg["batch_size"]))

    # Record the hyperparameters together with the measured metrics.
    # Accuracies are rounded to 4 decimals and the time is truncated to
    # whole seconds.
    results.append({
        "config": cfg["name"],
        "hidden_units": str(cfg["hidden_units"]),
        "activation": cfg["activation"],
        "optimizer": cfg["optimizer"],
        "lr": cfg["lr"],
        "dropout": cfg["dropout"],
        "l2_reg": cfg["l2_reg"],
        "batch_size": cfg["batch_size"],
        "epochs": cfg["epochs"],
        "train_acc": round(train_acc, 4),
        "val_acc": round(val_acc, 4),
        "test_acc": round(test_acc, 4),
        "train_time_sec": int(elapsed),
        "trainable_params": params
    })

# 8. Results table and quick summary
#     Show a DataFrame and top configs by validation accuracy

# Only the import is guarded: a genuine error while building or sorting the
# table should surface instead of being reported as "pandas missing".
try:
    import pandas as pd
except ImportError:
    # Without pandas, print the result rows sorted by validation accuracy
    print("Install pandas for tabular display")
    for r in sorted(results, key=lambda x: x["val_acc"], reverse=True):
        print(r)
else:
    # Convert the results list to a pandas DataFrame
    df_results = pd.DataFrame(results)
    # Sort by validation accuracy in descending order and reset the index
    df_display = df_results.sort_values("val_acc", ascending=False).reset_index(drop=True)
    print("\nTop results by validation accuracy")
    try:
        from IPython.display import display
    except ImportError:
        # Outside a notebook, fall back to a plain-text table
        print(df_display.to_string())
    else:
        # Display the DataFrame as a rich output
        display(df_display)

TensorFlow version 2.19.0
Pandas version 2.2.2
Scikit-learn version 1.6.1
GPU available [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train shape (48000, 28, 28) Val shape (12000, 28, 28) Test shape (10000, 28, 28)

Runni

Unnamed: 0,config,hidden_units,activation,optimizer,lr,dropout,l2_reg,batch_size,epochs,train_acc,val_acc,test_acc,train_time_sec,trainable_params
0,dropout_0_3,"(128, 64)",relu,adam,0.001,0.3,0.0,64,30,0.8979,0.8922,0.8848,66,109386
1,units_larger,"(256, 128)",relu,adam,0.001,0.0,0.0,64,30,0.9605,0.8888,0.8796,65,235146
2,depth_4_layers,"(128, 128, 64, 32)",relu,adam,0.001,0.0,0.0,64,30,0.9458,0.8881,0.8796,67,127658
3,depth_3_layers,"(128, 64, 64)",relu,adam,0.001,0.0,0.0,64,30,0.9484,0.8878,0.8776,66,113546
4,activation_tanh,"(128, 64)",tanh,adam,0.001,0.0,0.0,64,30,0.9546,0.8848,0.8751,65,109386
5,baseline,"(128, 64)",relu,adam,0.001,0.0,0.0,64,30,0.9527,0.8847,0.8753,66,109386
6,l2_1e_4,"(128, 64)",relu,adam,0.001,0.0,0.0001,64,30,0.9331,0.8808,0.8732,69,109386
7,lr_0_01,"(128, 64)",relu,adam,0.01,0.0,0.0,64,30,0.8964,0.8758,0.8619,64,109386
8,units_smaller,"(64, 32)",relu,adam,0.001,0.0,0.0,64,30,0.9365,0.8754,0.8676,65,52650
9,optimizer_sgd,"(128, 64)",relu,sgd,0.001,0.0,0.0,64,30,0.8387,0.8431,0.8291,62,109386
