# Authors

 - Mohammed Essam Mohammed 20220299
 - Amr Ehab Abd-Al-Zaher 20221110 

In [1]:
# %pip install scikit-learn tensorflow
import time
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

In [2]:
def log_experiment(name, hyperparams, history, train_time, test_time, model=None):
    print(f"\n--- {name} ---")
    print("Hyperparameters:", hyperparams)
    if model:
        model.summary()
        print("Total parameters:", model.count_params())
    print("Training time (s):", train_time)
    print("Testing time (s):", test_time)
    print("First 5 epochs accuracy:", history['accuracy'][:5] if 'accuracy' in history else history[:5])
    print("Final accuracy:", history['accuracy'][-1] if 'accuracy' in history else history[-1])

In [3]:
def load_preprocess_mnist():
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0
    x_train = np.expand_dims(x_train, -1)
    x_test = np.expand_dims(x_test, -1)
    # One-hot encode labels
    y_train_cat = keras.utils.to_categorical(y_train, 10)
    y_test_cat = keras.utils.to_categorical(y_test, 10)
    # Shuffle training data
    idx = np.random.permutation(len(x_train))
    x_train, y_train, y_train_cat = x_train[idx], y_train[idx], y_train_cat[idx]
    return (x_train, y_train, y_train_cat), (x_test, y_test, y_test_cat)


In [4]:
def build_ann(input_shape=(28,28,1), num_classes=10, hidden_units=128, activation='relu'):
    model = keras.Sequential([
        layers.Flatten(input_shape=input_shape),
        layers.Dense(hidden_units, activation=activation),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# --- SECTION: Baseline SVM Model ---
def train_svm(x_train, y_train, x_test, y_test):
    # Flatten images
    x_train_flat = x_train.reshape((x_train.shape[0], -1))
    x_test_flat = x_test.reshape((x_test.shape[0], -1))
    clf = svm.SVC()
    start = time.time()
    clf.fit(x_train_flat, y_train)
    train_time = time.time() - start
    start = time.time()
    y_pred = clf.predict(x_test_flat)
    test_time = time.time() - start
    acc = accuracy_score(y_test, y_pred)
    return clf, train_time, test_time, acc

# --- SECTION: CNN Model Builder ---
def build_cnn(input_shape=(28,28,1), num_classes=10, 
              conv_layers=2, filters=[32,64], kernel_size=3, 
              fc_layers=1, fc_units=[128], activation='relu', 
              dropout=None, dropout_rate=0.5):
    model = keras.Sequential()
    for i in range(conv_layers):
        if i == 0:
            model.add(layers.Conv2D(filters[i], (kernel_size, kernel_size), activation=activation, input_shape=input_shape))
        else:
            model.add(layers.Conv2D(filters[i], (kernel_size, kernel_size), activation=activation))
        if i == 0:
            model.add(layers.MaxPooling2D((2,2)))
        if dropout and i in dropout:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    for i in range(fc_layers):
        model.add(layers.Dense(fc_units[i], activation=activation))
        if dropout and (conv_layers + i) in dropout:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model


In [5]:
def train_model(model, x_train, y_train, x_test, y_test, 
                optimizer, loss, batch_size=64, epochs=10):
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    start = time.time()
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, 
                        validation_data=(x_test, y_test), verbose=0)
    train_time = time.time() - start
    start = time.time()
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    test_time = time.time() - start
    return history.history, train_time, test_time, test_acc

In [6]:
(x_train, y_train, y_train_cat), (x_test, y_test, y_test_cat) = load_preprocess_mnist()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [7]:
print("\n# STEP 1: Baseline ANN")
ann = build_ann()
history, train_time, test_time, test_acc = train_model(
    ann, x_train, y_train_cat, x_test, y_test_cat,
    optimizer=optimizers.Adam(), loss='categorical_crossentropy', batch_size=64, epochs=10)
log_experiment("ANN Baseline", 
               {"optimizer": "Adam", "batch_size": 64, "epochs": 10, "activation": "relu"}, 
               history, train_time, test_time, ann)

# STEP 1: Baseline SVM
print("\n# STEP 1: Baseline SVM")
svm_clf, train_time, test_time, acc = train_svm(x_train[:10000], y_train[:10000], x_test[:2000], y_test[:2000])
log_experiment("SVM Baseline", 
               {"kernel": "rbf", "train_samples": 10000, "test_samples": 2000}, 
               [acc], train_time, test_time)


# STEP 1: Baseline ANN


  super().__init__(**kwargs)



--- ANN Baseline ---
Hyperparameters: {'optimizer': 'Adam', 'batch_size': 64, 'epochs': 10, 'activation': 'relu'}


Total parameters: 101770
Training time (s): 24.383331298828125
Testing time (s): 0.4466679096221924
First 5 epochs accuracy: [0.9154833555221558, 0.9606833457946777, 0.972350001335144, 0.9785333275794983, 0.9831166863441467]
Final accuracy: 0.9941999912261963

# STEP 1: Baseline SVM

--- SVM Baseline ---
Hyperparameters: {'kernel': 'rbf', 'train_samples': 10000, 'test_samples': 2000}
Training time (s): 10.353143692016602
Testing time (s): 4.465569972991943
First 5 epochs accuracy: [0.946]
Final accuracy: 0.946


In [None]:

print("\n# STEP 2: CNN Baseline")
cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation='relu')
history, train_time, test_time, test_acc = train_model(
    cnn, x_train, y_train_cat, x_test, y_test_cat,
    optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', batch_size=64, epochs=15)
log_experiment("CNN Baseline", 
               {"optimizer": "SGD", "lr": 0.01, "momentum": 0.9, "batch_size": 64, "epochs": 15, "activation": "relu"}, 
               history, train_time, test_time, cnn)


# STEP 2: CNN Baseline


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:

print("\n# STEP 3: Learning Rate Study")
for lr in [0.01, 0.001, 0.0001]:
    cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation='relu')
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=optimizers.SGD(learning_rate=lr, momentum=0.9), loss='categorical_crossentropy', batch_size=64, epochs=15)
    log_experiment(f"CNN LR={lr}", 
                   {"optimizer": "SGD", "lr": lr, "momentum": 0.9, "batch_size": 64, "epochs": 15, "activation": "relu"}, 
                   history, train_time, test_time, cnn)

In [None]:

print("\n# STEP 4: CNN + FC Variants")
for conv_layers, fc_layers, filters, fc_units in [
    (2, 1, [32,64], [128]),
    (3, 2, [32,64,128], [256,128]),
    (3, 1, [64,128,128], [256])
]:
    cnn = build_cnn(conv_layers=conv_layers, filters=filters, fc_layers=fc_layers, fc_units=fc_units, activation='relu')
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', batch_size=64, epochs=15)
    log_experiment(f"CNN {conv_layers}Conv {fc_layers}FC", 
                   {"optimizer": "SGD", "lr": 0.01, "momentum": 0.9, "batch_size": 64, "epochs": 15, "activation": "relu",
                    "conv_layers": conv_layers, "fc_layers": fc_layers, "filters": filters, "fc_units": fc_units}, 
                   history, train_time, test_time, cnn)


In [None]:

print("\n# STEP 5: Batch Size Study")
for batch_size in [64, 128, 192]:
    cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation='relu')
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', batch_size=batch_size, epochs=15)
    log_experiment(f"CNN BatchSize={batch_size}", 
                   {"optimizer": "SGD", "lr": 0.01, "momentum": 0.9, "batch_size": batch_size, "epochs": 15, "activation": "relu"}, 
                   history, train_time, test_time, cnn)

In [None]:

print("\n# STEP 6: Activation Functions")
for activation in ['relu', 'sigmoid', 'tanh']:
    cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation=activation)
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', batch_size=64, epochs=15)
    log_experiment(f"CNN Activation={activation}", 
                   {"optimizer": "SGD", "lr": 0.01, "momentum": 0.9, "batch_size": 64, "epochs": 15, "activation": activation}, 
                   history, train_time, test_time, cnn)

# LeakyReLU as a special case
cnn = keras.Sequential([
    layers.Conv2D(32, (3,3), input_shape=(28,28,1)),
    layers.LeakyReLU(alpha=0.1),
    layers.Conv2D(64, (3,3)),
    layers.LeakyReLU(alpha=0.1),
    layers.Conv2D(128, (3,3)),
    layers.LeakyReLU(alpha=0.1),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(128),
    layers.LeakyReLU(alpha=0.1),
    layers.Dense(10, activation='softmax')
])
history, train_time, test_time, test_acc = train_model(
    cnn, x_train, y_train_cat, x_test, y_test_cat,
    optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', batch_size=64, epochs=15)
log_experiment("CNN Activation=LeakyReLU", 
               {"optimizer": "SGD", "lr": 0.01, "momentum": 0.9, "batch_size": 64, "epochs": 15, "activation": "LeakyReLU"}, 
               history, train_time, test_time, cnn)


In [None]:

print("\n# STEP 7: Optimizers")
for opt_name, opt in [("Adam", optimizers.Adam()), ("RMSProp", optimizers.RMSprop()), ("SGD", optimizers.SGD(learning_rate=0.01, momentum=0.9))]:
    cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation='relu')
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=opt, loss='categorical_crossentropy', batch_size=64, epochs=15)
    log_experiment(f"CNN Optimizer={opt_name}", 
                   {"optimizer": opt_name, "batch_size": 64, "epochs": 15, "activation": "relu"}, 
                   history, train_time, test_time, cnn)

In [None]:

print("\n# STEP 8: Dropout Study")
for dropout_rate in [0.3, 0.5]:
    cnn = build_cnn(conv_layers=3, filters=[32,64,128], fc_layers=1, fc_units=[128], activation='relu', dropout=[1,3], dropout_rate=dropout_rate)
    history, train_time, test_time, test_acc = train_model(
        cnn, x_train, y_train_cat, x_test, y_test_cat,
        optimizer=optimizers.Adam(), loss='categorical_crossentropy', batch_size=64, epochs=15)
    log_experiment(f"CNN Dropout={dropout_rate}", 
                   {"optimizer": "Adam", "batch_size": 64, "epochs": 15, "activation": "relu", "dropout_rate": dropout_rate}, 
                   history, train_time, test_time, cnn)