In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mlflow
from mlflow import log_metric, log_param
import warnings
warnings.filterwarnings('ignore')

In [2]:
mlflow.set_tracking_uri("http://127.0.0.1:8000")
experiment_name = "MNIST Models"
mlflow.set_experiment(experiment_name)

2024/04/21 09:51:32 INFO mlflow.tracking.fluent: Experiment with name 'MNIST Models' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/452122521761317738', creation_time=1713673292188, experiment_id='452122521761317738', last_update_time=1713673292188, lifecycle_stage='active', name='MNIST Models', tags={}>

In [3]:
"""### Load & Prepare MNIST Dataset"""
from tensorflow import keras
(X_train, Y_train), (X_test, Y_test) = keras.datasets.mnist.load_data()
num_classes = 10
x_train = X_train.reshape(60000, 784)
x_test = X_test.reshape(10000, 784)
x_train = x_train.astype('float16') / 255
x_test = x_test.astype('float16') / 255
y_train = keras.utils.to_categorical(Y_train, num_classes)
y_test = keras.utils.to_categorical(Y_test, num_classes)

2024-04-21 09:51:32.598508: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from keras import layers

In [5]:
# Model 1: Simple neural network with two hidden layers
with mlflow.start_run(run_name="Model 1"):
    log_param("hidden_layers", [20,20])
    log_param("learning_rate", 0.001)
    log_param("optimizer", "Adam")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(20, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

2024-04-21 09:51:36.346086: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-21 09:51:36.346978: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.5774 - loss: 1.5324 - val_accuracy: 0.8964 - val_loss: 0.4386
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9052 - loss: 0.3826 - val_accuracy: 0.9191 - val_loss: 0.2843
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9264 - loss: 0.2662 - val_accuracy: 0.9312 - val_loss: 0.2374
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9385 - loss: 0.2197 - val_accuracy: 0.9398 - val_loss: 0.2099
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.9459 - loss: 0.1938 - val_accuracy: 0.9432 - val_loss: 0.1943
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9499 - loss: 0.1758 - val_accuracy: 0.9455 - val_loss: 0.1860
Epoch 7/10
[1m1

In [6]:
# Model 2: Bigger Model
with mlflow.start_run(run_name="Model 2"):
    log_param("hidden_layers", [256,128])
    log_param("learning_rate", 0.001)
    log_param("optimizer", "Adam")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(256, activation='sigmoid', input_shape=(784,)),
        layers.Dense(128, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8179 - loss: 0.6883 - val_accuracy: 0.9444 - val_loss: 0.1916
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9477 - loss: 0.1763 - val_accuracy: 0.9588 - val_loss: 0.1286
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9674 - loss: 0.1084 - val_accuracy: 0.9673 - val_loss: 0.1046
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9773 - loss: 0.0784 - val_accuracy: 0.9659 - val_loss: 0.1076
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9820 - loss: 0.0586 - val_accuracy: 0.9744 - val_loss: 0.0801
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9876 - loss: 0.0396 - val_accuracy: 0.9786 - val_loss: 0.0675
Epoch 7/10


In [7]:
from keras import regularizers
# Model 3: L2 Regularization
with mlflow.start_run(run_name="Model 3"):
    log_param("hidden_layers", [256,128])
    log_param("learning_rate", 0.001)
    log_param("optimizer", "Adam")
    log_param("regularization", "L2")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(256, activation='sigmoid', input_shape=(784,), kernel_regularizer=regularizers.l2(0.01)),
        layers.Dense(128, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01)),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.5612 - loss: 2.4032 - val_accuracy: 0.7766 - val_loss: 1.3754
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7612 - loss: 1.3375 - val_accuracy: 0.7799 - val_loss: 1.1977
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7814 - loss: 1.2026 - val_accuracy: 0.7567 - val_loss: 1.1696
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.7862 - loss: 1.1385 - val_accuracy: 0.7849 - val_loss: 1.0807
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.7869 - loss: 1.1015 - val_accuracy: 0.8189 - val_loss: 1.0393
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.7941 - loss: 1.0704 - val_accuracy: 0.7725 - val_loss: 1.0699
Epoch 7/20


In [8]:
# Model 4: Dropout
with mlflow.start_run(run_name="Model 4"):
    log_param("hidden_layers", [256, 128])
    log_param("learning_rate", 0.001)
    log_param("optimizer", "Adam")
    log_param("regularization", "Dropout")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(256, activation='sigmoid', input_shape=(784,)),
        layers.Dropout(0.5),
        layers.Dense(128, activation='sigmoid'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.6555 - loss: 1.0640 - val_accuracy: 0.9257 - val_loss: 0.2482
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9035 - loss: 0.3271 - val_accuracy: 0.9465 - val_loss: 0.1732
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9277 - loss: 0.2461 - val_accuracy: 0.9564 - val_loss: 0.1402
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9414 - loss: 0.1994 - val_accuracy: 0.9638 - val_loss: 0.1171
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9489 - loss: 0.1721 - val_accuracy: 0.9682 - val_loss: 0.1074
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9540 - loss: 0.1532 - val_accuracy: 0.9716 - val_loss: 0.0924
Epoch 7/10
[1m

In [9]:
# Model 5: Early Stopping
with mlflow.start_run(run_name="Model 5"):
    log_param("hidden_layers", [256,128])
    log_param("learning_rate", 0.001)
    log_param("optimizer", "Adam")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(256, activation='sigmoid', input_shape=(784,)),
        layers.Dense(128, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.01, patience=2)
    history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), callbacks=[es])
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8164 - loss: 0.6887 - val_accuracy: 0.9440 - val_loss: 0.1854
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9494 - loss: 0.1692 - val_accuracy: 0.9607 - val_loss: 0.1261
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9673 - loss: 0.1082 - val_accuracy: 0.9691 - val_loss: 0.0988
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9779 - loss: 0.0739 - val_accuracy: 0.9763 - val_loss: 0.0797
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9841 - loss: 0.0525 - val_accuracy: 0.9762 - val_loss: 0.0725
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9883 - loss: 0.0383 - val_accuracy: 0.9777 - val_loss: 0.0726


In [10]:
# Model 6: Using High LR
with mlflow.start_run(run_name="Model 6"):
    log_param("hidden_layers", [20,10])
    log_param("learning_rate", 10.0)
    log_param("optimizer", "SGD")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(10, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    opt_new = keras.optimizers.SGD(learning_rate=10.0)
    model.compile(optimizer=opt_new, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.1338 - loss: 2.4156 - val_accuracy: 0.3657 - val_loss: 2.5588
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3311 - loss: 1.8843 - val_accuracy: 0.3640 - val_loss: 1.6194
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4770 - loss: 1.5479 - val_accuracy: 0.6407 - val_loss: 1.1260
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6609 - loss: 1.1361 - val_accuracy: 0.7622 - val_loss: 0.7883
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7187 - loss: 0.9749 - val_accuracy: 0.7684 - val_loss: 0.8418
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7507 - loss: 0.9143 - val_accuracy: 0.7818 - val_loss: 0.8430
Epoch 7/20
[1m1

In [11]:
# Model 7: Using Low LR
with mlflow.start_run(run_name="Model 7"):
    log_param("hidden_layers", [20,10])
    log_param("learning_rate", 0.00001)
    log_param("optimizer", "SGD")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(10, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    opt_new = keras.optimizers.SGD(learning_rate=.00001)
    model.compile(optimizer=opt_new, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.1058 - loss: 2.4867 - val_accuracy: 0.1028 - val_loss: 2.4926
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1050 - loss: 2.4859 - val_accuracy: 0.1028 - val_loss: 2.4891
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1043 - loss: 2.4835 - val_accuracy: 0.1028 - val_loss: 2.4857
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1037 - loss: 2.4818 - val_accuracy: 0.1028 - val_loss: 2.4823
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1044 - loss: 2.4743 - val_accuracy: 0.1028 - val_loss: 2.4790
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.1029 - loss: 2.4720 - val_accuracy: 0.1028 - val_loss: 2.4758
Epoch 7/20
[1m1

In [12]:
# Model 8: Using Optimal LR
with mlflow.start_run(run_name="Model 8"):
    log_param("hidden_layers", [20,10])
    log_param("learning_rate", 0.01)
    log_param("optimizer", "SGD")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(10, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    opt_new = keras.optimizers.SGD(learning_rate=.01)
    model.compile(optimizer=opt_new, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1827 - loss: 2.3045 - val_accuracy: 0.3911 - val_loss: 2.1875
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3953 - loss: 2.1447 - val_accuracy: 0.4525 - val_loss: 1.9531
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4531 - loss: 1.8854 - val_accuracy: 0.5088 - val_loss: 1.6388
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5325 - loss: 1.5760 - val_accuracy: 0.6008 - val_loss: 1.3654
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6228 - loss: 1.3105 - val_accuracy: 0.6835 - val_loss: 1.1413
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6933 - loss: 1.1064 - val_accuracy: 0.7340 - val_loss: 0.9734
Epoch 7/20
[1m1

In [13]:
# Model 9: LR with Momentum
with mlflow.start_run(run_name="Model 9"):
    log_param("hidden_layers", [20,10])
    log_param("learning_rate", 0.01)
    log_param("optimizer", "SGD with Momentum")
    log_param("regularization", "None")
    log_param("batch_size", 32)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(10, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    opt_new = keras.optimizers.SGD(learning_rate=.01, momentum=0.5)
    model.compile(optimizer=opt_new, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.2643 - loss: 2.2269 - val_accuracy: 0.5561 - val_loss: 1.8228
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5932 - loss: 1.6673 - val_accuracy: 0.7200 - val_loss: 1.2275
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7409 - loss: 1.1398 - val_accuracy: 0.8072 - val_loss: 0.8903
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8121 - loss: 0.8448 - val_accuracy: 0.8471 - val_loss: 0.6960
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8447 - loss: 0.6757 - val_accuracy: 0.8647 - val_loss: 0.5790
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8668 - loss: 0.5626 - val_accuracy: 0.8768 - val_loss: 0.5049
Epoch 7/20
[1m1

In [14]:
# Model 10: Mini-batch SGD
with mlflow.start_run(run_name="Model 10"):
    log_param("hidden_layers", [20,10])
    log_param("learning_rate", 0.01)
    log_param("optimizer", "SGD with Momentum")
    log_param("regularization", "None")
    log_param("batch_size", 512)

    model = keras.Sequential([
        layers.Dense(20, activation='sigmoid', input_shape=(784,)),
        layers.Dense(10, activation='sigmoid'),
        layers.Dense(10, activation='softmax')
    ])
    opt_new = keras.optimizers.SGD(learning_rate=.01, momentum=0.5)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, batch_size=512, epochs=10, validation_data=(x_test, y_test))
    
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_accuracy'][-1]

    log_metric('train_loss', train_loss)
    log_metric('train_accuracy', train_accuracy)
    log_metric('val_loss', val_loss)
    log_metric('val_accuracy', val_accuracy)

Epoch 1/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.1926 - loss: 2.2673 - val_accuracy: 0.4629 - val_loss: 1.9775
Epoch 2/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4970 - loss: 1.9110 - val_accuracy: 0.6240 - val_loss: 1.6955
Epoch 3/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6553 - loss: 1.6315 - val_accuracy: 0.7372 - val_loss: 1.4284
Epoch 4/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7356 - loss: 1.3767 - val_accuracy: 0.7766 - val_loss: 1.1920
Epoch 5/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7756 - loss: 1.1520 - val_accuracy: 0.8184 - val_loss: 0.9992
Epoch 6/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8133 - loss: 0.9648 - val_accuracy: 0.8458 - val_loss: 0.8376
Epoch 7/10
[1m118/118[0m 

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [16]:
# Define the model variants
models = [
    {"name": "Model 1", "hidden_layers": [20, 20], "learning_rate": 0.01, "optimizer": "Adam", "regularization": None, "batch_size": 32},
    {"name": "Model 2", "hidden_layers": [256, 128], "learning_rate": 0.01, "optimizer": "SGD", "regularization": None, "batch_size": 32},
    {"name": "Model 3", "hidden_layers": [256, 128], "learning_rate": 0.01, "optimizer": "Adam", "regularization": "L2", "batch_size": 32},
    {"name": "Model 4", "hidden_layers": [256, 128], "learning_rate": 0.01, "optimizer": "Adam", "regularization": None, "batch_size": 32, "dropout": 0.7},
    {"name": "Model 5", "hidden_layers": [256, 128], "learning_rate": 0.01, "optimizer": "SGD", "regularization": None, "batch_size": 32, "early_stopping": True},
    {"name": "Model 6", "hidden_layers": [20, 20], "learning_rate": 10.0, "optimizer": "Adam", "regularization": None, "batch_size": 32},
    {"name": "Model 7", "hidden_layers": [20, 20], "learning_rate": 0.00001, "optimizer": "SGD", "regularization": None, "batch_size": 32},
    {"name": "Model 8", "hidden_layers": [20, 20], "learning_rate": 0.01, "optimizer": "SGD", "regularization": None, "batch_size": 32},
    {"name": "Model 9", "hidden_layers": [20, 20], "learning_rate": 0.01, "optimizer": "SGD", "regularization": None, "batch_size": 32, "momentum": 0.5},
    {"name": "Model 10", "hidden_layers": [20, 20], "learning_rate": 0.01, "optimizer": "Adam", "regularization": None, "batch_size": 512}
]

In [17]:
# Create a nested run for each model variant
with mlflow.start_run(run_name="Superset MNIST") as parent_run:
    for model_config in models:
        run_name = model_config["name"]
        print(f"Running {run_name}") 
        with mlflow.start_run(run_name=run_name, nested=True) as child_run:
            # Create model
            model = Sequential()
            for i, units in enumerate(model_config["hidden_layers"]):
                if i == 0:
                    model.add(Dense(units, activation='sigmoid', input_shape=(784,)))
                else:
                    model.add(Dense(units, activation='sigmoid'))
            model.add(Dense(10, activation='softmax'))
            
            # Compile model
            optimizer = keras.optimizers.SGD(learning_rate=model_config["learning_rate"])
            if model_config["optimizer"] == "SGD":
                optimizer = keras.optimizers.SGD(learning_rate=model_config["learning_rate"])
            elif model_config["optimizer"] == "Adam":
                optimizer = keras.optimizers.Adam(learning_rate=model_config["learning_rate"])
            model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
            
            # Log parameters
            mlflow.log_param("hidden_layers", model_config["hidden_layers"])
            mlflow.log_param("learning_rate", model_config["learning_rate"])
            mlflow.log_param("optimizer", model_config["optimizer"])
            mlflow.log_param("regularization", model_config["regularization"])
            mlflow.log_param("batch_size", model_config["batch_size"])
            
            # Train model
            history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=model_config["batch_size"])
            
            # Log metrics
            mlflow.log_metric("train_loss", history.history["loss"][-1])
            mlflow.log_metric("train_accuracy", history.history["accuracy"][-1])
            mlflow.log_metric("val_loss", history.history["val_loss"][-1])
            mlflow.log_metric("val_accuracy", history.history["val_accuracy"][-1])

Running Model 1
Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8091 - loss: 0.6637 - val_accuracy: 0.9308 - val_loss: 0.2308
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9351 - loss: 0.2152 - val_accuracy: 0.9342 - val_loss: 0.2182
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9436 - loss: 0.1919 - val_accuracy: 0.9404 - val_loss: 0.2080
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9478 - loss: 0.1767 - val_accuracy: 0.9389 - val_loss: 0.2107
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9503 - loss: 0.1672 - val_accuracy: 0.9427 - val_loss: 0.2023
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9522 - loss: 0.1603 - val_accuracy: 0.9410 - val_loss: 0.2063
