In [1]:
import os
import tensorflow as tf
from models import create_model
from data_loader import load_dataset
import datetime

tf.config.run_functions_eagerly(True)
print("Eager execution:", tf.executing_eagerly())

Eager execution: True


In [2]:
%pwd

'/Users/rahulshelke/Documents/Data-Science/Hands-on DL/optimizer_model_comparison'

In [3]:
%ls

README.md                     requirements.txt
[34m__pycache__[m[m/                  [34mresults[m[m/
data_loader.py                run_experiments.py
models.py                     train.py
optimizers_comparision.ipynb


## Load Datasets

In [4]:
def load_dataset(name):
    if name == 'mnist':
        data = tf.keras.datasets.mnist
    elif name == 'fashion_mnist':
        data = tf.keras.datasets.fashion_mnist
    elif name == 'cifar10':
        data = tf.keras.datasets.cifar10
    else:
        raise ValueError("Unsupported dataset")

    (x_train, y_train), (x_test, y_test) = data.load_data()

    if name == 'cifar10':
        x_train = x_train.astype('float32') / 255.0
        x_test = x_test.astype('float32') / 255.0
    else:
        x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
        x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

    return (x_train, y_train), (x_test, y_test)

## Models

In [5]:
def create_model(size='small', input_shape=(28, 28, 1), num_classes=10):
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=input_shape))

    if size == 'small':
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(64, activation='relu'))

    elif size == 'medium':
        model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(128, activation='relu'))

    elif size == 'large':
        model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
        model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(256, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.5))

    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return model

## Training Loop

In [6]:
def train_model(model_size, dataset_name, optimizer, epochs=5, batch_size=64):
    (x_train, y_train), (x_test, y_test) = load_dataset(dataset_name)
    input_shape = x_train.shape[1:]
    num_classes = 10

    model = create_model(model_size, input_shape, num_classes)

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    log_dir = f"results/{dataset_name}/{model_size}/{type(optimizer).__name__}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    os.makedirs(log_dir, exist_ok=True)

    callbacks = [tf.keras.callbacks.TensorBoard(log_dir=log_dir)]

    history = model.fit(
        x_train, y_train,
        validation_data=(x_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=2
    )

    return history


In [8]:
datasets = ['mnist', 
# 'fashion_mnist', 'cifar10'
]
model_sizes = ['small', 'medium', 'large']
optimizers = {
    'SGD': tf.keras.optimizers.SGD(),
    'Adam': tf.keras.optimizers.Adam(),
    'RMSprop': tf.keras.optimizers.RMSprop()
}

In [9]:
for dataset in datasets:
    for size in model_sizes:
        for name, opt in optimizers.items():
            print(f"\n--- Training {size} model on {dataset} with {name} ---")
            train_model(model_size=size, dataset_name=dataset, optimizer=opt)



--- Training small model on mnist with SGD ---
Epoch 1/5




938/938 - 13s - 14ms/step - accuracy: 0.7749 - loss: 0.9092 - val_accuracy: 0.8823 - val_loss: 0.4626
Epoch 2/5
938/938 - 13s - 14ms/step - accuracy: 0.8874 - loss: 0.4205 - val_accuracy: 0.9024 - val_loss: 0.3568
Epoch 3/5
938/938 - 12s - 13ms/step - accuracy: 0.9018 - loss: 0.3525 - val_accuracy: 0.9107 - val_loss: 0.3184
Epoch 4/5
938/938 - 13s - 14ms/step - accuracy: 0.9099 - loss: 0.3186 - val_accuracy: 0.9165 - val_loss: 0.2933
Epoch 5/5
938/938 - 13s - 14ms/step - accuracy: 0.9161 - loss: 0.2965 - val_accuracy: 0.9217 - val_loss: 0.2752

--- Training small model on mnist with Adam ---
Epoch 1/5
938/938 - 23s - 24ms/step - accuracy: 0.9035 - loss: 0.3446 - val_accuracy: 0.9426 - val_loss: 0.1967
Epoch 2/5
938/938 - 23s - 24ms/step - accuracy: 0.9532 - loss: 0.1633 - val_accuracy: 0.9601 - val_loss: 0.1355
Epoch 3/5
938/938 - 23s - 24ms/step - accuracy: 0.9653 - loss: 0.1202 - val_accuracy: 0.9679 - val_loss: 0.1112
Epoch 4/5
938/938 - 23s - 25ms/step - accuracy: 0.9721 - loss: 0.

ValueError: Unknown variable: <Variable path=sequential_3/conv2d/kernel, shape=(3, 3, 1, 32), dtype=float32, value=[[[[-0.0365007   0.06048135 -0.07531459  0.10370916 -0.03047556
    -0.05668736  0.03010792 -0.00141899 -0.04018178  0.06333803
     0.09014285  0.07875232 -0.10099072 -0.05712081  0.10191517
    -0.04997095 -0.03953873 -0.01535632 -0.09141612  0.08703452
     0.06845456  0.00731577 -0.13980752 -0.05907621  0.08032005
     0.13709901  0.05532143 -0.00927801  0.07934175  0.02873382
     0.02481079 -0.05120631]]

  [[ 0.09291896  0.08649303 -0.04074316 -0.02634504  0.06421356
     0.12940721  0.00873372 -0.01456688  0.11392339  0.09660241
    -0.05467994 -0.13137269 -0.06201024 -0.00159287 -0.12166966
    -0.01512639 -0.05282332  0.04221871  0.06175859  0.01525134
     0.01895292  0.09232108 -0.01249762 -0.03818327  0.13394
     0.01080692 -0.09308334  0.01048559  0.12741838  0.09904006
     0.04582287 -0.03895862]]

  [[ 0.13946585  0.03910677 -0.05318601  0.07292618 -0.10659006
    -0.12931542  0.08710434  0.12538274  0.14114581 -0.04693397
    -0.13414535  0.02772212  0.10298681 -0.06816351  0.07025009
     0.1260746   0.08001469  0.11125375 -0.02608828  0.1396107
     0.00357078  0.01626024 -0.0419817   0.1405565   0.03836386
    -0.10866959 -0.10035387 -0.07813276  0.03364372 -0.02012745
    -0.04511588  0.0748855 ]]]


 [[[-0.14016123  0.10638186  0.02948447  0.01747715  0.06348343
    -0.11677802 -0.09612179 -0.11994577 -0.11724668 -0.09495579
     0.01101701 -0.07292092  0.0358789  -0.08887976 -0.02549186
    -0.11131738 -0.02702641 -0.00101821  0.03792478 -0.07629908
    -0.0204911   0.07556634 -0.04486237 -0.09463128  0.06699964
    -0.06515035  0.0253907   0.07717456  0.09661847 -0.12815166
    -0.10718116 -0.10198037]]

  [[ 0.00343317 -0.10098533 -0.02115221  0.01811533  0.00350368
     0.01815014 -0.13917243  0.00329325  0.06890252  0.09737836
    -0.00254345  0.03471664 -0.04743788 -0.11546031 -0.00090601
     0.12170793  0.13601501 -0.1383567   0.1350741  -0.1270166
     0.12081595  0.07223494 -0.08998577 -0.0290405   0.09675342
    -0.11455603 -0.12504067  0.111237   -0.12740462 -0.13104273
    -0.04000282  0.11229102]]

  [[ 0.10394953 -0.00692518  0.03977486 -0.10951854  0.03724456
    -0.08706939 -0.02455226  0.10682812 -0.10034259 -0.13823654
     0.12680934 -0.06394278  0.07625768 -0.08415132 -0.10647617
    -0.07055053 -0.1402529  -0.1330841   0.10917391 -0.12814495
    -0.09701509 -0.0204363  -0.00040621 -0.10446695 -0.06368624
     0.11621825  0.13233422 -0.03603976 -0.08563142  0.04037911
     0.05221632  0.00189762]]]


 [[[ 0.0842912   0.12093179 -0.05599216 -0.12439288 -0.04704648
     0.0882809  -0.09302069 -0.12376064 -0.0644763  -0.04908863
     0.00375658  0.04916847 -0.04128387 -0.1406442  -0.02328955
    -0.07000051 -0.02965058  0.14122944 -0.06264848 -0.02334123
     0.04940762 -0.12988926 -0.06837405 -0.07479462 -0.11974157
     0.07993235  0.09453593 -0.05633543  0.08967848 -0.06954814
    -0.10618822 -0.03571987]]

  [[-0.05307053  0.02361514 -0.06699911  0.04019293  0.02828597
     0.04502682  0.00246765 -0.04661048  0.12103494 -0.13784818
     0.04627357  0.07132663 -0.14192829 -0.06052811 -0.03641836
     0.033815    0.03533253 -0.1180431   0.01522003 -0.09068704
     0.02514271  0.13442887 -0.09614097  0.10363698 -0.05780794
     0.00116789  0.02642125  0.07582401  0.08124721  0.02052763
    -0.0468666   0.04167472]]

  [[ 0.0983697   0.05026636 -0.0234452   0.07169898  0.11658277
     0.008578   -0.07501841 -0.02274552  0.08629097 -0.0601477
     0.10916607 -0.11128302 -0.00936538  0.10984631 -0.0867821
    -0.07505108  0.08355117 -0.13511677  0.03705859  0.03168108
     0.10961105  0.00837871  0.08665125 -0.07688557  0.08686048
    -0.10510501 -0.07239729 -0.0940551  -0.10961169 -0.01869382
     0.07587084 -0.09531008]]]]>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.