In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
import time

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the data
x_train = x_train / 255.0
x_test = x_test / 255.0

# Convert labels to one-hot encoding
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [2]:
def create_model():
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    return model

In [3]:
# Hyperparameters
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]
optimizers = {
    'SGD': SGD,
    'Adam': Adam,
    'RMSprop': RMSprop
}

In [4]:
results = {}

for opt_name, opt_func in optimizers.items():
    for lr in learning_rates:
        for batch_size in batch_sizes:
            print(f"Training with {opt_name}, Learning Rate: {lr}, Batch Size: {batch_size}")

            # Create a fresh model for each combination
            model = create_model()

            # Compile the model with the chosen optimizer and learning rate
            optimizer = opt_func(learning_rate=lr)
            model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

            # Track training time
            start_time = time.time()

            # Train the model
            history = model.fit(x_train, y_train, epochs=10, batch_size=batch_size, validation_data=(x_test, y_test), verbose=0)

            # Calculate total training time
            training_time = time.time() - start_time

            # Record the final test accuracy and training time
            test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)

            # Save the results
            results[(opt_name, lr, batch_size)] = {
                'accuracy': test_accuracy,
                'training_time': training_time
            }

            print(f"Test Accuracy: {test_accuracy:.4f}, Training Time: {training_time:.2f} seconds\n")

Training with SGD, Learning Rate: 0.001, Batch Size: 32


  super().__init__(**kwargs)


Test Accuracy: 0.9136, Training Time: 43.57 seconds

Training with SGD, Learning Rate: 0.001, Batch Size: 64
Test Accuracy: 0.8896, Training Time: 22.73 seconds

Training with SGD, Learning Rate: 0.001, Batch Size: 128
Test Accuracy: 0.8393, Training Time: 13.86 seconds

Training with SGD, Learning Rate: 0.01, Batch Size: 32
Test Accuracy: 0.9636, Training Time: 39.02 seconds

Training with SGD, Learning Rate: 0.01, Batch Size: 64
Test Accuracy: 0.9512, Training Time: 21.36 seconds

Training with SGD, Learning Rate: 0.01, Batch Size: 128
Test Accuracy: 0.9318, Training Time: 15.43 seconds

Training with SGD, Learning Rate: 0.1, Batch Size: 32
Test Accuracy: 0.9800, Training Time: 46.40 seconds

Training with SGD, Learning Rate: 0.1, Batch Size: 64
Test Accuracy: 0.9762, Training Time: 26.81 seconds

Training with SGD, Learning Rate: 0.1, Batch Size: 128
Test Accuracy: 0.9721, Training Time: 14.67 seconds

Training with Adam, Learning Rate: 0.001, Batch Size: 32
Test Accuracy: 0.9773, T

In [5]:
# Print the results in a structured way
for params, result in results.items():
    opt_name, lr, batch_size = params
    accuracy = result['accuracy']
    training_time = result['training_time']
    print(f"Optimizer: {opt_name}, Learning Rate: {lr}, Batch Size: {batch_size}")
    print(f"Accuracy: {accuracy:.4f}, Training Time: {training_time:.2f} seconds")
    print('-' * 40)

Optimizer: SGD, Learning Rate: 0.001, Batch Size: 32
Accuracy: 0.9136, Training Time: 43.57 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.001, Batch Size: 64
Accuracy: 0.8896, Training Time: 22.73 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.001, Batch Size: 128
Accuracy: 0.8393, Training Time: 13.86 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.01, Batch Size: 32
Accuracy: 0.9636, Training Time: 39.02 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.01, Batch Size: 64
Accuracy: 0.9512, Training Time: 21.36 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.01, Batch Size: 128
Accuracy: 0.9318, Training Time: 15.43 seconds
----------------------------------------
Optimizer: SGD, Learning Rate: 0.1, Batch Size: 32
Accuracy: 0.9800, Training Time: 46.40 seconds
----------------------------------------
Optimizer: SGD, 