# Optimizer Benchmark: The Nimbus 3000

This notebook benchmarks various optimizers on the character recognition CNN model.

## Import Libraries

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model, clone_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

## Load Data

In [None]:
# Root folder handed to flow_from_directory for both subsets.
data_dir = '../datasets/images'

# One generator serves both subsets: it multiplies pixel values by 1/255 and
# holds out 20% of the images for validation.
datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Settings common to the training and validation iterators.
shared_flow_args = dict(
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)

train_generator = datagen.flow_from_directory(
    data_dir, subset='training', **shared_flow_args
)

validation_generator = datagen.flow_from_directory(
    data_dir, subset='validation', **shared_flow_args
)

## Define Optimizers

In [None]:
# Optimizers under test, keyed by the label used in logs and plot legends.
# Each instance is compiled into exactly one model by the benchmark loop.
optimizers = dict(
    # Learning rate 0.01.
    SGD=tf.keras.optimizers.SGD(learning_rate=0.01),
    Adagrad=tf.keras.optimizers.Adagrad(learning_rate=0.01),
    # Learning rate 0.001.
    RMSprop=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    Adam=tf.keras.optimizers.Adam(learning_rate=0.001),
    AdamW=tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=0.004),
)

## Benchmark Function

In [None]:
def build_model(num_classes=10, input_shape=(128, 128, 3)):
    """Build a fresh, uncompiled CNN classifier.

    The network is three Conv2D/MaxPooling2D stages (32, 64 and 128 filters),
    followed by Flatten, a 512-unit ReLU dense layer, 50% dropout and a
    softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults
            to 10, the value that was previously hard-coded. With a
            directory-based generator this should equal the number of class
            sub-folders.
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (128, 128, 3).

    Returns:
        A new ``tf.keras.Sequential`` model; the caller must compile it.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    # Fail fast with a clear message instead of erroring deep inside Keras
    # after the convolutional layers have already been constructed.
    if num_classes < 1:
        raise ValueError(
            f"num_classes must be a positive integer, got {num_classes!r}"
        )

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Seed applied before every run. Without it each optimizer would start from
# different random initial weights (and see different dropout masks and
# shuffle order), so the comparison would mix optimizer effect with
# initialisation luck.
BENCHMARK_SEED = 42

# Maps optimizer name -> {'history', 'final_accuracy', 'final_loss'}.
results = {}

for name, optimizer in optimizers.items():
    print(f"Training with {name}...")

    # Reseed the Python, NumPy and TensorFlow RNGs so every optimizer trains
    # a model with identical starting weights.
    # NOTE(review): some GPU kernels are still non-deterministic, so runs are
    # comparable rather than bit-for-bit reproducible.
    tf.keras.utils.set_random_seed(BENCHMARK_SEED)

    # A new model per optimizer: weights must not carry over between runs.
    model = build_model()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(
        train_generator,
        epochs=10,
        validation_data=validation_generator,
        verbose=0
    )

    # Keep the full per-epoch curves plus the last-epoch validation metrics.
    results[name] = {
        'history': history.history,
        'final_accuracy': history.history['val_accuracy'][-1],
        'final_loss': history.history['val_loss'][-1]
    }

## Results Analysis

In [None]:
# Side-by-side comparison of the validation curves, one line per optimizer.
# Each panel: (subplot position, history key, panel title, y-axis label).
panels = (
    (1, 'val_accuracy', 'Validation Accuracy', 'Accuracy'),
    (2, 'val_loss', 'Validation Loss', 'Loss'),
)

plt.figure(figsize=(12, 5))

for position, metric_key, panel_title, y_label in panels:
    plt.subplot(1, 2, position)
    for optimizer_name, run in results.items():
        plt.plot(run['history'][metric_key], label=optimizer_name)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Last-epoch validation metrics for every optimizer.
print("Final Validation Results:")
for optimizer_name, run in results.items():
    print(f"{optimizer_name}: Accuracy={run['final_accuracy']:.4f}, Loss={run['final_loss']:.4f}")

## Commentary

Based on the results:

- Adam and AdamW typically perform well due to adaptive learning rates.
- SGD may require more epochs or learning rate scheduling.
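- Adagrad adapts per-parameter learning rates, but because it accumulates squared gradients its effective learning rate keeps shrinking, so training may slow down over time.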
- RMSprop is good for non-stationary objectives.

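The best optimizer depends on the dataset and model complexity. In this case, the optimizer with the highest final validation accuracy in the "Final Validation Results" output above is the best performer for this benchmark.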