For P1 I use a CNN with the following key aspects:

- A CNN architecture with three convolutional layers, each followed by max pooling and optional dropout.
- Includes a fully connected layer with optional dropout before the final classification layer
- Trains the model using five different optimizers: SGD, AdaGrad, RMSProp, SGD with Nesterov momentum (labelled "Nesterov"), and Adam
- For each optimizer, tracks training loss, training accuracy, and validation accuracy over 100 epochs
- Uses a fixed learning rate of 0.001 and limits each epoch to 50 mini-batches with batch size of 128
- Implements a function to create models with a configurable dropout rate; the first run sets dropout_rate=0 and the later runs use non-zero dropout rates
- Shuffles the training data before each epoch to improve training performance


There are 3 parts to my solution. 

- First I run all 5 optimizers without any dropout.
- Then I repeat with a dropout of 0.25 for all 5 optimizers. 
- Finally, as an extension, I pick the 2 best optimizers (in both validation accuracy and training loss), Adam and RMSProp, and vary the dropout rate from 0.1 to 0.3 in steps of 0.05 to gauge the effect of dropout on training loss.
- I discuss the outputs in the pdf attached to my solution. 


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.datasets import cifar10
from tensorflow.keras.layers import Dropout

# Ask TensorFlow to execute eagerly, including inside tf.function-wrapped code.
# NOTE(review): eager mode is presumably wanted for debugging; it is expected to
# be slower than graph execution -- confirm it is still required.
tf.compat.v1.enable_eager_execution()  # Enable eager execution
tf.config.run_functions_eagerly(True)


# Load CIFAR-10 dataset
(data_train, label_train), (data_test, label_test) = cifar10.load_data()

# Normalize pixel values by 255. The arrays are cast to float32 first so the
# division result stays float32; dividing an integer array by 255.0 would
# produce float64 and double the memory held by the dataset.
data_train = data_train.astype("float32") / 255.0
data_test = data_test.astype("float32") / 255.0


In [None]:
def create_cnn_dropout(dropout_rate=0.25):
    """Build the (uncompiled) CNN used for the optimizer experiments.

    The network is three convolutional stages with 32, 64 and 128 filters,
    each a 5x5 'same'-padded ReLU convolution followed by 2x2 max pooling
    and dropout, then a 1000-unit ReLU dense layer with dropout and a
    10-way softmax output.

    Args:
        dropout_rate: Rate given to every Dropout layer in the network.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    stack = []

    # Convolutional feature extractor: conv -> pool -> dropout per stage.
    for stage, filters in enumerate((32, 64, 128)):
        conv_options = {'activation': 'relu', 'padding': 'same'}
        if stage == 0:
            # Only the first layer declares the 32x32 RGB input shape.
            conv_options['input_shape'] = (32, 32, 3)
        stack.append(layers.Conv2D(filters, (5, 5), **conv_options))
        stack.append(layers.MaxPooling2D((2, 2)))
        stack.append(Dropout(dropout_rate))

    # Classifier head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(1000, activation='relu'))
    stack.append(Dropout(dropout_rate))
    stack.append(layers.Dense(10, activation='softmax'))

    return models.Sequential(stack)

def train_model(optimizer, dropout_rate, num_epochs=100, batch_size=128, validation_data=None):
    """Train a fresh CNN with the given optimizer and record per-epoch metrics.

    Reads the module-level ``data_train`` / ``label_train`` arrays. Every epoch
    draws a new random permutation of the training set and trains on at most
    50 mini-batches taken from it.

    Args:
        optimizer: Keras optimizer instance passed to ``model.compile``.
        dropout_rate: Dropout rate forwarded to ``create_cnn_dropout``.
        num_epochs: Number of epochs to run.
        batch_size: Number of samples per mini-batch.
        validation_data: Optional ``(inputs, labels)`` pair evaluated after
            every epoch. When ``None``, validation is skipped and the returned
            validation-accuracy list is empty.

    Returns:
        Tuple ``(losses, train_accuracies, val_accuracies)`` of per-epoch lists.
    """
    model = create_cnn_dropout(dropout_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Per-epoch histories returned to the caller
    losses = []
    train_accuracies = []
    val_accuracies = []

    num_samples = len(data_train)
    # Limit to 50 mini-batches per epoch, or fewer when the data cannot fill
    # that many batches (ceiling division).
    num_batches = min(50, -(-num_samples // batch_size))
    samples_per_epoch = num_batches * batch_size

    for epoch in range(num_epochs):
        # Shuffle by index and keep only the rows this epoch will consume,
        # instead of materialising a shuffled copy of the whole training set.
        epoch_indices = np.random.permutation(num_samples)[:samples_per_epoch]

        epoch_loss = []
        correct_predictions = 0
        total_predictions = 0

        for start in range(0, len(epoch_indices), batch_size):
            batch_indices = epoch_indices[start:start + batch_size]
            batch_x = data_train[batch_indices]
            batch_y = label_train[batch_indices]

            # NOTE(review): whether these are per-batch values or running
            # averages depends on the Keras version in use -- confirm.
            loss, accuracy = model.train_on_batch(batch_x, batch_y)
            epoch_loss.append(loss)

            # Weight by the real batch length so a short final batch is not
            # over-counted.
            correct_predictions += accuracy * len(batch_indices)
            total_predictions += len(batch_indices)

        # Compute the average loss and training accuracy for this epoch
        losses.append(np.mean(epoch_loss))
        train_accuracies.append(correct_predictions / total_predictions)

        summary = f"Epoch {epoch + 1}/{num_epochs}, Loss: {losses[-1]:.4f}, Train Accuracy: {train_accuracies[-1]:.4f}"

        # Track validation accuracy only when a validation set was supplied
        if validation_data is not None:
            _, val_accuracy = model.evaluate(validation_data[0], validation_data[1], verbose=0)
            val_accuracies.append(val_accuracy)
            summary += f", Validation Accuracy: {val_accuracies[-1]:.4f}"

        print(summary)

    return losses, train_accuracies, val_accuracies


In [None]:
# All 5 optimizers without dropout

# Shared settings for this experiment
num_epochs = 100
batch_size = 128
validation_data = (data_test, label_test)  # the test split serves as the validation set

# One optimizer instance per run, all at learning rate 0.001.
# "Nesterov" is SGD with momentum 0.9 and Nesterov acceleration enabled.
optimizers_dict = dict(
    SGD=optimizers.SGD(learning_rate=0.001),
    AdaGrad=optimizers.Adagrad(learning_rate=0.001),
    RMSProp=optimizers.RMSprop(learning_rate=0.001),
    Nesterov=optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True),
    Adam=optimizers.Adam(learning_rate=0.001),
)

# Train one model per optimizer and keep its per-epoch curves, keyed by name
results_dict = {}

for opt_name, opt in optimizers_dict.items():
    print(f"Training with {opt_name}...")
    losses, train_accuracies, val_accuracies = train_model(
        opt,
        dropout_rate=0,
        num_epochs=num_epochs,
        batch_size=batch_size,
        validation_data=validation_data,
    )
    results_dict[opt_name] = dict(
        losses=losses,
        train_accuracies=train_accuracies,
        val_accuracies=val_accuracies,
    )


In [None]:
# All 5 optimizers with dropout rates

# Shared settings for this experiment
num_epochs = 100
batch_size = 128
validation_data = (data_test, label_test)  # the test split serves as the validation set

# Fresh optimizer instances for this experiment, all at learning rate 0.001.
# "Nesterov" is SGD with momentum 0.9 and Nesterov acceleration enabled.
optimizers_dict = dict(
    SGD=optimizers.SGD(learning_rate=0.001),
    AdaGrad=optimizers.Adagrad(learning_rate=0.001),
    RMSProp=optimizers.RMSprop(learning_rate=0.001),
    Nesterov=optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True),
    Adam=optimizers.Adam(learning_rate=0.001),
)

# NOTE: results_dict is rebound here, so curves stored under this name by an
# earlier cell are discarded; plot them before running this cell.
results_dict = {}

for opt_name, opt in optimizers_dict.items():
    print(f"Training with {opt_name}...")
    losses, train_accuracies, val_accuracies = train_model(
        opt,
        dropout_rate=0.25,
        num_epochs=num_epochs,
        batch_size=batch_size,
        validation_data=validation_data,
    )
    results_dict[opt_name] = dict(
        losses=losses,
        train_accuracies=train_accuracies,
        val_accuracies=val_accuracies,
    )



In [None]:
# Split results_dict into one {optimizer name: curve} mapping per metric
losses_dict = {name: result['losses'] for name, result in results_dict.items()}
train_accuracies_dict = {name: result['train_accuracies'] for name, result in results_dict.items()}
val_accuracies_dict = {name: result['val_accuracies'] for name, result in results_dict.items()}

# Epoch numbers shown on the x-axis (1-based)
epoch_axis = range(1, num_epochs + 1)

# One figure per metric, one line per optimizer:
# (curves, figure title, y-axis label)
figure_specs = [
    (losses_dict, 'Training Loss vs Epochs for Different Optimizers', 'Training Loss'),
    (train_accuracies_dict, 'Train Accuracy vs Epochs for Different Optimizers', 'Train Accuracy'),
    (val_accuracies_dict, 'Validation Accuracy vs Epochs for Different Optimizers', 'Validation Accuracy'),
]

for curves_by_optimizer, figure_title, y_label in figure_specs:
    plt.figure(figsize=(10, 6))
    for opt_name, curve in curves_by_optimizer.items():
        plt.plot(epoch_axis, curve, label=opt_name)

    plt.title(figure_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
    plt.show()


In [None]:
#Varying dropout values for Adam and RMSProp

# NOTE: the training loop below builds a new optimizer for every run instead of
# reusing these two instances.
optimizers_dict = {
    "Adam": optimizers.Adam(learning_rate=0.001),
    "RMSProp": optimizers.RMSprop(learning_rate=0.001)
}

dropout_values = [0.1, 0.15, 0.2, 0.25, 0.3]

# train_model evaluates on this pair after every epoch; without it the
# original train_model fails when it indexes validation_data.
validation_data = (data_test, label_test)  # Validation set

# Train models and store losses
num_epochs = 100
batch_size = 128
losses_dict = {}  # "<optimizer>_dropout_<rate>" -> per-epoch training losses

for opt_name, optimizer_class in [("Adam", Adam), ("RMSprop", RMSprop)]:
    for dropout in dropout_values:
        print(f"Training with {opt_name} optimizer and dropout {dropout}...")
        key = f"{opt_name}_dropout_{dropout}"
        # New optimizer per model, with the learning rate stated explicitly
        opt = optimizer_class(learning_rate=0.001)
        # train_model returns (losses, train_accuracies, val_accuracies);
        # only the training losses are kept for the dropout comparison.
        losses, _, _ = train_model(opt, dropout, num_epochs, batch_size, validation_data=validation_data)
        losses_dict[key] = losses

In [None]:
# Plot Adam and RMSProp with 5 different dropout values

dropout_rates = [0.1, 0.15, 0.2, 0.25, 0.3]

# Create a figure with two subplots, one per optimizer
plt.figure(figsize=(12, 5))

for position, opt_name in enumerate(("Adam", "RMSprop"), start=1):
    plt.subplot(1, 2, position)

    for dropout in dropout_rates:
        entry = losses_dict[f"{opt_name}_dropout_{dropout}"]
        # The entry may be the whole (losses, train_accuracies,
        # val_accuracies) tuple returned by train_model; plot only the losses.
        losses = entry[0] if isinstance(entry, tuple) else entry
        # Number epochs from 1 so the axis matches the "Epochs" label
        plt.plot(range(1, len(losses) + 1), losses, marker='o', label=f"Dropout {dropout}")

    plt.xlabel("Epochs")
    plt.ylabel("Training Loss")
    plt.title(f"{opt_name} Optimizer - Training Loss vs Epochs")
    plt.legend()
    plt.grid()

plt.tight_layout()
plt.show()
