In [None]:
#2.1 Loading the MNIST dataset

import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers
from tensorflow.keras import optimizers


# Load the MNIST train and test splits as (image, label) pairs, plus the
# dataset metadata object.
(train_ds, test_ds), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)
# Optional visual sanity check: tfds.show_examples(train_ds, ds_info)

# 2.2 Setting up the data pipeline

def data_pipeline(input, batch_size=64, prefetch_buffer_size=4):
    """Turn a dataset of ``(image, label)`` pairs into shuffled, batched inputs.

    Every image is flattened to a 1-D vector, cast to ``float32`` and rescaled
    with ``x / 128 - 1``; every label is one-hot encoded with depth 10. The
    result is shuffled with a buffer of 1024 elements, batched and prefetched.

    Parameters
    ----------
    input : dataset of (image, label) pairs
        Must provide ``map``, ``shuffle``, ``batch`` and ``prefetch``. The
        name shadows the ``input`` builtin but is kept so that existing
        keyword callers keep working.
    batch_size : int, optional
        Number of samples per batch (default 64).
    prefetch_buffer_size : int, optional
        Number of batches to prefetch (default 4).

    Returns
    -------
    The transformed dataset yielding ``(flat_image, one_hot_label)`` batches.
    """

    def _preprocess(image, label):
        # Flatten first, then cast and rescale. For 8-bit pixel values
        # 0..255 the rescaled range is [-1, 0.9921875], i.e. the upper
        # bound stays slightly below 1.
        flat_image = tf.reshape(image, (-1,))
        scaled_image = (tf.cast(flat_image, tf.float32) / 128) - 1
        return scaled_image, tf.one_hot(label, depth=10)

    # A single map stage applies all per-sample transformations at once.
    prepared = input.map(_preprocess)
    # Shuffle individual samples before grouping them into batches.
    prepared = prepared.shuffle(1024).batch(batch_size)
    # Prefetch batches so they are ready ahead of the consumer.
    return prepared.prefetch(prefetch_buffer_size)

# Build the training and test pipelines with the default batch settings
train_dataset = data_pipeline(train_ds)
test_dataset = data_pipeline(test_ds)

# Sanity check: print the first batch of each pipeline (training first)
for split_dataset in (train_dataset, test_dataset):
    for elem in split_dataset.take(1):
        print(elem)


# 2.3 Building a deep neural network with TensorFlow
class MLPModel(tf.keras.Model):
    """Multi-layer perceptron: ReLU hidden layers followed by a softmax output.

    Parameters
    ----------
    layer_sizes : iterable of int
        Number of units of each hidden layer, in forward order.
    output_size : int, optional
        Number of units of the softmax output layer (default 10).
    """

    def __init__(self, layer_sizes, output_size=10):
        super().__init__()
        # One dense ReLU layer per requested size, kept in forward order
        self.mlp_layers = [
            layers.Dense(units=width, activation='relu') for width in layer_sizes
        ]
        # Softmax head used for classification
        self.output_layer = layers.Dense(units=output_size, activation='softmax')

    def call(self, x):
        """Run ``x`` through every hidden layer and then the output layer."""
        hidden = x
        for dense in self.mlp_layers:
            hidden = dense(hidden)
        return self.output_layer(hidden)

# 2.4 Training the network
"""
Define a training loop function which receives
• The number of epochs
• The model object
• The training dataset
• The test dataset
• The loss function
• The optimizer
• Different arrays for the different values you want to track for visualization
"""


def _batch_hits(targets, predictions):
    """Count samples whose argmax prediction equals the argmax of the target row."""
    return np.sum(np.argmax(targets, axis=1) == np.argmax(predictions, axis=1))


def _summarize_epoch(batch_losses, hits, samples):
    """Return ``(mean batch loss, accuracy)``; NaN for each if no batch was seen."""
    mean_loss = np.mean(batch_losses) if batch_losses else float("nan")
    accuracy = hits / samples if samples else float("nan")
    return mean_loss, accuracy


def _train_epoch(model, dataset, loss_function, optimizer):
    """Run one optimisation pass over ``dataset``; return ``(mean loss, accuracy)``."""
    batch_losses = []
    hits = 0
    samples = 0
    for inputs, targets in dataset:
        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as tape:
            predictions = model(inputs)
            loss = loss_function(targets, predictions)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        batch_losses.append(loss.numpy())
        # Predictions come from before this step's weight update
        hits += _batch_hits(targets, predictions)
        samples += len(inputs)
    return _summarize_epoch(batch_losses, hits, samples)


def _evaluate_epoch(model, dataset, loss_function):
    """Run a forward-only pass over ``dataset``; return ``(mean loss, accuracy)``."""
    batch_losses = []
    hits = 0
    samples = 0
    for inputs, targets in dataset:
        predictions = model(inputs)
        batch_losses.append(loss_function(targets, predictions).numpy())
        hits += _batch_hits(targets, predictions)
        samples += len(inputs)
    return _summarize_epoch(batch_losses, hits, samples)


def train_model(num_epochs, model, train_dataset, test_dataset, loss_function, optimizer):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    Each epoch first performs one optimisation pass over ``train_dataset``
    and then a forward-only pass over ``test_dataset``. The reported loss is
    the unweighted mean of the per-batch losses; accuracy compares the argmax
    along axis 1 of targets and predictions, so both are expected to be 2-D.

    A dataset that yields no batches produces NaN loss and accuracy for that
    epoch instead of raising ``ZeroDivisionError``.

    Parameters
    ----------
    num_epochs : int
        Number of passes over the training data.
    model : callable
        Called as ``model(inputs)``; must expose ``trainable_variables``.
    train_dataset, test_dataset : iterable of (inputs, targets)
        Batches used for training and for evaluation respectively.
    loss_function : callable
        Called as ``loss_function(targets, predictions)``.
    optimizer : object
        Must provide ``apply_gradients``.

    Returns
    -------
    tuple of four lists
        ``(train_losses, test_losses, train_accuracies, test_accuracies)``,
        each holding one value per epoch.
    """
    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    for epoch in range(num_epochs):
        mean_train_loss, train_accuracy = _train_epoch(
            model, train_dataset, loss_function, optimizer
        )
        mean_test_loss, test_accuracy = _evaluate_epoch(
            model, test_dataset, loss_function
        )

        print(f"Epoch {epoch + 1},\n Train Loss: {mean_train_loss}, Train Accuracy: {train_accuracy},\n Test Loss: {mean_test_loss}, Test Accuracy: {test_accuracy}")

        # Keep the per-epoch history for later visualization
        train_losses.append(mean_train_loss)
        test_losses.append(mean_test_loss)
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

    return train_losses, test_losses, train_accuracies, test_accuracies

# Baseline run: two hidden layers of 256 units trained with plain SGD
EPOCHS = 10
model = MLPModel(layer_sizes=[256, 256], output_size=10)

# Loss function and optimizer.
# The non-legacy SGD class is used because `tf.keras.optimizers.legacy.*`
# raises ImportError under Keras 3 (TensorFlow 2.16+); it also matches the
# `optimizers.SGD` class used by the hyperparameter search further down.
cce = tf.keras.losses.CategoricalCrossentropy()
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)

# Train on the pipelines built above and keep the per-epoch histories
train_losses, test_losses, train_accuracies, test_accuracies = train_model(
    EPOCHS, model, train_dataset, test_dataset, cce, sgd
)


# 2.5 Visualization
def visualization(train_losses, train_accuracies, test_losses, test_accuracies):
    """
    Visualize accuracy and loss for training and test data, one point per epoch.

    Loss is displayed as a regular line, accuracy as a dotted line.
    Training data is displayed in blue, test data in red.

    Parameters
    ----------
    train_losses : array-like
        training losses
    train_accuracies : array-like
        training accuracies
    test_losses : array-like
        test losses
    test_accuracies : array-like
        test accuracies
    """
    plt.figure()
    # plot() returns a one-element list; unpack it to get the line handle
    line1, = plt.plot(train_losses, "b-")
    line2, = plt.plot(test_losses, "r-")
    line3, = plt.plot(train_accuracies, "b:")
    line4, = plt.plot(test_accuracies, "r:")
    # The curves hold one value per epoch, so the x axis counts epochs
    plt.xlabel("Epochs")
    plt.ylabel("Loss/Accuracy")
    plt.legend(
        (line1, line2, line3, line4),
        ("training loss", "test loss", "train accuracy", "test accuracy"),
    )
    plt.show()


# Plot the loss and accuracy histories of the most recent training run
visualization(
    train_losses=train_losses,
    train_accuracies=train_accuracies,
    test_losses=test_losses,
    test_accuracies=test_accuracies,
)

# Hyperparameter grid: every combination of the values below is trained once
learning_rates = [0.01, 0.1, 0.5]
momentums = [0.5, 0.9]
batch_sizes = [32, 64, 128, 256]
layer_configurations = [[256, 256], [128, 128], [512, 256, 128]]
optimizers_list = [optimizers.SGD, optimizers.Adam, optimizers.RMSprop]

# One dict per experiment: its configuration plus the per-epoch metric lists
experiment_results = []

# The loss object is identical for every run, so it is created once
cce = tf.keras.losses.CategoricalCrossentropy()

# itertools.product walks the grid in the same order as nested loops would
# (the last list varies fastest).
search_grid = itertools.product(
    learning_rates, momentums, batch_sizes, layer_configurations, optimizers_list
)

for lr, momentum, batch_size, layer_sizes, opt in search_grid:
    print(f"Running experiment with LR: {lr}, Momentum: {momentum}, Batch Size: {batch_size}, Layer Sizes: {layer_sizes}, Optimizer: {opt.__name__}")

    train_dataset = data_pipeline(train_ds, batch_size)
    test_dataset = data_pipeline(test_ds, batch_size)

    # A fresh model per run so no weights carry over between experiments
    model = MLPModel(layer_sizes)

    # NOTE(review): momentum is only passed to SGD, so the Adam and RMSprop
    # runs are repeated unchanged for every momentum value in the grid.
    if opt is optimizers.SGD:
        optimizer = opt(learning_rate=lr, momentum=momentum)
    else:
        optimizer = opt(learning_rate=lr)

    train_losses, test_losses, train_accuracies, test_accuracies = train_model(
        EPOCHS, model, train_dataset, test_dataset, cce, optimizer
    )

    experiment_results.append({
        'lr': lr,
        'momentum': momentum,
        'batch_size': batch_size,
        'layer_sizes': layer_sizes,
        'optimizer': opt.__name__,
        'train_losses': train_losses,
        'test_losses': test_losses,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies,
    })


# One row per experiment; the metric columns hold per-epoch lists
results_df = pd.DataFrame(experiment_results)

def plot_perhyper(hyperparam, method):
    """Plot the mean per-epoch curve of ``method`` for each ``hyperparam`` value.

    Reads the module-level ``results_df``, in which every row is one
    experiment and the ``method`` column holds that experiment's per-epoch
    list. For each distinct value of ``hyperparam`` the lists of all matching
    experiments are averaged epoch by epoch and drawn as one line.

    Parameters
    ----------
    hyperparam : str
        Column of ``results_df`` to group the experiments by (e.g. ``'lr'``).
    method : str
        Metric column to average (e.g. ``'test_accuracies'``).
    """
    # List-valued settings such as layer_sizes are unhashable and cannot be
    # compared against a column directly, so they are handled as tuples.
    keys = [
        tuple(value) if isinstance(value, list) else value
        for value in results_df[hyperparam]
    ]

    plt.figure(figsize=(12, 6))
    # dict.fromkeys keeps each distinct value once, in first-seen order
    for value in dict.fromkeys(keys):
        mask = [key == value for key in keys]
        subset = results_df[mask]
        # Rows are experiments and columns are epoch indices, so the column
        # means form the mean curve (there is no 'epoch' column to group by).
        mean_metric = pd.DataFrame(subset[method].tolist()).mean(axis=0)
        plt.plot(mean_metric, label=f"{hyperparam}:{value}")

    plt.ylabel(method)
    plt.xlabel('Epochs')
    plt.title('Effect of Adjusting the hyperparameters of our model with different hyperparameters and methods')
    plt.legend()
    plt.show()

    
# Learning-rate comparison: test accuracy first, then every tracked metric
# (test accuracy is therefore drawn a second time inside the loop)
plot_perhyper(hyperparam='lr', method='test_accuracies')
methods = ['train_losses', 'test_losses', 'train_accuracies', 'test_accuracies']
for metric_name in methods:
    plot_perhyper(hyperparam='lr', method=metric_name)


results_df = pd.DataFrame(experiment_results)

# Every metric cell holds a per-epoch list, but the pandas summaries below
# need one scalar per experiment. Each history is therefore reduced to its
# final-epoch value (NaN when a history is empty).
metric_columns = ['train_losses', 'test_losses', 'train_accuracies', 'test_accuracies']
final_metrics = results_df[metric_columns].apply(
    lambda column: column.map(
        lambda history: history[-1] if len(history) else float('nan')
    )
).astype(float)

print("Summary Statistics:")
print(final_metrics.describe())

# Best accuracy: highest final-epoch test accuracy (NaN values sort last)
best_index = final_metrics['test_accuracies'].sort_values(ascending=False).index[:1]
best_accuracy = results_df.loc[best_index]
print("\nBest Accuracy Configuration:")
print(best_accuracy)

# Lowest loss: smallest final-epoch test loss (NaN values sort last)
lowest_index = final_metrics['test_losses'].sort_values().index[:1]
lowest_loss = results_df.loc[lowest_index]
print("\nLowest Loss Configuration:")
print(lowest_loss)

# Plotting Test Losses for different configurations (one line per experiment)
plt.figure(figsize=(12, 6))
for index, row in results_df.iterrows():
    label = f"LR: {row['lr']}, Momentum: {row['momentum']}, Batch: {row['batch_size']}, Layer Sizes: {row['layer_sizes']}"
    plt.plot(row['test_losses'], label=label)
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.title('Test Loss per Epoch for Different Configurations')
plt.legend()
plt.show()
# Similarly, you can plot for accuracies

# Correlate the numeric hyperparameters with the final-epoch metrics
correlation_inputs = pd.concat(
    [results_df[['lr', 'momentum', 'batch_size']], final_metrics], axis=1
)
correlation_matrix = correlation_inputs.corr()
print("\nCorrelation Matrix:")
print(correlation_matrix)
