## *Libraries*

In [23]:
# LIBRARIES NEEDED
# %pip install numpy
# %pip install matplotlib
# %pip install tensorflow
# %pip install scikit-learn

# Libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.activations import relu, softmax

# Used to prepare the data
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## *Loading and splitting the data* 

In [24]:
# Fetch the MNIST digits through Keras' built-in dataset loader
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Merge the predefined train/test partitions into a single pool
# (stacked along the first, per-sample axis) so that the notebook
# can draw its own train / cross-validation / test split afterwards
x_original = np.concatenate([x_train, x_test])
y_original = np.concatenate([y_train, y_test])

print("x shape:", x_original.shape)
print("y shape:", y_original.shape)

x shape: (70000, 28, 28)
y shape: (70000,)


*Divide the data into 3 sets (training, cross validation and test) to compare each model's performance later*

In [25]:
# Keep 70% of the dataset as the training set (test_size = 0.3).
# Put the remaining 30% in temporary variables: x_ and y_.
# NOTE: this rebinds x_train and y_train, which previously held the
# partitions returned by mnist.load_data().

x_train, x_, y_train, y_ = train_test_split(x_original, y_original, test_size = 0.3, random_state=1)

# Split the 30% subset above into two parts (test_size = 0.33):
# 67% of it (about 20% of all data) for cross validation and
# 33% of it (about 10% of all data) for the test set.
# x_test and y_test are rebound here as well.

x_dev, x_test, y_dev, y_test = train_test_split(x_, y_, test_size = 0.33, random_state=1)

# The temporary hold-out arrays are no longer needed
del x_, y_

# Report the size of every split as a sanity check
print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_dev.shape}")
print(f"the shape of the cross validation set (target) is: {y_dev.shape}\n")
print(f"the shape of the test set (input) is: {x_test.shape}")
print(f"the shape of the test set (target) is: {y_test.shape}")

the shape of the training set (input) is: (49000, 28, 28)
the shape of the training set (target) is: (49000,)

the shape of the cross validation set (input) is: (14070, 28, 28)
the shape of the cross validation set (target) is: (14070,)

the shape of the test set (input) is: (6930, 28, 28)
the shape of the test set (target) is: (6930,)


*Normalize the data to improve performance and avoid biased weights*

In [26]:
# Normalize every split with the same Keras utility, along axis 1.
# NOTE(review): keras.utils.normalize rescales by a vector norm along the
# given axis (L2 by default, per the Keras docs) rather than mapping pixel
# values to [0, 1] -- confirm this is the intended preprocessing.
x_train_normalized, x_dev_normalized, x_test_normalized = (
    tf.keras.utils.normalize(split, axis = 1)
    for split in (x_train, x_dev, x_test)
)

## *Models implementation and training*

In [27]:
# MODELS IMPLEMENTATION

# Seed TensorFlow's random generator for consistent results.
# If you remove it, the results will not be reproducible across runs.
tf.random.set_seed(1234)

# Every model declares its 28x28 input with an explicit Input object as the
# first element of the Sequential list. Passing `input_shape=` to the first
# layer instead makes recent Keras versions emit a UserWarning.

# Large model with 3 layers
model1 = Sequential(
    [
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(units = 128, activation = 'relu', name = 'L1'),
        Dense(units = 128, activation = 'relu', name = 'L2'),
        Dense(units = 10, activation = 'softmax', name = 'L3')
    ], name = 'Model_1'
)

# Medium model with 3 layers
model2 = Sequential(
    [
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(units = 25, activation = 'relu', name = 'L1'),
        Dense(units = 15, activation = 'relu', name = 'L2'),
        Dense(units = 10, activation = 'softmax', name = 'L3'),
    ], name = 'Model_2'
)

# Small model with 2 layers
# (the output layer keeps the name 'L3', matching the other two models)
model3 = Sequential(
    [
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(units = 128, activation = 'relu', name = 'L1'),
        Dense(units = 10, activation = 'softmax', name = 'L3')
    ], name = 'Model_3'
)

# List that stores all the models so they can be trained and evaluated later
models = [model1, model2, model3]

  super().__init__(**kwargs)


In [28]:
# Compile and then train every candidate model in turn
for model in models:

    # Each model gets its own Adam optimizer instance; the sparse
    # categorical cross-entropy loss works directly on integer class labels
    model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
        loss = tf.keras.losses.SparseCategoricalCrossentropy(),
    )

    print(f"Training {model.name}...")

    # Fit on the normalized training split only; verbose = 0 keeps the
    # per-epoch progress output silent
    model.fit(x_train_normalized, y_train, epochs = 60, verbose = 0)

    print("Done!\n")

Training Model_1...
Done!

Training Model_2...
Done!

Training Model_3...
Done!



## *Evaluating the models* 

*First compare the training error with the cross-validation error, then choose the model with the smallest cross-validation error while also avoiding overfitting, i.e. a training error that is too small compared with the cross-validation error*

In [29]:
def _error_rate(net, inputs, labels):
    """Return the fraction of `inputs` that `net` classifies incorrectly.

    The predicted class of each example is the index of the largest value
    in the corresponding row of `net.predict(inputs)`; the error is one
    minus the accuracy of those classes against `labels`.
    """
    predicted_classes = net.predict(inputs).argmax(axis=1)
    return 1 - accuracy_score(labels, predicted_classes)


# Per-model error rates, in the same order as `models`
nn_train_errors = []
nn_cv_errors = []

for model in models:
    # Training error first, then cross-validation error
    nn_train_errors.append(_error_rate(model, x_train_normalized, y_train))
    nn_cv_errors.append(_error_rate(model, x_dev_normalized, y_dev))

# Side-by-side comparison of the errors for all models
print("\nRESULTS:")

for idx, errors in enumerate(zip(nn_train_errors, nn_cv_errors), start=1):
    train_error, dev_error = errors
    print(f"Model {idx}: Training Error: {train_error:.2%}, Cross validation Error: {dev_error:.2%}")

[1m1532/1532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1532/1532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1532/1532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step

RESULTS:
Model 1: Training Error: 0.06%, Cross validation Error: 2.51%
Model 2: Training Error: 0.93%, Cross validation Error: 5.47%
Model 3: Training Error: 0.00%, Cross validation Error: 2.74%


*Model 1 has the smallest cross-validation error (2.51%), with model 3 close behind (2.74%). The perfect training error of model 3 (0.00%) additionally indicates an issue of overfitting. This is why model 1 emerges as the optimal choice.*

## *Choosing a model*

*For illustrative purposes, this notebook evaluates all the models on the test set, but in practice this should be done only with the model chosen based on performance*

In [30]:
# Measure the misclassification rate of every model on the held-out test set
for model in models:

    # Predict class probabilities for the test set
    predictions = model.predict(x_test_normalized)

    # Get the predicted class labels (index of the highest probability)
    yhat = np.argmax(predictions, axis=1)

    # Count the predictions that differ from the true labels.
    # The element-wise comparison replaces a per-example Python loop.
    misclassified = np.count_nonzero(yhat != y_test)

    # Compute the fraction of the data that the model misclassified
    fraction_error = misclassified / len(predictions)

    print(f"fraction of misclassified data for {model.name}: {fraction_error}")


[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
fraction of misclassified data for Model_1: 0.023520923520923522
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
fraction of misclassified data for Model_2: 0.05800865800865801
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
fraction of misclassified data for Model_3: 0.028715728715728715


*As expected, model 1 shows the best performance on the test set*

In [32]:
# Saving the selected model (model 1) to the file 'M1_digit_rec.keras'.
# The call is left commented out so that running the notebook does not
# write a file; uncomment the line below to save the model.

#model1.save('M1_digit_rec.keras')