# Experimentations
## Experimentations on MNIST dataset

## Instructions

- ~Create and checkout a branch for your work (`git checkout -b YOUR_BRANCH_NAME`)~
- ~Create a copy of this template and name it however you want (e.g. `YOUR_NAME.ipynb`).~
- ~Change the title according to what scenario you are testing.~
- ~In the "involution" part, replace the existing model with the relevant model.~
- ~Create as many cells as you deem necessary for the experiments (explain your use case, comparison with convolution or other networks, results, etc.)~
- Remove this cell and every "TODO" comment.
- When you are done, commit your changes. Make sure you are only committing changes on the file you created! (you can check with `git status`)
- Push your changes to the repo (`git push --set-upstream origin YOUR_BRANCH_NAME`).
- Create a pull request to the `main` branch so that everyone can read your code before we merge it.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import time

from involution import Involution
import tensorflow as tf
import keras
from keras.datasets import mnist
from tensorflow.keras import layers

import visualkeras # for NN visualisation

## Utility functions to train models and display results

In [None]:
def train_model(model, x_train, x_test, y_train, y_test, batch_size, epochs):
  """Fit `model` and measure how long the fit takes.

  Args:
    model: object exposing a Keras-style `fit` method.
    x_train: training inputs, forwarded to `fit` as `x`.
    x_test: validation inputs, first element of `validation_data`.
    y_train: training targets, forwarded to `fit` as `y`.
    y_test: validation targets, second element of `validation_data`.
    batch_size: forwarded to `fit` unchanged.
    epochs: forwarded to `fit` unchanged.

  Returns:
    A `(history, execution_time)` tuple: the value returned by `model.fit`
    (it gives access to information on accuracy and loss) and the elapsed
    time of the call in seconds.
  """
  # perf_counter is monotonic, so the measured duration cannot be distorted
  # by system clock adjustments happening during a long training run.
  start = time.perf_counter()
  history = model.fit(
      x=x_train,
      y=y_train,
      batch_size=batch_size,
      epochs=epochs,
      validation_data=(x_test, y_test),
      verbose=1,
  )
  execution_time = time.perf_counter() - start

  return history, execution_time

In [None]:
def display_results(history):
  """Plot training/validation accuracy and loss side by side.

  Args:
    history: object whose `history` attribute is a mapping holding the
      'accuracy', 'val_accuracy', 'loss' and 'val_loss' series.
  """
  fig, axes = plt.subplots(1, 2, figsize=(15, 8))

  # One entry per subplot: (training key, training label,
  #                         validation key, validation label, title)
  panels = (
      ('accuracy', 'training accuracy',
       'val_accuracy', 'validation accuracy',
       'Accuracy vs. Epochs'),
      ('loss', 'training loss',
       'val_loss', 'validation loss',
       'Loss vs. Epochs'),
  )

  for axis, (train_key, train_label, val_key, val_label, title) in zip(axes, panels):
    axis.plot(history.history[train_key], label=train_label)
    axis.plot(history.history[val_key], label=val_label)
    axis.grid()
    axis.legend()
    axis.set_title(title)
    axis.set_xlabel('# Epochs')

  plt.show()

## MNIST Dataset

In [None]:
# Downloads the MNIST dataset (handwritten numbers) as a train split and a test split
(
    (x_raw_train_mnist, y_train_mnist),
    (x_raw_test_mnist, y_test_mnist),
) = mnist.load_data()

# Converts the images to float32, scales them to the [0,1] range and appends
# a trailing axis so that each image has shape (28, 28, 1)
x_train_mnist = (x_raw_train_mnist.astype("float32") / 255)[..., np.newaxis]
x_test_mnist = (x_raw_test_mnist.astype("float32") / 255)[..., np.newaxis]

In [None]:
# Shows one randomly chosen training image together with its label
fig = plt.figure() # plt.figure has to be called to actually create the figure
id_img = np.random.randint(0, len(x_train_mnist)) # takes a random image index from the dataset

plt.imshow(x_raw_train_mnist[id_img], cmap='gray') # imshow the raw (unscaled) image at that index
plt.suptitle("Representation of a "+str(y_train_mnist[id_img])+" in MNIST Dataset")
plt.show()

## Convolution

CNN inspired by LeCun et al., see http://yann.lecun.com/exdb/publis/pdf/lecun-iscas-10.pdf.

Model available at https://keras.io/examples/vision/mnist_convnet/

### Convolution model

In [None]:
num_classes = 10

# Definition of the CNN, assembled one layer at a time
cnn = keras.Sequential()
# input: images of size 28x28 pixels, with only 1 channel (greyscale)
cnn.add(keras.Input(shape=(28, 28, 1)))
# convolution layer: dot products with the weights, followed by the activation function
cnn.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
# pooling layer: downsampling, reduces the size of the representation
cnn.add(layers.MaxPooling2D(pool_size=(2, 2)))
cnn.add(layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
cnn.add(layers.MaxPooling2D(pool_size=(2, 2)))
# flatten layer: converts the data into a 1-D feature vector to feed it to the final layers
cnn.add(layers.Flatten())
# dropout layer: drops part of the data to avoid overfitting
cnn.add(layers.Dropout(0.5))
# dense layer: computes the result, one softmax output per class
cnn.add(layers.Dense(num_classes, activation="softmax"))

cnn.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

cnn.summary()

In [None]:
# Draws the layered diagram of the CNN
visualkeras.layered_view(
    cnn,
    legend=True,
    draw_volume=False,
)

In [None]:
# Training hyper-parameters for the CNN run
batch_size, epochs = 128, 10

# Trains the CNN; keeps both the fit history and the elapsed training time
cnn_history, cnn_time = train_model(
    cnn,
    x_train_mnist,
    x_test_mnist,
    y_train_mnist,
    y_test_mnist,
    batch_size,
    epochs,
)

# Reports the best validation accuracy over all epochs, as a percentage
print("The classification accuracy for the CNN %.2f" % (np.max(cnn_history.history['val_accuracy']) * 100) + " %")

In [None]:
# Plots the accuracy and loss curves recorded while training the CNN
display_results(history=cnn_history)

## Involution

In [None]:
inn = keras.Sequential( # definition of the INN (same layout as the CNN, with Involution layers in place of Conv2D)
    [
        keras.Input(shape=(28, 28, 1)), # images of size 28x28 pixels, with only 1 channel (greyscale)
        Involution(channel=3,group_number=1,kernel_size=3,stride=1,reduction_ratio=2),
        layers.MaxPooling2D(pool_size=(2, 2)), # pooling layer: downsampling, reduces the size of the representation
        Involution(channel=3,group_number=1,kernel_size=3,stride=1,reduction_ratio=2),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(), # flatten layer: converts the data into a 1-D feature vector to feed it to the final layers
        layers.Dropout(0.5), # dropout layer: drops part of the data to avoid overfitting
        layers.Dense(num_classes, activation="softmax"), # dense layer: computes the result (outputs probabilities)
    ]
)

# The last Dense layer already applies softmax, so the model outputs
# probabilities, not logits. The loss must therefore use from_logits=False;
# with from_logits=True softmax would be applied a second time inside the loss.
# This is the same loss the CNN uses via 'sparse_categorical_crossentropy'.
inn.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)


In [None]:
# Training hyper-parameters for the INN run
batch_size, epochs = 128, 10

# Trains the INN; keeps both the fit history and the elapsed training time
inn_history, inn_time = train_model(
    inn,
    x_train_mnist,
    x_test_mnist,
    y_train_mnist,
    y_test_mnist,
    batch_size,
    epochs,
)

# Reports the best validation accuracy over all epochs, as a percentage
print("The classification accuracy for the INN %.2f" % (np.max(inn_history.history['val_accuracy']) * 100) + " %")

In [None]:
# Prints the layer-by-layer summary of the INN, then draws its layered diagram
inn.summary()
visualkeras.layered_view(
    inn,
    legend=True,
    draw_volume=False,
)

In [None]:
# Plots the accuracy and loss curves recorded while training the INN
display_results(history=inn_history)