# Convolutional Neural Networks

_**Building a Convolutional Neural Network (CNN) from Scratch on the MNIST dataset.**_

In [18]:
# Imports required packages

import os
import pickle

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

## Retrieving & Analysing Dataset

In [3]:
# Read the pickled MNIST dump from disk.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.

with open("data/keras_mnist.pickle", "rb") as pickle_file:
    mnist = pickle.load(pickle_file)

In [5]:
# The loaded object is a pair of pairs: the first holds the full training
# features and labels, the second holds the test features and labels.
train_pair, test_pair = mnist
X_train_full, y_train_full = train_pair
X_test, y_test = test_pair

In [None]:
# Check the shape of the datasets

print("Full training set shape:", <code here>)
print("Test set shape:", <code here>)

## Preprocessing

In [9]:
# Normalize the data between 0 and 1 for effective neural network model training.
# The scaling is guarded so that re-running this cell does not divide the
# already normalized arrays by 255 a second time.

def _scale_to_unit_interval(images):
    """Return `images` divided by 255, or unchanged if it is already scaled.

    A non-empty array whose largest value is at most 1 is treated as having
    been normalized by an earlier run of this cell and is returned as is.
    Assumes the raw values span 0-255 -- TODO confirm against the pickle.
    """
    if images.size and images.max() <= 1.:
        return images
    return images / 255.


X_train_full = _scale_to_unit_interval(X_train_full)
X_test = _scale_to_unit_interval(X_test)

In [20]:
# Split the training dataset further to separate 5000 instances to be used as the validation set.
# Also, use stratification (on the labels) during splitting.

X_train, X_val, y_train, y_val = <code here>

In [22]:
# To match the input shape of the CNN model, add a channel dimension to each dataset.
# The axis is appended only while an array has fewer than 4 dimensions, so
# re-running this cell does not stack a second trailing axis onto arrays that
# already have one.
# NOTE(review): presumably each array arrives as (samples, height, width) -- confirm.

def _ensure_channel_axis(images):
    """Return `images` with a trailing axis appended unless it is already 4-D."""
    return images[..., np.newaxis] if images.ndim < 4 else images


X_train = _ensure_channel_axis(X_train)
X_val = _ensure_channel_axis(X_val)
X_test = _ensure_channel_axis(X_test)

In [None]:
# Check for the updated shape of the train set

<code here>

## Modeling

In [32]:
# Create the CNN model with convolutional, pooling, dropout and dense layers in the specified order.
# Each convolutional layer is further configured with a specific kernel size, padding, activation and kernel initializer.

tf.random.set_seed(42)

model = tf.keras.Sequential([
    # Initialize a 2D convolutional layer with 32 filters, 3 as kernel size, "same" as padding,
    # "relu" activation function and "he_normal" as kernel initializer
    <code here>,
    
    # Initialize another 2D convolutional layer with 64 filters and all other parameters as those of the previous layer
    <code here>,
    
    # Initialize a 2D max pooling layer
    <code here>,
    
    # Initialize a "Flatten" layer
    <code here>,
    
    # Initialize a dropout layer with 0.25 as rate
    <code here>,
    
    # Initialize a "Dense" layer with 128 as output shape, "relu" as activation function and
    # "he_normal" as kernel initializer
    <code here>,
    
    # Initialize another dropout layer with 0.50 as rate
    <code here>,
    
    # Initialize a "Dense" layer specifying output shape and activation function according to the task
    <code here>
])

# Compile the model by specifying sparse categorical crossentropy as loss function,
# "nadam" as optimizer and "accuracy" as a metric

<code here>

In [None]:
# Fit the model by specifying the training dataset, 1 epoch, and
# the validation data (a tuple of features and labels)

<code here>

In [36]:
# Save the trained model for later reference.
# The target folder is created first when it is missing, so the save does not
# depend on "models" having been created by hand under the working directory.

model_path = "./models/my_mnist_cnn_model.keras"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

In [None]:
# Evaluate the model on test dataset
<code here>

**Note down the observed accuracy of this model.**

**Observations:**

Note down all your observations in green/blue book.