**Imports**

In [84]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import math

**Helper Functions**

In [85]:
class Utils:
    """Namespace object that bundles the notebook's helper classes."""

    def __init__(self):
        # Plotting helpers are reached as `utils.visualize.<method>(...)`.
        plotting_helpers = Visualize()
        self.visualize = plotting_helpers
        
class Visualize:
    """Matplotlib helpers for viewing digit images stored one-per-row in a dataframe.

    Both methods drop the "label" column and reshape a row of the remaining
    columns to 28x28, so each row must carry exactly 784 pixel values.
    """

    def pixels_to_image(self, df_array, indx):
        # Docstrings here follow the Google style (Args / Returns / Raises sections).
        """
        Displays a single 28x28 grayscale image from the raw dataframe.

        Args:
            df_array(pandas dataframe): data of labels and features
            indx(int): positional (iloc) index of the row to display

        Returns:
            None
        """
        image = df_array.drop(columns = ["label"]).iloc[indx].to_numpy().reshape(28,28)
        plt.imshow(image, cmap = "gray")
        plt.axis("off")
        plt.tight_layout()
        plt.show()

    def display_images(self, df_array, number_of_images):
        """
        Displays the first n images of the dataset in a grid that is 5 columns wide.

        Args:
            df_array(pandas dataframe): data of labels and features
            number_of_images(int): number of images to be displayed

        Returns:
            None

        Raises:
            ValueError: if number_of_images is smaller than 1
        """
        if number_of_images < 1:
            # Fail before a figure is created, with a message that names the argument.
            raise ValueError("number_of_images must be at least 1")

        cols = 5
        rows = math.ceil(number_of_images/cols)
        # squeeze=False always yields a 2-D array of axes, so flatten() is safe
        # for every grid size.
        _, ax = plt.subplots(rows, cols, squeeze = False)
        ax = ax.flatten()
        df_features = df_array.drop(columns = ["label"])

        for i in range(number_of_images):
            image = df_features.iloc[i].to_numpy().reshape(28,28)
            # gray colormap: low intensities render dark, high intensities bright
            # (0 -> black, 255 -> white)
            ax[i].imshow(image, cmap = "gray")

        # Hide ticks and frames on every grid cell, including the unused cells in
        # the last row when number_of_images is not a multiple of 5.
        for grid_cell in ax:
            grid_cell.axis("off")
        plt.tight_layout()
        plt.show()
    

**About the Data** 
<pre>    mnist_train.csv contains the pixel intensity data of 60,000 28x28 grayscale images.
    Each pixel column holds the intensity of one pixel; the "label" column holds the image's class label.
    Each row holds the 784 (28x28) pixel intensities of one image, flattened into a single row.
</pre> 

In [86]:
# Forward slashes resolve on Windows, Linux and macOS alike; a backslash path is
# Windows-only, and "\m" is an invalid escape sequence that newer Python versions warn about.
df_train = pd.read_csv("data/mnist_train.csv")
df_test = pd.read_csv("data/mnist_test.csv")
utils = Utils()
# utils.visualize.display_images(df_train, 20)

**Train-Validation Split and Feature Scaling**

In [87]:
from sklearn.model_selection import train_test_split


def _as_image_tensor(flat_pixels):
    """Cast rows of flat pixels to float32, divide by 255 and return a (N, 28, 28, 1) tensor."""
    scaled = flat_pixels.astype("float32")/255  # min-max normalisation
    # -1 lets numpy infer the number of images; the trailing 1 is the grayscale channel
    return tf.convert_to_tensor(scaled.reshape(-1, 28, 28, 1))


# Features are every column except "label"; the label column is the target.
x = df_train.drop(columns = ["label"]).to_numpy()
y = df_train["label"].to_numpy()

# Hold out 20% of the training rows for validation (fixed seed for a repeatable split).
x_train, x_cv, y_train, y_cv = train_test_split(x, y, test_size = 0.2, random_state = 99)

# Identical preprocessing for all three feature sets.
x_train, x_cv = _as_image_tensor(x_train), _as_image_tensor(x_cv)
x_test = _as_image_tensor(df_test.drop(columns = ["label"]).to_numpy())

# Labels only need converting to tensors.
y_train, y_cv = tf.convert_to_tensor(y_train), tf.convert_to_tensor(y_cv)
y_test = tf.convert_to_tensor(df_test["label"].to_numpy())

**Building the network**

In [88]:
from keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D
from keras import Sequential
from keras.losses import SparseCategoricalCrossentropy
from keras.regularizers import l2

# Two conv/pool stages followed by a small L2-regularised dense head.
# The last layer is linear: it emits raw logits, and the loss below applies the
# softmax internally (from_logits = True).
model = Sequential([
    # Declaring the input shape builds the weights immediately, so there is no
    # need to push the whole training set through the network just to
    # initialise it.
    Input(shape = (28, 28, 1)),
    Conv2D(filters = 32, kernel_size = (3,3), strides = 1, activation = "relu"),
    MaxPool2D(pool_size = (2,2), strides = 2),
    Conv2D(filters = 64, kernel_size = (3,3), strides = 1, activation = "relu"),
    MaxPool2D(pool_size = (2,2), strides = 2),
    Flatten(),
    Dense(units = 25, activation = "relu", kernel_regularizer = l2(0.001)),
    Dense(units = 15, activation = "relu", kernel_regularizer = l2(0.001)),
    Dense(units = 10, activation = "linear", kernel_regularizer = l2(0.001)),
])
model.compile(optimizer = "adam", loss = SparseCategoricalCrossentropy(from_logits = True))

**Training and Validating the model**

In [91]:
model.fit(x_train, y_train, epochs = 100)

# Fraction of misclassified samples per split: predict, take the arg-max
# class for each row, then compare against the true labels.
split_errors = {}
for split_name, features, labels in (("train", x_train, y_train), ("cv", x_cv, y_cv)):
    logits = model.predict(features)
    y_pred_class = np.argmax(logits, axis = 1)
    split_errors[split_name] = np.mean(y_pred_class != labels)

train_err = split_errors["train"]
cv_err = split_errors["cv"]

# Values are printed as percentages.
print(f"training error = {train_err*100}")
print(f"cv error = {cv_err*100}")

Epoch 1/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 0.0847
Epoch 2/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 0.0818
Epoch 3/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - loss: 0.0754
Epoch 4/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 19ms/step - loss: 0.0720
Epoch 5/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 18ms/step - loss: 0.0687
Epoch 6/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 7ms/step - loss: 0.0661
Epoch 7/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 0.0659
Epoch 8/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 0.0608
Epoch 9/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 0.0575
Epoch 10/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━

**Cross-Validation Results**
<pre>     Training error : 0.1083%
     Validation error : 1.033%</pre>

In [94]:
# Score the held-out test set: arg-max of the predicted logits is the class.
logits = model.predict(x_test)
y_pred_class = logits.argmax(axis = 1)
is_correct = y_pred_class == y_test
test_acc = np.mean(is_correct)
# Reported as a percentage.
print(f"Test accuracy = {test_acc*100}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Test accuracy = 99.1


**Results**
<pre>   Test Accuracy = 99.1%</pre>