# Example on the MNIST Digits Dataset

### 1. Import the modules and the dataset loader

In [1]:
from neural_network import *
from keras.datasets import mnist

2023-08-30 16:09:15.310264: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### 2. Load the MNIST dataset

In [2]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Display the shape of every array, in the order: train images, train labels,
# test images, test labels.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

### 3. Build the neural network

In [3]:
# LeNet-5-like architecture (see reference: http://d2l.ai/chapter_convolutional-modern/alexnet.html),
# here with a BatchNorm2d layer after each convolution and Tanh activations.

# Stage 1 - input preparation: Normalization is configured from the training
# images, then each 28x28 image is reshaped to a single-channel (1, 28, 28) input.
input_layers = [
    Normalization(samples=X_train),
    Reshape(output_shape=(1, 28, 28)),
]

# Stage 2 - two convolution / batch-norm / tanh / average-pooling groups.
# NOTE(review): assuming the usual convolution and pooling output-size arithmetic,
# the spatial size goes 28 -> 28 -> 14 -> 10 -> 5, so Flatten() yields
# 16 * 5 * 5 = 400 features, matching in_features of the first Linear layer.
conv_layers = [
    Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
    BatchNorm2d(6),
    Tanh(),
    AvgPool2d(kernel_size=(2, 2), stride=(2, 2)),
    Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
    BatchNorm2d(16),
    Tanh(),
    AvgPool2d(kernel_size=(2, 2), stride=(2, 2)),
    Flatten(),
]

# Stage 3 - fully connected classifier ending in 10 outputs (one per digit),
# followed by the softmax / categorical cross-entropy output layer.
dense_layers = [
    Linear(in_features=400, out_features=120),
    Tanh(),
    Linear(in_features=120, out_features=84),
    Tanh(),
    Linear(in_features=84, out_features=10),
    OutputLayer(activation_function="softmax", loss_function="categorical_cross_entropy"),
]

# The layers are passed to NeuralNetwork positionally, in the order listed above.
net = NeuralNetwork(*input_layers, *conv_layers, *dense_layers)
print(net)

NeuralNetwork:
 (0) Normalization(norm=255.0, dtype=float32)
 (1) Reshape(output_shape=(1, 28, 28))
 (2) Conv2d(in_channels=1, out_channels=6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), optimizer=Adam(lr=0.001, lr_decay=0.0, beta1=0.9, beta2=0.999, eps=1e-08), initialization=xavier)
 (3) BatchNorm2d(6, eps=1e-05, momentum=0.1)
 (4) Tanh()
 (5) AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
 (6) Conv2d(in_channels=6, out_channels=16, kernel_size=(5, 5), stride=(1, 1), padding=(0, 0), optimizer=Adam(lr=0.001, lr_decay=0.0, beta1=0.9, beta2=0.999, eps=1e-08), initialization=xavier)
 (7) BatchNorm2d(16, eps=1e-05, momentum=0.1)
 (8) Tanh()
 (9) AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
 (10) Flatten()
 (11) Linear(in_features=400, out_features=120, optimizer=Adam(lr=0.001, lr_decay=0.0, beta1=0.9, beta2=0.999, eps=1e-08), initialization=xavier)
 (12) Tanh()
 (13) Linear(in_features=120, out_features=84, optimizer=Adam(lr=0.001, lr_decay=0.0, beta1=0.9, beta2=0.999, eps=1e-08), ini

### 4. Choose the number of samples to train on

In [4]:
# Number of leading training samples to keep for this demo run.
N = 3_000
# Truncate images and labels with the same slice so they stay aligned.
# Note: this rebinds X_train / y_train, so the full training set is only
# recoverable by re-running the loading cell.
X_train = X_train[:N]
y_train = y_train[:N]
# Display the shapes of the reduced training arrays.
tuple(subset.shape for subset in (X_train, y_train))

((3000, 28, 28), (3000,))

### 5. Train the neural network

In [5]:
# Training options for this run, gathered in one place so they are easy to tweak.
fit_options = dict(epochs=5, batch_size=64, shuffle=True)
# Train on the reduced training set selected in the previous cell.
# NOTE(review): no random seed is set in this notebook, so the reported
# per-epoch figures are presumably not reproducible run to run - confirm.
net.fit(X_train, y_train, **fit_options)

Training on 3000 samples:
Epoch    1 of 5    	 Average Error = 0.803294 	 Average Accuracy = 77.43%                                   
Epoch    2 of 5    	 Average Error = 0.314118 	 Average Accuracy = 91.60%                                   
Epoch    3 of 5    	 Average Error = 0.229122 	 Average Accuracy = 94.00%                                   
Epoch    4 of 5    	 Average Error = 0.175644 	 Average Accuracy = 95.50%                                   
Epoch    5 of 5    	 Average Error = 0.133440 	 Average Accuracy = 96.80%                                   
Training time : 00 hours, 00 minutes, 37 seconds


### 6. Make predictions on the test samples

In [6]:
# Run the trained network on the test images; to="labels" is requested so the
# result can be compared directly with the integer labels in y_test.
y_pred = net.predict(X_test, to="labels")
# Preview: first 10 predicted labels next to the first 10 true labels.
tuple(labels[:10] for labels in (y_pred, y_test))

(array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9]),
 array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], dtype=uint8))

### 7. Evaluate the accuracy score

In [7]:
# Score the predictions against the true test labels; the `.2%` format spec
# renders the score as a percentage with two decimals.
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy score on the test set: {test_accuracy:.2%}")

Accuracy score on the test set: 94.19%


### 8. Build a confusion matrix

In [8]:
# Build the confusion matrix from the true and predicted test labels.
# `cm` is reused by the classification-report cell that follows.
cm = confusion_matrix(y_test, y_pred)
# NOTE(review): in the recorded output each row sums to that class's support
# (e.g. row 0 sums to 980), so rows appear to be true labels and columns
# predicted labels - confirm against confusion_matrix's definition.
print(f"Confusion matrix:\n{cm}")

Confusion matrix:
[[ 967    0    1    0    0    1    4    1    6    0]
 [   0 1113    3    2    0    1    5    0   10    1]
 [   7    1  968    7    8    1    5   16   15    4]
 [   0    0   21  922    0   40    1   16    6    4]
 [   1    4    4    0  896    1   11    3    3   59]
 [  12    1    0   11    1  839    8    3    9    8]
 [  16    3    3    0    8   10  913    0    5    0]
 [   0    5   39    2    2    0    0  955    0   25]
 [  12    2    5    5    7    7    3    8  897   28]
 [   5    5    3    6    6    9    0   18    8  949]]


### 9. Display a classification report

In [9]:
# Per-class precision / recall / F1 / support report, computed from the
# confusion matrix `cm` built in the previous cell. formatted=True is passed
# so the result prints as readable text (presumably a string - confirm).
report = classification_report(cm, formatted=True)
print(report)

Class: 0
Precision: 94.80%
Recall: 98.67%
F1-Score: 96.70%
Support: 980

Class: 1
Precision: 98.15%
Recall: 98.06%
F1-Score: 98.10%
Support: 1135

Class: 2
Precision: 92.45%
Recall: 93.80%
F1-Score: 93.12%
Support: 1032

Class: 3
Precision: 96.54%
Recall: 91.29%
F1-Score: 93.84%
Support: 1010

Class: 4
Precision: 96.55%
Recall: 91.24%
F1-Score: 93.82%
Support: 982

Class: 5
Precision: 92.30%
Recall: 94.06%
F1-Score: 93.17%
Support: 892

Class: 6
Precision: 96.11%
Recall: 95.30%
F1-Score: 95.70%
Support: 958

Class: 7
Precision: 93.63%
Recall: 92.90%
F1-Score: 93.26%
Support: 1028

Class: 8
Precision: 93.53%
Recall: 92.09%
F1-Score: 92.81%
Support: 974

Class: 9
Precision: 88.03%
Recall: 94.05%
F1-Score: 90.94%
Support: 1009

