**Convolutional Neural Network in TensorFlow**

reference: LeNet-5 http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf

#### Load dependencies

In [1]:
import tensorflow
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D
#Flatten: This layer reshapes the input tensor into a one-dimensional array (vector) ..
#.. primarily used to transition from convolutional or recurrent layers, which output multi-dimensional tensors, to fully connected layers, which expect one-dimensional input

#### Load data

In [2]:
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()
#The MNIST dataset is a widely used benchmark for handwritten digit recognition in machine learning.
#It consists of 70,000 grayscale images of handwritten digits (0-9), split into a training set of 60,000 examples and a test set of 10,000 examples.
#The images are 28x28 pixels.

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


#### Preprocess data

In [3]:
X_train = X_train.reshape(60000, 28, 28, 1).astype('float32') #28x28 - 2D image, 1 - for black and white images (vs 3 for RGB images)
X_valid = X_valid.reshape(10000, 28, 28, 1).astype('float32')

In [4]:
X_train /= 255 #to scale to between 0 and 1
X_valid /= 255

In [5]:
n_classes = 10
y_train = to_categorical(y_train, n_classes)
y_valid = to_categorical(y_valid, n_classes)

#### Design neural network architecture

In [6]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1))) #using 32 filters/kernels, using default of stride of 1 and no padding

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5)) #more dropouts in the deeper layers which are believed to be memorizing complex features from training data, causing overfitting

model.add(Dense(n_classes, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
model.summary()

#### Configure model

In [8]:
model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
# trying out Nadam (Nesterov-accelerated Adam) instead of Adam (Adaptive Moment Estimation)

#### Train!

In [9]:
model.fit(X_train, y_train, batch_size=128, epochs=10, verbose=1, validation_data=(X_valid, y_valid))

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 339ms/step - accuracy: 0.8494 - loss: 0.4778 - val_accuracy: 0.9834 - val_loss: 0.0527
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 317ms/step - accuracy: 0.9737 - loss: 0.0849 - val_accuracy: 0.9870 - val_loss: 0.0378
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 333ms/step - accuracy: 0.9806 - loss: 0.0639 - val_accuracy: 0.9895 - val_loss: 0.0323
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 328ms/step - accuracy: 0.9835 - loss: 0.0549 - val_accuracy: 0.9911 - val_loss: 0.0293
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 330ms/step - accuracy: 0.9858 - loss: 0.0442 - val_accuracy: 0.9904 - val_loss: 0.0304
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 318ms/step - accuracy: 0.9880 - loss: 0.0391 - val_accuracy: 0.9900 - val_loss: 0.0313
Epoc

<keras.src.callbacks.history.History at 0x7c157e250550>