In [None]:
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

In [None]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

# MNIST dataset

<div style="display:flex;gap:2rem;">
<div style="flex:0.5;">

The MNIST dataset contains handwritten digit images and their labels. The images are 28x28 grayscale pixels.
Normalization is applied so that pixel values are in the range [0, 1].

Important Terms:
- DNN (Deep Neural Network): A feed-forward neural network with multiple layers. In our example, we flatten the image and use dense layers.
  Activation: ReLU, $f(x)=\max(0,x)$. Dropout is used for regularization: during training it randomly sets a fraction of its inputs to 0, which helps prevent overfitting.
- CNN (Convolutional Neural Network): A neural network that uses convolutional layers to automatically extract spatial features.
  Convolution formula (discrete): $(f * g)[i, j] = \sum_{k}\sum_{l} f[k, l] \cdot g[i - k,\, j - l]$.
  LeNet-5 is a classical CNN architecture originally designed for digit recognition.

</div>
<div style="flex:0.5;">

![image.png](attachment:2e9bb22b-bd54-46bf-9b51-9dcec519f5ba.png)

</div>
</div>

The following code loads and preprocesses the MNIST dataset.

In [None]:
# Load MNIST and scale the pixel values into the range [0, 1].
(train_x, train_y), (test_x, test_y) = keras.datasets.mnist.load_data()
train_x = train_x / 255.0
test_x = test_x / 255.0

# Add a trailing channel axis (grayscale => one channel) at position 3.
train_x = tf.expand_dims(train_x, 3)
test_x = tf.expand_dims(test_x, 3)

# Hold out the first VAL_SIZE training samples for validation and remove
# them from the training set. Leaving them in train_x as well would mean the
# model is validated on samples it has been trained on, which makes the
# validation metrics overly optimistic.
VAL_SIZE = 5000
val_x, val_y = train_x[:VAL_SIZE], train_y[:VAL_SIZE]
train_x, train_y = train_x[VAL_SIZE:], train_y[VAL_SIZE:]

## Image Visualization

We plot a few examples from the training set to visualize the handwritten digits.

In [None]:
# Image visualization: plot an n x n grid of training images, each titled
# with its label so the picture can be checked against the ground truth.
n = 5
fig, axs = plt.subplots(nrows=n, ncols=n, sharex=True, sharey=True, figsize=(12, 12))
for i, ax in enumerate(axs.flat):
    # Invert the intensities (1 - x) so that high pixel values render dark
    # on a light background with the gray colormap.
    ax.imshow(1 - train_x[i, :, :, 0], cmap="gray")
    ax.set_title(f"label: {int(train_y[i])}")
    # Pixel-index ticks carry no information for these thumbnails.
    ax.axis("off")
plt.tight_layout()
plt.show()

# Image Processing with DNN

A simple deep neural network (dnn) is defined for image classification.
This dnn flattens the image and applies dense layers with relu activations and dropout.

In [None]:
# Fully connected classifier: flatten the image, one hidden ReLU layer with
# dropout, then a softmax over the 10 digit classes.
model_dnn = keras.models.Sequential([
    # The data carries a trailing channel axis (added with
    # tf.expand_dims(..., 3)), so the declared input shape is (28, 28, 1)
    # rather than (28, 28). Flatten yields the same 784 features either way.
    keras.layers.Flatten(input_shape=(28, 28, 1)),
    keras.layers.Dense(128, activation='relu'),
    # Dropout rate 0.2, used for regularization.
    keras.layers.Dropout(0.2),
    keras.layers.Dense(10, activation='softmax')
])
model_dnn.summary()

In [None]:
# Training setup for the DNN: Adam optimizer, sparse categorical
# cross-entropy as the loss, and accuracy as the reported metric.
model_dnn.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Fit for 5 epochs on the training data.
model_dnn.fit(x=train_x, y=train_y, epochs=5)

# Score the trained DNN on the test data; as the last expression of the
# cell, the returned value is shown as the cell output.
model_dnn.evaluate(x=test_x, y=test_y)

In [None]:
# Run the DNN over the test data, then show the first prediction vector
# together with the first actual label.
pred_y = model_dnn.predict(test_x)
first_prediction, first_label = pred_y[0], test_y[0]
display((first_prediction, first_label))

In [None]:
# Compare predicted classes with the actual labels. Printing one line per
# test image floods the notebook output, so only the first N_PREVIEW pairs
# are printed, followed by the overall share of matching predictions.
N_PREVIEW = 20
pred_classes = np.argmax(pred_y, axis=1)  # most probable class per image
for pred, actual in zip(pred_classes[:N_PREVIEW], test_y[:N_PREVIEW]):
    print(pred, actual)
print(f"accuracy over {len(test_y)} test images: {np.mean(pred_classes == test_y):.4f}")

# Image Processing with CNN (LeNet)

<div style="display:flex;gap:2rem;">
<div style="flex:0.5;">

> Based on https://github.com/RichmondAlake/tensorflow_2_tutorials/blob/master/13_lenet-5.ipynb

The following section implements a convolutional neural network based on the LeNet-5 architecture (LeCun et al., 1998; the earliest LeNet variants date back to 1989–1990).
LeNet-5 uses convolutional layers with tanh activations, average pooling, and fully connected layers.
Key concepts:
- convolution: performs feature extraction using kernels (see formula above).
- average pooling: reduces spatial dimensions by computing the average over windows.

</div>
<div style="flex:0.5;">

![image.png](https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fimg-blog.csdnimg.cn%2Fd1d66cd11bf545ed8bca641334012a8b.png&f=1&nofb=1&ipt=a552f34c6975b62f1c334334839726eea965603c1815e6b982011b2dd60570bc&ipo=images)

</div>
</div>

## LeNet-5 (1998) model

In [None]:
# Assemble the LeNet-5 style network layer by layer.
lenet_5_model = keras.models.Sequential()

# Feature extractor: two 5x5 tanh convolutions, each followed by 2x2
# average pooling. The first convolution pads ('same') and takes its input
# shape from a single training sample; the second one does not pad ('valid').
lenet_5_model.add(
    keras.layers.Conv2D(
        6,
        kernel_size=5,
        strides=1,
        activation="tanh",
        input_shape=train_x[0].shape,
        padding="same",
    )
)
lenet_5_model.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))
lenet_5_model.add(
    keras.layers.Conv2D(16, kernel_size=5, strides=1, activation="tanh", padding="valid")
)
lenet_5_model.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))

# Classifier head: flatten, two tanh dense layers (120 and 84 units), and a
# 10-way softmax output.
lenet_5_model.add(keras.layers.Flatten())
lenet_5_model.add(keras.layers.Dense(120, activation="tanh"))
lenet_5_model.add(keras.layers.Dense(84, activation="tanh"))
lenet_5_model.add(keras.layers.Dense(10, activation="softmax"))

lenet_5_model.summary()

# Same optimizer, loss and metric as used for the DNN above.
lenet_5_model.compile(
    optimizer="adam",
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

### Training and Evaluation

We train the LeNet-5 model on the MNIST training data with a validation split.
TensorBoard is used to log training details.

In [None]:
def get_run_logdir(base_dir=None):
    """Return a timestamped log directory path for one training run.

    Args:
        base_dir: Directory under which the run directory is placed. When
            omitted, the module-level ``root_logdir`` is used, so existing
            zero-argument calls keep working.

    Returns:
        ``base_dir`` joined with a run id of the form
        ``run_YYYY_MM_DD-HH_MM_SS`` built from the current local time.
    """
    if base_dir is None:
        # Looked up at call time, so root_logdir may be assigned after this
        # function is defined (as it is in this cell).
        base_dir = root_logdir
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(base_dir, run_id)

# Build the TensorBoard log root from separate path components. A literal
# "logs\\fit\\" only forms nested directories on Windows; on other systems
# the backslash is not a path separator and a single oddly named directory
# would be created instead.
root_logdir = os.path.join(os.curdir, "logs", "fit")
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

# Train LeNet-5 for 15 epochs, reporting metrics on the validation data
# after each epoch and logging the run for TensorBoard.
lenet_5_model.fit(
    train_x,
    train_y,
    epochs=15,
    validation_data=(val_x, val_y),
    callbacks=[tensorboard_cb],
)

# Final score on the test data (shown as the cell output).
lenet_5_model.evaluate(test_x, test_y)

# Useful URLs

- [TensorFlow - Python Deep Learning Neural Network API (deeplizard)](https://deeplizard.com/learn/video/RznKVRTFkBY)