# 0-9 Digit Convolutional Neural Network

## Building dataset

### Import libraries

In [None]:
!pip install pillow
!pip install tensorflow
!pip install numpy
!pip install scikit-learn
!pip install matplotlib
!pip install visualkeras
!pip install scikit-image

In [None]:
import os
import tarfile
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Untar dataset archive

In [None]:
# Unpack the dataset archive into the working directory, unless a previous run
# already produced the 'numbers' directory.
# NOTE(review): on Python versions that support it, consider passing an
# extraction filter to extractall() if the archive is not fully trusted.
if os.path.isdir('./numbers/'):
    print('Already untarred')
else:
    with tarfile.open('numbers.tar.xz') as archive:
        archive.extractall('.')

### Creating training and testing datasets

In [None]:
# Generator for the training images: rescales pixel values to [0, 1] and
# applies random shear/zoom augmentation. Horizontal flipping is deliberately
# not used: a mirrored digit is not a valid example of its class.
datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    validation_split = 0.2
)

# Generator for the held-out images: rescaling only, so validation metrics are
# measured on undistorted data. It uses the same validation_split as the
# training generator; the split is expected to partition the files of each
# class identically for both generators.
eval_datagen = ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.2
)

# 80% of the images under 'numbers/<class>/', resized to 64x64, with one-hot
# labels derived from the sub-directory names.
train_dataset = datagen.flow_from_directory(
    'numbers',
    target_size = (64, 64),
    batch_size = 128,
    class_mode = 'categorical',
    subset = 'training'
)

# Remaining 20% of the images, used for validation and for the evaluation
# cells below. Shuffling is disabled so repeated evaluation passes visit the
# images in a stable order.
test_dataset = eval_datagen.flow_from_directory(
    'numbers',
    target_size = (64, 64),
    batch_size = 128,
    class_mode = 'categorical',
    subset = 'validation',
    shuffle = False
)

In [None]:
# Report the length of each dataset iterator (for a Keras directory iterator
# this is the number of batches per epoch), training first, then validation.
for dataset in (train_dataset, test_dataset):
    print(len(dataset))

## Creating convolutional neural network

### Initialize network

In [None]:
# Start from an empty sequential model; the layers are added in the next cell.
cnn = tf.keras.models.Sequential()

### Setup layers

In [None]:
# Layer stack, listed in forward order:
#   input (64x64 RGB)
#   -> two [3x3 convolution -> 2x2 max-pool -> 20% dropout] stages (32, then 64 filters)
#   -> flatten -> 128-unit dense -> 10-way softmax (one output per digit class)
# NOTE: re-running this cell appends the whole stack again; re-run the
# initialisation cell first.
layer_stack = [
    tf.keras.Input(shape = (64, 64, 3)),

    tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation = 'relu'),
    tf.keras.layers.Dense(10, activation = 'softmax'),
]

for layer in layer_stack:
    cnn.add(layer)

### Compile CNN

In [None]:
# Training configuration: Adam with weight decay, categorical cross-entropy on
# the one-hot labels, and accuracy / precision / recall as reported metrics.
adam = tf.keras.optimizers.Adam(weight_decay = 0.01)
crossentropy = tf.keras.losses.CategoricalCrossentropy()
tracked_metrics = [
    tf.keras.metrics.CategoricalAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]

cnn.compile(optimizer = adam, loss = crossentropy, metrics = tracked_metrics)

### Train CNN on dataset

In [None]:
# Train for 5 epochs on the training subset, evaluating on the validation
# subset after each epoch.
cnn.fit(train_dataset, epochs = 5, validation_data = test_dataset)

## Testing model accuracy

### Load model (uncomment to skip training and use the saved model)

In [None]:
# cnn = tf.keras.models.load_model('model.h5')

### Show that model works on an individual image

In [None]:
from tensorflow.keras.preprocessing import image

# Load one sample image at the network's input resolution and show it.
test_image = image.load_img('numbers/7/384.png', target_size = (64, 64))
display(test_image)

# Convert to an array, add a leading batch axis, and scale the pixel values
# by 1/255 to match the rescaling used by the data generator.
test_image = image.img_to_array(test_image)[np.newaxis, ...] / 255

# Class scores for the single image (one row, one column per class).
cnn.predict(test_image)

### Get expected and predicted categories for test dataset

In [None]:
# Collect the true and the predicted class index for every validation image.
expected_batches = []
predicted_batches = []

# Index by batch number: the iterator is addressed through len() and
# __getitem__, so exactly len(test_dataset) batches are visited.
for i in range(len(test_dataset)):
    # Fetch each batch exactly once. Every test_dataset[i] access reloads the
    # images, so the labels and the predictions must come from the same fetch.
    batch_images, batch_labels = test_dataset[i]

    # Predict on the image array only (not on the (images, labels) pair) and
    # reduce the class scores / one-hot labels to class indices.
    predicted_batches.append(cnn.predict(batch_images).argmax(axis = 1))
    expected_batches.append(batch_labels.argmax(axis = 1))

# Flatten the per-batch results; fall back to empty containers when the
# dataset yields no batches.
expected_vals = np.concatenate(expected_batches).tolist() if expected_batches else []
predicted_vals = np.concatenate(predicted_batches) if predicted_batches else np.array([], dtype = int)

In [None]:
# Sanity check: both counts should be equal (one entry per validation image).
print(len(expected_vals), len(predicted_vals), sep = '\n')

### Generate confusion matrix graph

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score

# Count (true class, predicted class) pairs, then draw them as a heat map.
matrix = confusion_matrix(expected_vals, predicted_vals)
cmd = ConfusionMatrixDisplay(confusion_matrix = matrix)
cmd.plot(cmap = 'viridis')

### Get calculated F1 score

In [None]:
# Macro-averaged F1: the unweighted mean of the per-class F1 scores.
f1_score(y_true = expected_vals, y_pred = predicted_vals, average = 'macro')

## Visualizing the model

### Table summary of layers

In [None]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
cnn.summary()

### Diagrams of model architecture

In [None]:
import visualkeras

# Layered (volume-style) rendering of the model, with a legend and dimensions.
layered_diagram = visualkeras.layered_view(cnn, legend = True, show_dimension = True)
display(layered_diagram)

# Graph (node/edge) rendering of the same model.
graph_diagram = visualkeras.graph_view(cnn)
display(graph_diagram)

### Visualizing effects of layers

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Pick the layers by type and position rather than by auto-generated names
# such as 'conv2d_1' / 'dense_1': those names change whenever the layer cells
# are re-run in the same session.
last_conv_layer = next(
    layer for layer in reversed(cnn.layers)
    if isinstance(layer, tf.keras.layers.Conv2D)
)

# Functional view of the network from its input to the last convolution and
# the final layer. It is used below to enumerate the layers in between.
grad_model = tf.keras.models.Model(
    cnn.inputs,
    [last_conv_layer.output, cnn.layers[-1].output]
)

# code originally from https://stackoverflow.com/questions/63287641/get-each-layer-output-in-keras-model-for-a-single-image

# Model that returns the output of every layer after the input layer, so a
# single predict() call yields all intermediate activations for test_image.
layer_outputs = [layer.output for layer in grad_model.layers[1:]]
visual_model = tf.keras.models.Model(inputs = grad_model.input, outputs = layer_outputs)
feature_maps = visual_model.predict(test_image)
layer_names = [layer.name for layer in grad_model.layers[1:]]

for layer_name, feature_map in zip(layer_names, feature_maps):
    # Only 4-D outputs (batch, rows, cols, channels) can be drawn as images;
    # flattened / dense outputs are skipped.
    if len(feature_map.shape) == 4:
        n_features = feature_map.shape[-1]
        size = feature_map.shape[1]

        # All channels of the layer are tiled side by side in one wide image.
        display_grid = np.zeros((size, size * n_features))
        for i in range(n_features):
            x = feature_map[0, :, :, i]

            # Standardise the channel, then map it to a displayable 0-255
            # range centred on 128. A constant channel (zero standard
            # deviation) is left at the centre value instead of dividing by 0.
            x = x - x.mean()
            std = x.std()
            if std > 0:
                x = x / std
            x = np.clip(x * 64 + 128, 0, 255).astype('uint8')

            display_grid[:, i * size : (i + 1) * size] = x

        scale = 20. / n_features
        plt.figure(figsize=(scale * n_features, scale))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='viridis')

## Predicting numbers with more than one digit

### Read image and generate labels

In [None]:
import skimage as ski

# Read the multi-digit picture from the working directory.
# NOTE: this rebinds the name `image`, which an earlier cell imports as the
# Keras preprocessing module; that cell re-imports it when it is run again.
image = ski.io.imread("702.png")
# Convert to a single-channel grayscale image.
# assumes 702.png is a 3-channel RGB image — TODO confirm
gray = ski.color.rgb2gray(image)
# Label connected regions so each digit can be cropped separately.
# NOTE(review): the grayscale image is labelled directly, without an explicit
# threshold; presumably the digits are bright strokes on a black background —
# confirm, or binarise first.
label_image = ski.measure.label(gray)

### Segment image based upon generated labels

In [None]:
import matplotlib.pyplot as plt

# Show the grayscale picture; a red box is drawn around every labelled region.
fig, ax = plt.subplots(figsize=(8, 6))
ax.imshow(gray, cmap="gray")

segments = []
for region in ski.measure.regionprops(label_image):
    top, left, bottom, right = region.bbox

    # Keep the crop together with its left edge, which is the sort key below.
    crop = gray[top:bottom, left:right]
    segments.append((left, crop))

    # Outline the region on the base image.
    outline = plt.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        edgecolor='red',
        linewidth=2,
        fill=False,
    )
    ax.add_patch(outline)

# Order the crops left-to-right so the digits are predicted in reading order.
segments.sort(key=lambda entry: entry[0])

plt.axis('off')
plt.show()

### Show image segments

In [None]:
# Resize every cropped digit to the 64x64 resolution the network expects.
resized_segments = [ski.transform.resize(seg, (64, 64), anti_aliasing=True) for _, seg in segments]

if resized_segments:
    # squeeze=False makes subplots() always return a 2-D array of axes. With
    # the default, a single segment would yield a bare Axes object that cannot
    # be iterated. ravel() flattens the single row back to a 1-D sequence.
    fig, axes = plt.subplots(1, len(resized_segments), figsize=(10, 5), squeeze=False)
    axes = axes.ravel()

    # One panel per digit, in left-to-right order.
    for ax, segment in zip(axes, resized_segments):
        ax.imshow(segment, cmap="gray")
        ax.axis("off")

    plt.show()
else:
    # subplots() cannot create a grid with zero columns.
    print("No segments to display")

### Iterate through segments and make predictions

In [None]:
# Classify each cropped digit, in left-to-right order.
# NOTE: this reuses the name `predicted_vals` from the evaluation cells above,
# replacing the validation-set predictions.
predicted_vals = []
for _, digit in segments:
    # The network expects a batch of 64x64 three-channel images: replicate the
    # gray channel, resize, then add the leading batch axis.
    rgb_digit = ski.color.gray2rgb(digit)
    model_input = ski.transform.resize(rgb_digit, (64, 64))[np.newaxis, ...]

    prediction = cnn.predict(model_input)
    display(prediction)

    # Index of the highest score is the predicted class.
    predicted_vals.append(prediction.argmax())

### Print predicted number

In [None]:
# Print the predicted digits back to back, forming the multi-digit number.
print(*predicted_vals, sep="")

## Save model

In [None]:
# Persist the trained model to the file that the optional "Load model" cell
# reads back.
cnn.save(filepath = 'model.h5')