# 0-9 Digit Convolutional Neural Network

## Building dataset

### Import libraries

In [None]:
!pip install pillow
!pip install tensorflow
!pip install numpy
!pip install scikit-learn
!pip install matplotlib

In [None]:
import os
import tarfile
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Untar dataset archive

In [None]:
# Unpack the dataset archive into the working directory, unless the
# extracted 'numbers' folder is already there from a previous run.
if os.path.isdir('./numbers/'):
    print('Already untarred')
else:
    with tarfile.open('numbers.tar.xz') as archive:
        archive.extractall('.')

### Creating training and testing datasets

In [None]:
# Training generator: rescales pixels to [0, 1] and applies light random
# shear/zoom augmentation. Horizontal flipping is deliberately not used:
# a mirrored digit (e.g. 2, 3, 5, 7) is no longer a valid sample of its class.
datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    validation_split = 0.2
)

# Evaluation generator: same rescaling and the same split fraction, but no
# random augmentation, so validation metrics are measured on unmodified images.
# The split is taken by file position within each class folder, so using the
# same `validation_split` keeps this subset disjoint from the training subset.
eval_datagen = ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.2
)

# 80% of the images in 'numbers', one sub-folder per class, one-hot labels.
train_dataset = datagen.flow_from_directory(
    'numbers',
    target_size = (64, 64),
    batch_size = 128,
    class_mode = 'categorical',
    subset = 'training'
)

# Remaining 20%, served in a fixed order so repeated evaluation passes see
# the same batches.
test_dataset = eval_datagen.flow_from_directory(
    'numbers',
    target_size = (64, 64),
    batch_size = 128,
    class_mode = 'categorical',
    subset = 'validation',
    shuffle = False
)

In [None]:
# Report the number of batches in the training and validation generators.
for dataset in (train_dataset, test_dataset):
    print(len(dataset))

## Creating convolutional neural network

### Initialize network

In [None]:
# Start from an empty sequential model; layers are appended in a later cell.
cnn = tf.keras.models.Sequential()

### Setup layers

In [None]:
# Architecture, in order: two conv -> max-pool -> dropout stages working on
# 64x64 RGB inputs, then a dense head ending in a 10-way softmax (one unit
# per digit class).
layer_stack = [
    tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation = 'relu'),
    tf.keras.layers.Dense(10, activation = 'softmax'),
]

for layer in layer_stack:
    cnn.add(layer)

### Compile CNN

In [None]:
# Build the optimizer, loss and metric objects first, then hand them to compile().
adam = tf.keras.optimizers.Adam(weight_decay = 0.01)
cross_entropy = tf.keras.losses.CategoricalCrossentropy()
tracked_metrics = [
    tf.keras.metrics.CategoricalAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]

cnn.compile(optimizer = adam, loss = cross_entropy, metrics = tracked_metrics)

### Train CNN on dataset

In [None]:
# Train for 5 epochs on the training generator, using the validation
# generator as validation data.
cnn.fit(train_dataset, epochs = 5, validation_data = test_dataset)

## Testing model accuracy

### Show that model works on an individual image

In [None]:
from tensorflow.keras.preprocessing import image

# Load one sample at the network's input resolution and show it in the notebook.
test_image = image.load_img('numbers-new/4/4_3_137_scaled_x1y1_2.png', target_size = (64, 64))
display(test_image)
# Apply the same 1/255 rescaling the training generator uses; without it the
# network is fed 0-255 pixel values it was never trained on.
test_image = image.img_to_array(test_image) / 255.0
# Add a leading batch axis, since predict() works on batches of images.
test_image = np.expand_dims(test_image, axis = 0)
# Left as the last expression so the notebook displays the class probabilities.
cnn.predict(test_image)

### Get expected and predicted categories for test dataset

In [None]:
# Collect the true and predicted class index for every image in the
# validation generator.
expected_batches = []
predicted_batches = []
# Index by batch number rather than iterating the generator directly, since
# Keras directory iterators are designed to keep yielding batches indefinitely.
for batch_index in range(len(test_dataset)):
    # Fetch the batch exactly once: every indexing call reloads (and, if the
    # generator augments, re-randomizes) the images, so predictions and labels
    # must come from the same fetch.
    images, labels = test_dataset[batch_index]
    # Only the images go to predict(); the one-hot labels are decoded separately.
    predicted_batches.append(cnn.predict(images).argmax(axis = 1))
    expected_batches.append(labels.argmax(axis = 1))

# Join the per-batch results into flat integer arrays of class indices
# (empty arrays if the generator produced no batches).
expected_vals = np.concatenate(expected_batches) if expected_batches else np.array([], dtype = int)
predicted_vals = np.concatenate(predicted_batches) if predicted_batches else np.array([], dtype = int)

In [None]:
# Sanity check: the two sequences should have the same number of entries.
for values in (expected_vals, predicted_vals):
    print(len(values))

### Generate confusion matrix graph

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score

# Tabulate expected vs. predicted class counts, then render them as a plot.
matrix = confusion_matrix(expected_vals, predicted_vals)
cmd = ConfusionMatrixDisplay(matrix)
cmd.plot()

### Get calculated F1 score

In [None]:
# Macro average: F1 is computed per class and then averaged with equal weight.
f1_score(y_true = expected_vals, y_pred = predicted_vals, average = 'macro')

## Visualizing the model

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
cnn.summary()

## Save model

In [None]:
# Saving is disabled by default. Uncomment the line below to write the
# trained model to 'model.h5' in the working directory.
# cnn.save('model.h5')