# Training the digit recognition model

We will use the MNIST dataset for training, since we only need to recognize the digits 0-9.

In [1]:
#%% Libraries
from Configurations.models.sudokunet import SudokuNet
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
import tensorflow as tf

# Report which accelerator TensorFlow will use. The check is informational
# only: a missing GPU is reported instead of raising, so the notebook still
# runs top to bottom on CPU-only machines.
device_name = tf.test.gpu_device_name()
if device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    print('[INFO] No GPU found; training will run on the CPU')

Found GPU at: /device:GPU:0


Now we set the hyperparameters and load the dataset.


In [2]:
# Training configuration (tune these to change the run)
INIT_LR = 1e-3  # learning rate given to the optimizer
EPOCHS = 10     # number of passes over the training data
BS = 128        # mini-batch size

# Fetch MNIST as two (images, labels) pairs: one for training, one for testing
print("[INFO] Loading MNIST dataset...")
(trainX, trainY), (testX, testY) = mnist.load_data()

[INFO] Loading MNIST dataset...


## MNIST preprocessing

MNIST data comes as NumPy arrays of shape [28, 28].
We're going to:
- reshape the data into batches with an extra dimension that marks each image as single-channel (grayscale),
- normalize the pixel values (i.e. divide them by 255.0),
- and finally convert the labels from integers to vectors.

The last step changes the encoding from [1, 3, 2, 0] to ``[[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [1, 0, 0, 0]]``

This is called one-hot encoding.

In [3]:
# NOTE: this cell overwrites trainX/testX/trainY/testY in place, so it is not
# idempotent -- run it exactly once after loading the dataset (re-running it
# would divide the pixel values by 255 a second time).

# Adding the grayscale channel
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))
# Size of trainX = (60000, 28, 28, 1)

# Normalizing: cast to float32 and scale the pixel values by 1/255
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0

# Convert the labels from integers to one-hot vectors
lb = LabelBinarizer()

# Learn the class -> column mapping from the training labels only, then reuse
# that same mapping for the test labels so both splits are encoded identically.
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)


## Initializing the model

We load the SudokuNet and then compile it with our desired parameters.
The optimizer will be Adam and the loss will be categorical cross-entropy.

In [4]:
#%% Initializing the model
print("[INFO] Compiling the model...")
# `learning_rate` is the supported keyword for Keras optimizers; the old `lr`
# alias is deprecated and rejected by newer Keras releases.
opt = Adam(learning_rate=INIT_LR)
# 28x28 single-channel inputs, one output class per digit (0-9)
model = SudokuNet.build(width=28, height=28, depth=1, classes=10)

print("[INFO] Compiling Model")
# Categorical cross-entropy matches the one-hot encoded labels; accuracy and
# MSE are tracked as additional metrics during training.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy", "mse"])

[INFO] Compiling the model...
[INFO] Compiling Model


## Training the model

We start the training.

In [5]:
#%% Training the model

# Fit on the training split, passing the test split as validation data.
# The object returned by fit() is kept in `history` for later inspection.
history = model.fit(
    trainX,
    trainY,
    batch_size=BS,
    epochs=EPOCHS,
    validation_data=(testX, testY),
    verbose=1,
)

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9

## Evaluation of the model

Finally, we evaluate the model on the test set, print a per-digit classification report, and save the trained model to disk.


In [None]:
#%% Model evaluation
print("[INFO] Evaluating the model...")
predictions = model.predict(testX)

# Reduce the one-hot labels and the per-class prediction scores to class
# indices, and label the report rows with the classes the binarizer learned.
true_digits = testY.argmax(axis=1)
predicted_digits = predictions.argmax(axis=1)
digit_names = list(map(str, lb.classes_))

report = classification_report(true_digits, predicted_digits, target_names=digit_names)
print(report)

# Write the trained model to disk
print("[INFO] Saving the model...")
model.save("./Configurations/models/sudokunet.h5")

[INFO] Evaluating the model...
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       0.99      1.00      1.00      1135
           2       0.99      1.00      0.99      1032
           3       1.00      0.99      0.99      1010
           4       0.99      1.00      0.99       982
           5       0.99      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       1.00      0.99      0.99      1028
           8       0.99      0.99      0.99       974
           9       1.00      0.97      0.98      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

[INFO] Saving the model...
