# DaKanjiRecognizer - Single Kanji CNN : Training

## Setup

Import the needed libraries.

In [6]:
#std lib
import sys
import os
import time
import datetime

#ML
%load_ext tensorboard
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint

#creating one hot encodings
from sklearn.preprocessing import LabelBinarizer

#plotting/showing graphics
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import Image

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Because the data sets are big let's make sure that the GPU is available to speed up training.

In [7]:
print("GPUs Available: ", tf.test.gpu_device_name())

GPUs Available:  /device:GPU:0


If a GPU with native 16 bit float support (ex.: RTX-series) is available, enable support for it here.

In [8]:
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)
tf.keras.mixed_precision.set_global_policy("mixed_float16")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 2070 SUPER, compute capability 7.5


## Setting up the Data pipeline

Read the labels for each class from file.

In [27]:
labels_1, labels_2, labels = [], [], []

# load labels from file
with open(r'F:\data_sets\etlcdb\encoding_1.txt', mode="r", encoding="utf-8") as f:
    labels_1 = list(eval(f.read()).keys())
with open(r'F:\data_sets\etlcdb\encoding_2.txt', mode="r", encoding="utf-8") as f:
    labels_2 = list(eval(f.read()).keys())
    
labels = labels_1 + labels_2

# save the labels to text files
with open(r'E:\projects\DaKanjiRecognizerML\single_kanji_cnn\labels_python_list.txt', mode="w+", encoding="utf-8") as f:
    f.write(str(labels))
with open(r'E:\projects\DaKanjiRecognizerML\single_kanji_cnn\labels.txt', mode="w+", encoding="utf-8") as f:  
    f.write(''.join(labels))

Create a `tf.keras.dataset` from the saved files.

In [30]:
data_set = tf.keras.preprocessing.image_dataset_from_directory(
    directory=r'F:\data_sets\etlcdb',
    labels="inferred",
    label_mode="categorical",
    color_mode="grayscale",
    batch_size=256,
    image_size=(64, 64),
    validation_split=0.15,
    subset="training",
    seed=123
)

Found 6731099 files belonging to 6543 classes.
Using 5721435 files for training.


In [31]:
print(data_set.take(0))

<TakeDataset shapes: ((None, 64, 64, 1), (None, 6543)), types: (tf.float32, tf.float32)>


## Defining and training the CNN

In [33]:
def get_model(name : str):
    _model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(input_shape=(64, 64, 1), kernel_size=3, activation='relu', filters=32, name="conv2D_1_2_input"),
        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=32, name="conv2D_1_1"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, name="maxpool_1"),

        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=32, name="conv2D_2_1"),
        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=32, name="conv2D_2_2"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, name="maxpool_2"),

        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=64, name="conv2D_3_1"),
        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=64, name="conv2D_3_2"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, name="maxpool_3"),

        tf.keras.layers.Conv2D(kernel_size=3, activation='relu', filters=128, name="conv2D_4_1"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, name="maxpool_4"),

        tf.keras.layers.Flatten(name="flatten_1"),
        tf.keras.layers.Dropout(0.25, name="dropout_1"),
        
        tf.keras.layers.Dense(2048, name="dense_1"),
        tf.keras.layers.Dropout(0.1, name="dropout_2"),

        tf.keras.layers.Dense(2048, name="dense_2"),
        tf.keras.layers.Dropout(0.25, name="dropout_3"),

        tf.keras.layers.Dense(len(labels), name="dense_3"),

        #set the dtype to float32 for numerical stability
        tf.keras.layers.Softmax(dtype="float32", name="softmax_1_output") 
    ], name=name)
    
    return _model


f16_model = get_model("DaKanjiRecognizer_f16")
print(f16_model.output_shape)
f16_model.summary()

(None, 6543)
Model: "DaKanjiRecognizer_f16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2D_1_2_input (Conv2D)    (None, 62, 62, 32)        320       
_________________________________________________________________
conv2D_1_1 (Conv2D)          (None, 60, 60, 32)        9248      
_________________________________________________________________
maxpool_1 (MaxPooling2D)     (None, 30, 30, 32)        0         
_________________________________________________________________
conv2D_2_1 (Conv2D)          (None, 28, 28, 32)        9248      
_________________________________________________________________
conv2D_2_2 (Conv2D)          (None, 26, 26, 32)        9248      
_________________________________________________________________
maxpool_2 (MaxPooling2D)     (None, 13, 13, 32)        0         
_________________________________________________________________
conv2D_3_1 (Conv2D)          (No

Set the optimizer, loss function and compile the model

In [34]:
#path where the model should be saved
model_dir = os.path.join(os.path.dirname(os.getcwd()), "model")
print(model_dir)

E:\projects\DaKanjiRecognizerML\model


In [35]:
opt = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08,)

f16_model.compile(optimizer=opt,
              loss="categorical_crossentropy",
              metrics=['accuracy'])

In [None]:
#optionally load stored weights to resume training
f16_model.load_weights(os.path.join(model_dir, "tf", "checkpoints", "weights-improvement-145-0.98.hdf5"))

In [None]:
#checkpoints setup
filepath = os.path.join(model_dir, "tf", "checkpoints", "weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5")
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

log_dir = os.path.join(model_dir, "tf", "logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [checkpoint, tensorboard_callback]

Finally train the model on the data set (in case of an Interrupt creates checkpoints).

In [36]:
#train the model
hist = f16_model.fit(
    data_set,
    epochs=10,
    initial_epoch=0,
    workers=1
)

Epoch 1/10


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node DaKanjiRecognizer_f16/conv2D_1_2_input/Conv2D (defined at <ipython-input-36-eee8749435b8>:2) ]] [Op:__inference_train_function_2237]

Function call stack:
train_function


## Evaluating the model

After training plot the loss and accuracy for the test and validation sets.

The graphs look good. A training accuracy of ~90% and a validation accuracy of ~95% was reached.<br/>
Let's now make a prediction.

In [None]:
print(x_np[0].shape)

sample = train_x[34]

prediction = f16_model.predict(sample.reshape(1, 64, 64, 1))
t_prediction = lb.inverse_transform(np.array(prediction))
show_image(sample, t_prediction)

## Saving the model

The model is perfoming very well therefore save the trained model as a "*.pb" file.

In [None]:
f16_model.save(os.path.join(model_dir, "tf", "trained_model"))

In [None]:
# Create a float32 model with the same weights as the mixed_float16 model, so
# that it loads into TF Lite
tf.keras.mixed_precision.set_global_policy("float32")
f32_model = get_model("DaKanjiRecognizer_f32")
f32_model.set_weights(f16_model.get_weights())
#f32_model.summary()

Finally convert the model to a TF-Light model to be used in other applications ([DaKanjiRecognizer Desktop](https://github.com/CaptainDario/DaKanjiRecognizer-Desktop))

In [None]:
# Convert the model
converter = tf.lite.TFLiteConverter.from_keras_model(f32_model) # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open(os.path.join(model_dir, "tflite", "model.tflite"), 'wb') as f:
  f.write(tflite_model)
