In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
from model_funcs import character_error_rate, word_error_rate, ctc_loss_lambda_func, build_CRNN_model
from data_processing import create_datasets, batch_generator
from configs import Configs 
%run "tester_functions.ipynb"

In [2]:
# Reset any state Keras is still holding from a previous run (frees resources it was using)
K.clear_session()
# Enable mixed-precision compute ('mixed_float16') for more efficient GPU work
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Iterating an empty device list is a no-op, so no explicit "any GPUs?" guard is needed.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Best effort: report the problem and carry on with the default allocation behaviour
    print(err)


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 2070 SUPER, compute capability 7.5


In [3]:
# Build the TensorFlow dataset of (variable-width) images and their ground-truth labels
c = Configs()
batch_size = c.batch_size
# number of elements held in the shuffle buffer
buffer_size = c.buffer_size
# Create the complete dataset and shuffle it in one chained expression
total_dataset = create_datasets(
    c.image_paths,
    c.label_path,
    batch_size,
    c.image_height,
    c.image_max_width,
    c.augmentation_probability,
    c.cv_add_data,
).shuffle(buffer_size=buffer_size)
# Every element is a (train, cv) pair; split it into two parallel datasets
training_datasets = total_dataset.map(lambda train_part, _cv_part: train_part)
cv_datasets = total_dataset.map(lambda _train_part, cv_part: cv_part)


In [4]:
# Build the CRNN (fixed height, variable width, one channel) and print its layer table
model = build_CRNN_model((c.image_height, None, 1), c.num_classes)
model.summary()

# Training setup: Adam with the configured learning rate and the CTC loss.
# No metrics are tracked for now; character_error_rate and word_error_rate
# (imported at the top) can be listed in `metrics` to turn them back on.
learn_rate = c.learning_rate
model.compile(
    optimizer=Adam(learning_rate=learn_rate),
    loss=ctc_loss_lambda_func,
    metrics=[],
)

# Stop when val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)
# Write the model to 'OCR model' each time val_loss reaches a new best
checkpoint = ModelCheckpoint(
    filepath='OCR model',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, None, 1)]    0         
                                                                 
 conv1 (Conv2D)              (None, 256, None, 64)     640       
                                                                 
 batch_normalization (BatchN  (None, 256, None, 64)    256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 256, None, 64)     0         
                                                                 
 max1 (MaxPooling2D)         (None, 256, None, 64)     0         
                                                                 
 conv2 (Conv2D)              (None, 256, None, 128)    73856     
                                                             

In [5]:
# Number of epochs for training (temporarily set this to 1 for a quick smoke test)
epochs = c.epoch_num
# fit() returns a History object, not the model. Store it under its own name:
# rebinding `model` to it would break every later use of the model
# (History has no save()/predict()), including the `model.save(...)` cell.
history = model.fit(
    training_datasets,
    epochs=epochs,
    validation_data=cv_datasets,
    # checkpoint keeps the best model on disk; early_stopping ends training
    # once val_loss stops improving
    callbacks=[checkpoint, early_stopping],
    verbose=1
)

Epoch 1/100


In [None]:
# Persist the trained model to disk so it can be loaded again later.
# NOTE(review): `model` must be the Keras model here, not the History object returned by fit().
model.save(filepath='OCR model')