In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Conv2D, MaxPooling2D, Dropout, Lambda
from tensorflow.keras.layers import Input, Activation, BatchNormalization, Reshape
from tensorflow.keras.models import load_model, Model
import tensorflow.keras.backend as K
from configs import Configs 
from data_processing import data_preparator
%run "tester_functions.ipynb"

In [5]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU
# memory incrementally instead of claiming it all when the device is first used.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Looping over an empty list does nothing, so a machine without GPUs
    # simply falls through here.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the problem and carry on rather than aborting the notebook.
    print(err)


In [None]:
# Quick sanity check: prepare one small batch and inspect it.
c = Configs()

# Fixed preview values used for this check only. They intentionally take the
# place of the configured batch size / image height, which were previously read
# from `c` and then immediately overwritten (dead assignments).
batch_size = 50
image_size = 128

X, Y = data_preparator(
    c.image_paths,
    c.label_path,
    image_target_height=image_size,
    batch_size=batch_size,
    augmentation_probability=c.augmentation_probability,
)

# Report how many inputs came back and the shape of the label array.
print(len(X))
print(Y.shape)

# Display the prepared inputs (helper presumably provided by tester_functions.ipynb).
inpute_batch_displayer(X)

In [3]:
c = Configs()
# Retrieve processed data that can be used for training.
X, Y = data_preparator(
    c.image_paths,
    c.label_path,
    image_target_height=c.image_height,
    image_target_width=c.image_width,
    batch_size=c.batch_size,
    augmentation_probability=c.augmentation_probability,
)

# Size the split from the number of samples actually returned rather than from
# the configured batch size, so the proportions stay correct if the two differ.
num_samples = len(X)

# Random permutation of the sample indices.
# NOTE: shuffling is currently DISABLED, so the split below follows the order in
# which data_preparator returned the samples. To enable it, apply the same
# permutation to both X and Y so that inputs and labels stay aligned (this form
# of indexing requires array-like containers, not plain lists):
shuffled_indices = np.random.permutation(num_samples)
# X = X[shuffled_indices]
# Y = Y[shuffled_indices]

# Split the data set into training (75%), cross-validation (12.5%) and test
# (remainder) sets.
train_split = int(0.75 * num_samples)
CV_test_split = int(0.125 * num_samples)
cv_end = train_split + CV_test_split

# Training sets
X_train = X[:train_split]
Y_train = Y[:train_split]
# Cross-validation sets
X_cv = X[train_split:cv_end]
Y_cv = Y[train_split:cv_end]
# Test sets: everything left after the training and cross-validation slices
X_test = X[cv_end:]
Y_test = Y[cv_end:]

In [None]:
def reshape_for_rnn(x):
    """Collapse every axis between the first and the last into one sequence axis.

    The result has three axes: the original first (batch) axis, a single
    flattened "steps" axis, and the original last axis.

    Args:
        x: Backend tensor whose first axis is the batch axis and whose last
            axis has a statically known size.

    Returns:
        The reshaped 3-D tensor.
    """
    static_shape = K.int_shape(x)
    batch = static_shape[0]
    channels = static_shape[-1]

    # Batch size known at graph-construction time: keep the original behaviour.
    if batch is not None:
        return K.reshape(x, shape=(batch, -1, channels))

    # The batch size is None for models built with Input(shape=...). None is not
    # a valid entry in a reshape target, and only one -1 is allowed, so the
    # number of steps has to be spelled out when -1 stands in for the batch axis.
    middle_dims = static_shape[1:-1]
    if all(dim is not None for dim in middle_dims):
        steps = 1
        for dim in middle_dims:
            steps *= dim
        return K.reshape(x, shape=(-1, steps, channels))

    # Some middle axis is dynamic too: read the batch size at run time and let
    # -1 absorb the sequence axis.
    return K.reshape(x, shape=(K.shape(x)[0], -1, channels))

In [4]:
def build_CRNN_model(input_shape, num_classes):
    """Build a CNN + bidirectional-LSTM model with a per-step softmax output.

    Args:
        input_shape: Shape of a single input sample (without the batch axis),
            passed directly to `Input`.
        num_classes: Number of character classes. The output layer has
            `num_classes + 1` units (the extra unit is presumably a blank
            symbol for CTC-style training -- confirm).

    Returns:
        An uncompiled `Model` mapping the input tensor to a sequence of softmax
        distributions over `num_classes + 1` symbols.
    """
    # TODO figure out input
    inputs = Input(shape=input_shape)

    # CNN: four VGG-inspired blocks (conv -> batch norm -> ReLU -> max pool).
    # A pool size of (1, 2) halves only the second spatial axis, so the first
    # spatial axis keeps its full resolution.
    inner = inputs
    for index, filters in enumerate((64, 128, 256, 512), start=1):
        inner = Conv2D(
            filters,
            (3, 3),
            padding='same',
            name=f'conv{index}',
            kernel_initializer='he_normal',
        )(inner)
        inner = BatchNormalization()(inner)
        inner = Activation('relu')(inner)
        inner = MaxPooling2D(pool_size=(1, 2), name=f'max{index}')(inner)

    # Dropout layer to help prevent overfitting.
    inner = Dropout(0.25)(inner)

    # CNN to RNN transition: flatten both spatial axes into one sequence axis,
    # giving (batch, steps, channels). The Reshape target excludes the batch
    # axis, so this works when the batch size is unknown at build time.
    channels = K.int_shape(inner)[-1]
    sequence = Reshape((-1, channels))(inner)

    # RNN: two stacked bidirectional LSTM layers. Each one must consume the
    # output of the previous step (the sequence tensor), not the 4-D CNN output.
    sequence = Bidirectional(LSTM(256, return_sequences=True))(sequence)
    sequence = Bidirectional(LSTM(256, return_sequences=True))(sequence)

    # Final dense layer: softmax over the character classes at every step.
    outputs = Dense(num_classes + 1, activation='softmax')(sequence)

    # The model output is the softmax tensor, not the raw LSTM features.
    model = Model(inputs=inputs, outputs=outputs)
    return model