In [None]:
import numpy as np
import cv2
import os
import pandas as pd
import string
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from tensorflow.keras.backend import ctc_batch_cost

from keras_preprocessing.sequence import pad_sequences
from keras_preprocessing.image import ImageDataGenerator

from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPooling2D, Lambda, Bidirectional, TimeDistributed, Flatten, Layer
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:

# Alphabet used to turn characters into integer labels: a character's label
# is its position in `char_list`. The same string is meant to be used to map
# labels back to characters when decoding.

char_list = ".0123456789"

# Starts at 0; nothing in the visible cells updates it afterwards.
max_label_len = 0

print(f"{char_list} {len(char_list)}")

def encode_to_labels(txt, alphabet=None):
    """Encode every character of ``txt`` as its index in the alphabet.

    Args:
        txt: Iterable of characters (normally a string) to encode.
        alphabet: String whose character positions define the labels.
            Defaults to the notebook-level ``char_list``.

    Returns:
        A list of integer labels, one per character of ``txt``, in order.
        An empty ``txt`` yields an empty list.

    Raises:
        ValueError: If a character of ``txt`` is not part of the alphabet.
    """
    if alphabet is None:
        # Looked up at call time so the function follows the current alphabet.
        alphabet = char_list

    dig_lst = []
    for position, chara in enumerate(txt):
        try:
            dig_lst.append(alphabet.index(chara))
        except ValueError:
            # Same exception type as before, but say which character failed
            # and where, instead of the bare "substring not found".
            raise ValueError(
                f"character {chara!r} at position {position} is not in the "
                f"alphabet {alphabet!r}"
            ) from None

    return dig_lst

# Sanity check: encoding the alphabet itself should give each character's own
# position, i.e. 0 .. len(char_list) - 1.
encode_to_labels(char_list)

.0123456789 11


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [None]:
# Two independent, initially empty lists. Note that the name `labels` is
# rebound to a Keras Input tensor in the model-building cell further down.
images, labels = [], []



In [None]:

# Resizing the images and normalizing.

def process_image(img, size=32):
    """Resize a single-channel image to a square, invert it and rescale it.

    The image is resized to ``size`` x ``size`` without preserving the aspect
    ratio, converted to float32, inverted as ``255 - pixel`` and divided by
    255. For 8-bit input this gives values in [0, 1].

    The earlier padding / shrinking branches were removed: the image is
    always resized to the target size first, so they could never run.

    Args:
        img: 2-D NumPy array holding one grayscale image.
        size: Side length in pixels of the square output. Defaults to 32.

    Returns:
        float32 array of shape ``(size, size, 1)``.

    Raises:
        ValueError: If ``img`` is not 2-D (for example a colour image).
    """
    if img.ndim != 2:
        # The original tuple-unpacking of img.shape also raised ValueError
        # here; keep the type but explain the cause.
        raise ValueError(
            f"process_image expects a 2-D grayscale image, got shape {img.shape}"
        )

    img = cv2.resize(img, (size, size))
    img = img.astype('float32')

    # Invert intensities (255 - pixel).
    img = cv2.subtract(255, img)

    # Add the trailing channel axis: (size, size) -> (size, size, 1)
    img = np.expand_dims(img, axis=2)

    # Normalize
    img = img / 255

    return img

In [None]:
from google.colab import drive

# Mount Google Drive at an absolute path. Every later cell reads from
# '/content/content/MyDrive/...', so the mount point must not depend on the
# current working directory (the previous relative 'content/' did).
drive.mount('/content/content')

Mounted at content/


In [None]:
# Annotation tables for the training and test splits, read from the mounted
# Drive. The generators below use their 'filename' and 'class' columns.
traindf = pd.read_csv(
    '/content/content/MyDrive/numbers/train/_annotations.csv'
)
testdf = pd.read_csv(
    '/content/content/MyDrive/numbers/test/_annotations.csv'
)

In [None]:
# Shared generator for the training/validation split: pixel values are
# multiplied by 1/255 and 25% of the rows are reserved for validation.
datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    validation_split=0.25,
)

In [None]:
# Data generators for the training, validation and test images.
#
# All three load images as single-channel ('grayscale') 32x32 arrays so the
# batches match the model input of shape (32, 32, 1). The default
# color_mode is 'rgb', which would produce (32, 32, 3) batches instead.

# Training subset of `traindf`: shuffled batches of 8 images with the raw
# values of the 'class' column as targets.
train_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory='/content/content/MyDrive/numbers/train',
    x_col='filename',
    y_col='class',
    subset='training',
    class_mode='raw',
    color_mode='grayscale',
    batch_size=8,
    target_size=(32, 32),
    shuffle=True,
)

# Validation subset of the same dataframe, one image per batch.
valid_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory='/content/content/MyDrive/numbers/train',
    x_col='filename',
    y_col='class',
    subset='validation',
    class_mode='raw',
    color_mode='grayscale',
    batch_size=1,
    target_size=(32, 32),
    shuffle=True,
)

# The test generator only rescales; it has no validation split.
test_datagen = ImageDataGenerator(rescale=1./255.)

# Test images are served in file order, one per batch, without targets.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf,
    batch_size=1,
    directory='/content/content/MyDrive/numbers/test',
    x_col='filename',
    y_col=None,
    shuffle=False,
    color_mode='grayscale',
    target_size=(32, 32),
    class_mode=None,
)



Found 2152 validated image filenames.
Found 717 validated image filenames.
Found 100 validated image filenames.


In [None]:
class CTCLayer(Layer):
  """Layer that adds the CTC loss to the model and passes predictions through.

  Called with ``(y_true, y_pred)``: computes ``ctc_batch_cost`` over the
  batch, registers it via ``add_loss`` and returns ``y_pred`` unchanged.
  """

  def __init__(self, name=None, **kwargs):
    """Create the layer.

    Args:
      name: Optional layer name.
      **kwargs: Any other base ``Layer`` arguments (such as ``trainable`` or
        ``dtype``). Accepting them lets the layer be rebuilt from its config;
        with a ``name``-only signature that raised ``TypeError``.
    """
    super().__init__(name=name, **kwargs)
    self.loss_fn = keras.backend.ctc_batch_cost

  def call(self, y_true, y_pred):
    """Register the CTC loss for this batch and return ``y_pred``.

    Args:
      y_true: Label tensor; axis 0 is used as the batch size and axis 1 as
        the label length.
      y_pred: Prediction tensor; axis 1 is used as the input (time) length.

    Returns:
      ``y_pred``, unchanged.
    """
    batch_len = tf.cast(tf.shape(y_true)[0], dtype='int64')
    input_length = tf.cast(tf.shape(y_pred)[1], dtype='int64')
    label_length = tf.cast(tf.shape(y_true)[1], dtype='int64')

    # Broadcast the scalar lengths to (batch, 1). Every sample is given the
    # full width of y_true as its label length.
    # NOTE(review): this presumably requires all labels in a batch to have
    # the same true length (no padding) - confirm against the data pipeline.
    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype='int64')
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype='int64')

    loss = self.loss_fn(y_true, y_pred, input_length, label_length)

    self.add_loss(loss)

    return y_pred


In [None]:

# Two model inputs: the 32x32 single-channel image and the label sequence
# that is handed to the CTC layer.
inputs = Input(name="image", shape=(32, 32, 1), dtype="float32")
labels = Input(name="label", shape=(None,), dtype="float32")
activation = 'relu'

# Convolutional feature extractor: two stages of conv -> conv -> max-pool.
x = inputs
for first_filters, second_filters in ((8, 16), (32, 64)):
    x = Conv2D(first_filters, 3, activation=activation, padding="same")(x)
    x = Conv2D(second_filters, 3, activation=activation, padding="same")(x)
    x = MaxPooling2D()(x)

# Flatten everything after axis 1 so that axis 1 becomes the sequence axis
# fed to the recurrent layers.
tdist = TimeDistributed(Flatten(), name='timedistrib')(x)

# Recurrent head: dense projection followed by two bidirectional LSTMs.
rnn_in = Dense(128, activation=activation, name='dense_in')(tdist)
x = Bidirectional(LSTM(64, return_sequences=True))(rnn_in)
x = Bidirectional(LSTM(64, return_sequences=True))(x)

# One softmax unit per character of char_list plus one extra class
# (presumably the CTC blank).
outputs = Dense(len(char_list) + 1, activation='softmax', name="predictions")(x)
output = CTCLayer(name="ctc_loss")(labels, outputs)

model = Model(inputs=[inputs, labels], outputs=output)

# No `loss=` is given to compile: the loss is registered by CTCLayer.
opt = keras.optimizers.Adam()
model.compile(optimizer=opt)




In [None]:
# Print the layer table (layer type, output shape, parameter count) of the model.
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 8)         80        
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 16)        1168      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 16)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        4640      
                                                                 
 conv2d_3 (Conv2D)           (None, 16, 16, 64)        18496     
                                                             

In [None]:
# Train for up to `epochs` epochs. Stop early once the validation loss has
# not improved for `early_stopping_patience` epochs, then roll back to the
# best weights seen.
epochs = 100
early_stopping_patience = 10

early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True)

# `val_loss` only exists when validation data is passed to fit(). Without it
# the callback can never trigger or restore weights.
# NOTE(review): the generators yield (image, class) batches while `model`
# declares two inputs ("image" and "label") - confirm how labels are meant to
# reach the CTC layer before relying on this cell.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    callbacks=[early_stopping],
)


In [None]:


# Alternative, Lambda-based CTC setup: the loss is computed inside the graph
# and exposed as the output of a dedicated training model.

# `max_label_len` is still 0 as initialised earlier in this notebook, which
# would create a zero-width label input. Fall back to a variable-length
# label axis when no positive length has been set.
the_labels = Input(name='the_labels', shape=[max_label_len or None], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')


def ctc_lambda_func(args):
    """Compute the CTC batch cost from ``[y_pred, labels, input_length, label_length]``.

    Args:
        args: Sequence of four tensors in the order
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost(labels, y_pred, input_length, label_length)``.
    """
    y_pred, labels, input_length, label_length = args

    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, the_labels, input_length, label_length])

# Wire the loss tensor into its own model. Previously `loss_out` was never
# used and `model` (whose only output is the 'ctc_loss' layer) was recompiled
# with a loss keyed on the non-existent output name 'ctc'.
ctc_model = Model(inputs=[inputs, the_labels, input_length, label_length], outputs=loss_out)

# The 'ctc' output already is the loss value, so the Keras loss just returns
# it. An 'accuracy' metric on a loss value is meaningless and was dropped.
ctc_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='Adam')

# `fit` accepts generators directly; `fit_generator` is deprecated.
# NOTE(review): `train_generator` yields (image, class) batches, whereas
# `ctc_model` declares four named inputs - an adapter that supplies encoded
# labels and both length tensors is presumably still needed. Confirm.
H = ctc_model.fit(train_generator, epochs=10, verbose=1)