In [1]:
from helper import create_batches
import tensorflow as tf
import numpy as np
import shelve
import joblib
import time
import math
import random

# The notebook lives inside the 'code' directory, so the data files are one level up.
mount_point = "../"

# Character set, image identifiers and labels are read from the 'IAM_Data' shelf.
with shelve.open(mount_point+'IAM_Data') as shelf:
    vocabulary = shelf['chars']
    list_of_images = shelf['list_of_images']
    image_labels = shelf['image_labels']

# Image arrays are passed to every create_batches(...) call further down.
# This load used to be commented out, so the notebook only ran when `image_arrays`
# happened to be left over in the kernel; on a fresh kernel it raised NameError.
image_arrays = joblib.load(mount_point+'image_arrays')

# Sort list_of_images (currently disabled)
# list_of_images.sort()

# Turn the vocabulary into a sorted list so every character gets the same index on
# every run, then append the blank symbol as the last class.
vocabulary = sorted(vocabulary)
vocabulary.append("<Blank>")

  from ._conv import register_converters as _register_converters
  (fname, cnt))
  (fname, cnt))


In [2]:
# Number of output classes: the characters read from the shelf plus the appended "<Blank>"
len(vocabulary)

75

In [3]:
# Dataset split (14094 images in total):
#   13000 --> Train
#    1000 --> Test
#      94 --> Validation

In [4]:
# ---- Input / output dimensions ----
img_height = 104
img_width = 688
vocab_size = len(vocabulary)

# ---- Optimisation ----
alpha = 0.0001      # learning rate handed to the optimizer
epochs = 50

# Images per training batch; chosen so that it divides the 13000 training images evenly.
batch_size = 200

# ---- Convolutional stack ----
# Side length of the square kernel used by each of the five convolution layers
filter_size_1, filter_size_2, filter_size_3, filter_size_4, filter_size_5 = 5, 3, 3, 3, 1

# Number of filters (output channels) of each convolution layer
num_conv1 = 20
num_conv2 = 50
num_conv3 = 100
num_conv4 = 200
num_conv5 = 400

# ---- Recurrent stack ----
rnn_hidden_units = 200
rnn_layers = 5

# ---- Fully connected head ----
# Input is the concatenation of forward and backward LSTM outputs (2 * hidden units);
# the hidden layer is, as a rule of thumb, twice the vocabulary size.
fc_input_units = 2*rnn_hidden_units
fc_hidden_units = 2*vocab_size
fc_output_units = vocab_size

In [5]:
# with tf.device('/gpu:0'):

# Xavier initializers: one for the dense weights, one for the convolution kernels
fc_initializer = tf.contrib.layers.xavier_initializer()
conv_initializer = tf.contrib.layers.xavier_initializer_conv2d()

# Convolution kernel shapes are [filter_height, filter_width, in_channels, out_channels].
# The first layer sees the single-channel input image; every later layer takes the
# previous layer's filter count as its input channels.
wconv1_shape = [filter_size_1]*2 + [1, num_conv1]
wconv2_shape = [filter_size_2]*2 + [num_conv1, num_conv2]
wconv3_shape = [filter_size_3]*2 + [num_conv2, num_conv3]
wconv4_shape = [filter_size_4]*2 + [num_conv3, num_conv4]
wconv5_shape = [filter_size_5]*2 + [num_conv4, num_conv5]

# Dense weight shapes are [inputs, outputs]
wfc1_shape = [fc_input_units, fc_hidden_units]
wfc2_shape = [fc_hidden_units, fc_output_units]

# Each convolution layer uses one scalar bias (empty shape), shared by all its filters
bconv_shape = []

# Dense layers use one bias per output unit
bfc1_shape = [fc_hidden_units]
bfc2_shape = [fc_output_units]

# Create the variables in a fixed order: conv kernels, dense weights, conv biases, dense biases
wconv1, wconv2, wconv3, wconv4, wconv5 = [
    tf.Variable(conv_initializer(kernel_shape))
    for kernel_shape in (wconv1_shape, wconv2_shape, wconv3_shape, wconv4_shape, wconv5_shape)
]

wfc1, wfc2 = [
    tf.Variable(fc_initializer(weight_shape))
    for weight_shape in (wfc1_shape, wfc2_shape)
]

bconv1, bconv2, bconv3, bconv4, bconv5 = [
    tf.Variable(tf.zeros(bconv_shape)) for _ in range(5)
]

bfc1, bfc2 = [
    tf.Variable(tf.zeros(bias_shape))
    for bias_shape in (bfc1_shape, bfc2_shape)
]


#Model
#----------------------------------------------------------------------------#

def _conv_relu(feature_maps, kernel, bias):
    """Stride-1 'SAME' convolution, plus bias, followed by ReLU."""
    pre_activation = tf.nn.conv2d(input=feature_maps,filter=kernel,padding='SAME',strides=[1,1,1,1]) + bias
    return tf.nn.relu(pre_activation)


def _max_pool(feature_maps, window):
    """Non-overlapping 'SAME' max-pooling with a square window of side `window`."""
    return tf.nn.max_pool(feature_maps,ksize=[1,window,window,1],strides=[1,window,window,1],padding='SAME')


#Batch of grey-scale images; the batch dimension is left open
inputs = tf.placeholder(tf.float32,shape=[None,img_height,img_width])

#Add the trailing channel axis that conv2d expects
X = tf.reshape(inputs,(-1,img_height,img_width,1))

#-------------------Convolution-----------------------#
#Layers 1-3: convolution + ReLU, each followed by 2x2 pooling
conv1 = _conv_relu(X, wconv1, bconv1)
pool1 = _max_pool(conv1, 2)

conv2 = _conv_relu(pool1, wconv2, bconv2)
pool2 = _max_pool(conv2, 2)

conv3 = _conv_relu(pool2, wconv3, bconv3)
pool3 = _max_pool(conv3, 2)

#Layer 4: convolution + ReLU followed by 3x3 pooling
conv4 = _conv_relu(pool3, wconv4, bconv4)
pool4 = _max_pool(conv4, 3)

#Layer 5: convolution + ReLU, no pooling
conv5 = _conv_relu(pool4, wconv5, bconv5)

#Spatial size after the four pooling layers: three halvings and one division by three,
#rounded up because the pooling uses 'SAME' padding
total_downsampling = 2**3 * 3
conv_out_height = int(math.ceil(img_height/total_downsampling))
conv_out_width = int(math.ceil(img_width/total_downsampling))
print(conv_out_width,conv_out_height)

#----------------LSTM--------------------------#
#Every spatial position of the final feature map becomes one time step, and the
#values of all feature maps at that position form its feature vector.
#So number of features per time step = num_conv5 (filters of the last convolution)
#and length_of_sequence = height * width of the output of conv5.
#The reshape flattens the (height, width) grid row by row.

lstm_inputs = tf.reshape(conv5,(-1,conv_out_height*conv_out_width,num_conv5))

#Alternative layout (disabled): one time step per column of the feature map
# lstm_inputs = tf.reshape(pool4,(-1,conv_out_width,conv_out_height*num_conv4))

#Number of time steps in every sequence
seq_len = conv_out_height * conv_out_width

#Per-example sequence lengths, fed at run time so that the batch size can differ
#between training and evaluation
time_steps = tf.placeholder(tf.int32,shape = [None])

# seq_len = conv_out_width

#Label sequences, fed as a sparse tensor
targets = tf.sparse_placeholder(tf.int32,name='targets')

#Scalar passed as output_keep_prob to the DropoutWrapper around the LSTM cells
dropout_lstm = tf.placeholder(tf.float32,shape=[])


#Stacked bidirectional LSTM: `rnn_layers` cells per direction, each wrapped in dropout
#on its outputs. `dropout_lstm` is the keep-probability (feed 1.0 to disable dropout).
#
#The wrapped cell (not the bare LSTMCell) must be the one that goes into the stack;
#previously the DropoutWrapper was created and then discarded, so `dropout_lstm`
#had no effect on the graph.

#Forward direction
list_cells_fw = []
for _ in range(rnn_layers):
    cell_fw = tf.contrib.rnn.LSTMCell(rnn_hidden_units,initializer=fc_initializer)
    drop_fw = tf.contrib.rnn.DropoutWrapper(cell_fw,output_keep_prob=dropout_lstm,dtype=tf.float32)
    list_cells_fw.append(drop_fw)

#Backward direction
list_cells_bw = []
for _ in range(rnn_layers):
    cell_bw = tf.contrib.rnn.LSTMCell(rnn_hidden_units,initializer=fc_initializer)
    drop_bw = tf.contrib.rnn.DropoutWrapper(cell_bw,output_keep_prob=dropout_lstm,dtype=tf.float32)
    list_cells_bw.append(drop_bw)

cells_fw = tf.contrib.rnn.MultiRNNCell(list_cells_fw)
cells_bw = tf.contrib.rnn.MultiRNNCell(list_cells_bw)

#Run both stacks over the whole sequence; the final states are not needed
(outputs_fw,outputs_bw),_ = tf.nn.bidirectional_dynamic_rnn(cells_fw,cells_bw,lstm_inputs,dtype=tf.float32)
print(outputs_fw,outputs_bw)

#Concatenate the forward and backward outputs along the feature axis
blstm_outputs = tf.concat([outputs_fw,outputs_bw], 2)

#Flatten everything except the feature axis so the dense layers see one row per time step
fc_inputs  = tf.reshape(blstm_outputs,[-1,2*rnn_hidden_units])

#Keep-probability for tf.nn.dropout on the fully connected layers (feed 1.0 to disable)
dropout_fc = tf.placeholder(tf.float32,shape=[])

#No activation on either layer: the output is fed to the CTC layer as raw logits
fc_outputs_1 = tf.matmul(fc_inputs,wfc1) + bfc1
fc_outputs_1 = tf.nn.dropout(fc_outputs_1,dropout_fc)

fc_outputs_2 = tf.matmul(fc_outputs_1,wfc2) + bfc2
#NOTE(review): this applies dropout to the logits themselves. It is a no-op while
#dropout_fc is fed 1.0; reconsider before training with a smaller keep-probability.
fc_outputs_2 = tf.nn.dropout(fc_outputs_2,dropout_fc)

#Reshape back to (batch_size, seq_len, vocab_size)
logits = tf.reshape(fc_outputs_2,[-1,seq_len,vocab_size])

#ctc_loss and ctc_greedy_decoder expect time-major input: (seq_len, batch_size, vocab_size)
logits = tf.transpose(logits,[1,0,2])

#Per-example CTC loss and its batch mean
loss = tf.nn.ctc_loss(targets, logits, time_steps)
cost = tf.reduce_mean(loss)

#Optimize the scalar batch mean. Minimizing the per-example vector `loss` made
#TensorFlow sum the gradients over the batch, so the update scaled with the batch
#size and did not correspond to the `cost` that is reported during training.
optimizer = tf.train.RMSPropOptimizer(learning_rate=alpha)
train = optimizer.minimize(cost)

#Greedy CTC decoding of the same logits
decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, time_steps)

#Mean edit distance between the decoded sequences and the targets
label_error_rate = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32),
                                                   targets))

29 5
Tensor("bidirectional_rnn/fw/fw/transpose_1:0", shape=(?, 145, 200), dtype=float32) Tensor("ReverseV2:0", shape=(?, 145, 200), dtype=float32)


In [6]:
# Spatial size (height, width) computed for the final convolutional feature map
conv_out_height,conv_out_width

(5, 29)

In [7]:
# Debug aid: show the scalar placeholder passed to tf.nn.dropout on the fully connected layers
dropout_fc

<tf.Tensor 'Placeholder_3:0' shape=() dtype=float32>

## Create the checkpoint saver and split the dataset

In [8]:
#Saver used to write a checkpoint during training; max_to_keep=None keeps every checkpoint
saver = tf.train.Saver(max_to_keep=None)

#Shuffle a copy with a fixed seed so that
#  * the train/test/validation membership is the same on every run (saved checkpoints
#    can later be evaluated on images the model really has not seen), and
#  * re-running this cell gives the same split instead of reshuffling an already
#    shuffled `list_of_images` in place.
split_seed = 42
shuffled_images = list(list_of_images)
random.Random(split_seed).shuffle(shuffled_images)

train_size = 13000
test_size = 1000
#Expected size of the remainder; the validation set is simply everything left over
valid_size = 94

training_list = shuffled_images[:train_size]
testing_list = shuffled_images[train_size:train_size+test_size]
validation_list = shuffled_images[train_size+test_size:]

In [9]:
# Sanity check: sizes of the train / test / validation splits
len(training_list),len(testing_list),len(validation_list)

(13000, 1000, 94)

In [10]:
def _as_single_batch(image_names):
    """Call create_batches with a batch size equal to the number of images given."""
    return create_batches(len(image_names),image_names,image_arrays,image_labels,vocabulary)


# Test and validation sets are each requested with batch size == set size
test_batches_x,test_batches_y = _as_single_batch(testing_list)
valid_batches_x,valid_batches_y = _as_single_batch(validation_list)

In [11]:
# Evaluate on the validation set, log a progress line and save a checkpoint every
# `checkpoint_every` epochs. 1 means every epoch, which is what the previous
# hard-coded `e%1 == 0` test did.
checkpoint_every = 1

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Wall-clock seconds accumulated since the last checkpoint
    timer = 0

    for e in range(epochs):
        start_time = time.time()

        # Re-shuffle the training images and rebuild the batches for this epoch
        random.shuffle(training_list)
        train_batches_x,train_batches_y = create_batches(batch_size,training_list,image_arrays,image_labels,vocabulary)

        checkpoint = (e % checkpoint_every) == 0

        total_cost = 0.0
        total_ler = 0.0
        # Iterate through all batches of the epoch
        for b in range(len(train_batches_x)):

            # Move the batch axis to the front: `inputs` expects (batch, img_height, img_width)
            batch_x = train_batches_x[b].transpose([2,0,1])

            feed_train = {
                    inputs:batch_x,targets:train_batches_y[b],
                    # One length per example actually present, so a short final batch
                    # does not get a mismatching sequence-length vector
                    time_steps:np.array([seq_len]*len(batch_x)),
                    # Keep-probability 1.0 => dropout disabled
                    dropout_fc:np.array(1.0),dropout_lstm:np.array(1.0)
                   }

            _,cost_val,train_ler_val = sess.run([train,cost,label_error_rate],feed_dict=feed_train)

            total_cost+=cost_val
            total_ler+=train_ler_val

        # Epoch averages. The label error rate must come from the accumulated `total_ler`;
        # the previous code divided `train_ler`, which is never assigned in this notebook.
        avg_cost = total_cost/len(train_batches_x)
        avg_ler = total_ler/len(train_batches_x)

        if checkpoint:
            # The whole validation set is the first (and only) validation batch
            valid_x = valid_batches_x[0].transpose([2,0,1])
            feed_valid = {
                inputs:valid_x,targets:valid_batches_y[0],
                time_steps:np.array([seq_len]*len(valid_x)),
                dropout_fc:np.array(1.0),dropout_lstm:np.array(1.0)
               }

            # Label error rate on the validation data
            valid_ler = sess.run(label_error_rate,feed_dict=feed_valid)

            end_time = time.time()
            time_taken = end_time - start_time
            timer += time_taken

            # Columns: epoch, mean train cost, mean train LER, validation LER, seconds since last checkpoint
            print("{},{:.6f},{:.2f},{:.2f},{}\n".format(e,avg_cost,avg_ler,valid_ler,timer))

            with open('progress.csv','a') as f:
                f.write("{},{:.6f},{:.2f},{:.2f},{}\n".format(e,avg_cost,avg_ler,valid_ler,timer))

            # Save the model
            saver.save(sess,'../model/200_5_Lines_RNN_'+str(e))

            # Restart the timer for the next checkpoint interval
            timer = 0

        else:
            # No checkpoint this epoch: only accumulate the elapsed time
            end_time = time.time()
            time_taken = end_time - start_time
            timer += time_taken

0,141.033752,0.98,0.98,95.18699598312378

INFO:tensorflow:../model/200_5_Lines_RNN_0 is not in all_model_checkpoint_paths. Manually adding it.
1,141.087387,0.97,0.98,92.12399196624756

INFO:tensorflow:../model/200_5_Lines_RNN_1 is not in all_model_checkpoint_paths. Manually adding it.
2,137.163635,0.97,0.98,92.43513774871826

INFO:tensorflow:../model/200_5_Lines_RNN_2 is not in all_model_checkpoint_paths. Manually adding it.
3,138.639252,0.98,0.98,90.83072066307068

INFO:tensorflow:../model/200_5_Lines_RNN_3 is not in all_model_checkpoint_paths. Manually adding it.
4,135.670486,0.97,0.98,92.41278791427612

INFO:tensorflow:../model/200_5_Lines_RNN_4 is not in all_model_checkpoint_paths. Manually adding it.
5,136.670044,0.98,0.98,90.6999671459198

INFO:tensorflow:../model/200_5_Lines_RNN_5 is not in all_model_checkpoint_paths. Manually adding it.
6,134.577682,0.97,0.98,92.70359778404236

INFO:tensorflow:../model/200_5_Lines_RNN_6 is not in all_model_checkpoint_paths. Manually adding it.


KeyboardInterrupt: 