# Basic Recurrent Neural Network

Testing out original code for a simple LSTM to understand the sequential writing of an author from left to right. (To do: bi-directional recurrent LSTMs.)

Details: 
We require two additional layers that I've written to make the dimensions of the input to other layers consistent. 

### Imports

In [1]:
import pickle
import numpy
import keras
import time

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
from keras.layers.normalization import BatchNormalization as BN
from keras.layers.core import Layer
from keras.layers.recurrent import LSTM

import theano.tensor as T

import h5py
import random
import numpy as np
from collections import defaultdict

import sys
sys.path.append('../repo/d-script/')
# d-script imports
from data_iters.minibatcher import MiniBatcher
from data_iters.iam_hdf5_iterator import IAM_MiniBatcher

import matplotlib.pylab as plt
%matplotlib inline

Using gpu device 0: GeForce GTX 980M (CNMeM is disabled)


### New Keras layers for use in the recurrent network

In [2]:
class Squeeze(Layer):
    '''
        Get rid of any dimensions of size 1.
        First dimension is assumed to be nb_samples.
    '''
    def __init__(self, **kwargs):
        super(Squeeze, self).__init__(**kwargs)

    @property
    def output_shape(self):
        input_shape = self.input_shape
        data_shape = tuple( np.array(input_shape)[ np.array(input_shape) > 1 ] )
        return (input_shape[0],)+ data_shape

    def get_output(self, train=False):
        X = self.get_input(train)
        # size = T.prod(X.shape) // X.shape[0]
        # nshape = (X.shape[0], size)
        # return T.reshape(X, output_shape)
        return X.squeeze()
    
class Transpose3(Layer):
    '''
        Get rid of any dimensions of size 1.
        First dimension is assumed to be nb_samples.
    '''
    def __init__(self, transpose_order, **kwargs):
        self.transpose_order = transpose_order
        super(Transpose3, self).__init__(**kwargs)

    @property
    def output_shape(self):
        input_shape = self.input_shape
        data_shape = ()
        for j in self.transpose_order:
            data_shape+=(input_shape[j],)
        return data_shape

    def get_output(self, train=False):
        X = self.get_input(train)
        # size = T.prod(X.shape) // X.shape[0]
        # nshape = (X.shape[0], size)
        # return T.reshape(X, output_shape)
        return X.transpose(self.transpose_order)

### Data (40 authors, 15 forms per author)

In [3]:
num_authors=40
num_forms_per_author=15
hdf5_file = '/memory/author_lines.hdf5'
shingle_dim=(120,120)
batch_size=32
use_form=True

iam_m = IAM_MiniBatcher(hdf5_file, num_authors, num_forms_per_author, shingle_dim=shingle_dim, use_form=use_form, default_mode=MiniBatcher.TRAIN, batch_size=batch_size)

### Define the neural network

#### Current architecture
1. Convolution2D (48, 12, 12) + Relu + MaxPool (2,2)
2. Convolution2D (48, 6, 6 ) + Relu + MaxPool (2,2)
3. Convolution2D->1D (48, 6, 35) + Relu

In [4]:
model = Sequential()
model.add(Convolution2D(48, 12, 12,
                    border_mode='full',
                    input_shape=(1, 120, 120),
                    activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Convolution2D(48, 6, 6,
                       border_mode='full',
                       activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(MaxPooling2D(pool_size=(70,2)))

model.add(Convolution2D(48, 6, 35, activation='relu'))
model.add(Squeeze())
model.add(Transpose3((0,2,1)))

model.add(LSTM(output_dim=48, activation='sigmoid', inner_activation='hard_sigmoid'))
model.add(Dense(40, activation='softmax'))

In [5]:
sgd = SGD(lr=0.015, decay=1e-6, momentum=0.5, nesterov=True)
# model.compile(loss='categorical_crossentropy', optimizer='adagrad')
model.compile(loss='categorical_crossentropy', optimizer=sgd)
print "Finished compilation with optimization set to SGD"

Finished compilation with optimization set to SGD


In [None]:
# model.load_weights('basic_recurrent300.hd5')

print "Getting the first validation batch"
[X_val, Y_val] = iam_m.get_val_batch(batch_size*100)
X_val = np.expand_dims(X_val, 1)
Y_val = to_categorical(Y_val, num_authors)
print "Finished getting "+str(batch_size*10)+" data points"


from PIL import Image
def randangle(batch):
    newbatch = np.zeros(batch.shape)
    for i,im in enumerate(batch):
        imangle = np.asarray(Image.fromarray(im.squeeze()).rotate(7.5*np.random.randn()))
        newbatch[i]=imangle
    return newbatch

Getting the first validation batch
Finished getting 320 data points


In [None]:
total_iters = 500
for batch_iter in xrange(total_iters):
    print "Data load "+str(batch_size*1000)+" authors"
    [X_train, Y_train] = iam_m.get_train_batch(batch_size*1000)
    X_train = np.expand_dims(X_train, 1)
    X_train = randangle(X_train)
    Y_train = to_categorical(Y_train, num_authors)
    print "Batch iteration "+str(batch_iter)+"/"+str(total_iters)+" on "+str(num_authors)+" authors."
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=1, show_accuracy=True, verbose=1, validation_data=(X_val, Y_val))
    if (batch_iter % 100)==0 and batch_iter != 0:
        model.save_weights('recnet.hdf5', overwrite=True)

Batch iteration 0/500 on 40 authors.
Train on 32000 samples, validate on 3200 samples
Epoch 1/1
 4672/32000 [===>..........................] - ETA: 554s - loss: 3.6802 - acc: 0.0330

In [None]:
model.save_weights('basic_recurrent.hd5')