In [1]:
import numpy as np
import h5py
import sys
import logging
sys.path.append('../')

# Neural network stuff
from data_iters.hdf5_iterator import Hdf5MiniBatcher
from data_iters.minibatcher import MiniBatcher
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
from fielutil import load_verbatimnet
from featextractor import extract_imfeats_debug

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled)


### File names

You will require:
1. HDF5 Files:
    a. Author-Lines
    b. Flat Images
2. Params (for the neural network you're looking at)

In [2]:
# Do you want to load the features in? Or save them to a file?
load_features = False

# HDF5 file of author-grouped images. It is opened to count the authors and
# is handed to the minibatcher below. Keep exactly ONE assignment active:
# the original cell assigned `hdf5authors` twice, so the first value was
# silently overwritten. The alternatives are kept here as comments.
# hdf5authors = 'nmecdata/nmec_scaled_author_form.hdf5'
# hdf5authors = '/memory/nmec_scaled_author_form.hdf5'
hdf5authors = '/fileserver/iam/iam-processed/words/author_words.hdf5'

# Flat-image HDF5 file (not referenced by the cells visible in this notebook).
hdf5images = 'nmecdata/nmec_scaled_flat.hdf5'

# This is the file that you will load the features from or save the features to
# featurefile = 'icdar13data/benchmarking-processed/icdar13be_fiel657.npy'
# featurefile = 'icdar13data/experimental-processed/icdar13ex_fiel657.npy'
featurefile = 'nmecdata/nmec_fiel657_features.npy'

# Pretrained network parameters passed to load_verbatimnet
paramsfile = '/fileserver/iam/iam-processed/models/fiel_657.hdf5'

### Parameter Definitions

In [3]:
# The number of top-level entries in the author HDF5 file is used as the
# author count. The file is opened only for this count, so close it right
# away instead of leaking the handle for the lifetime of the kernel.
# NOTE: `labels` stays bound afterwards but refers to a closed file.
with h5py.File(hdf5authors, 'r') as labels:
    num_authors = len(labels)

# Passed straight to Hdf5MiniBatcher; -1 presumably means "use every form
# of each author" -- TODO confirm against the minibatcher implementation.
num_forms_per_author = -1

# shingle_dim is forwarded to the minibatcher, batch_size to both the
# minibatcher and vnet.fit.
shingle_dim = (56, 56)
batch_size = 3200

# Number of passes through the training loop (one minibatch per pass)
# and the SGD learning rate.
iterations = 1000
lr = 0.001

### Define your model

Here, we're using the Fiel Network

In [None]:
# Load the pretrained network, requested with the 'fc7' argument, without
# compiling it yet: more layers are appended before compilation.
vnet = load_verbatimnet('fc7', paramsfile=paramsfile, compiling=False)

# Classification head: one softmax output unit per author.
vnet.add(Dense(num_authors))
vnet.add(Activation('softmax'))

# Nesterov-momentum SGD with a small learning-rate decay.
sgd = SGD(lr=lr, momentum=0.9, decay=1e-6, nesterov=True)
vnet.compile(optimizer=sgd, loss='categorical_crossentropy')

# Parenthesised form prints the same text on Python 2 and is valid Python 3.
print("Finished compilation")

Establishing Fiel's verbatim network
Loaded neural network up to fc7 layer

### Minibatcher (to load in your data for each batch)

In [None]:
# Uncomment the next line for debug-level logging from the HDF5 iterator.
# logging.getLogger('data_iters.hdf5_iterator').setLevel(logging.DEBUG)

# Batch source for training, built in TRAIN mode with rotation enabled.
nmec_m = Hdf5MiniBatcher(
    hdf5authors,
    num_authors,
    num_forms_per_author,
    shingle_dim=shingle_dim,
    default_mode=MiniBatcher.TRAIN,
    batch_size=batch_size,
    add_rotation=True,
)

### Train your model for the specified number of iterations

In [None]:
# Checkpoint settings: how often (in iterations) the weights are written,
# and the file they are written to.
checkpoint_every = 20
weights_file = 'fielnet-nmec.hdf5'

for batch_iter in range(iterations):
    # Draw one training minibatch from the HDF5 minibatcher.
    (X_train, Y_train) = nmec_m.get_train_batch()

    # Map x -> 1 - x/255 (assumes 0-255 pixel values -- TODO confirm), then
    # insert a singleton axis at position 1 (presumably the channel axis
    # the network expects -- verify against load_verbatimnet).
    X_train = 1.0 - X_train / 255.0
    X_train = np.expand_dims(X_train, 1)

    # One-hot encode the author labels to match the softmax output layer.
    Y_train = to_categorical(Y_train, num_authors)

    vnet.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=1, show_accuracy=True, verbose=1)

    # Fixed: the original printed the undefined name `i` (NameError).
    print("Finished training on the " + str(batch_iter) + "th batch")

    if (batch_iter % checkpoint_every) == 0 and batch_iter != 0:
        # Fixed: the original called save_weights on the undefined name
        # `model`; the network being trained is `vnet`.
        vnet.save_weights(weights_file, overwrite=True)

# Save once more after the loop so that iterations run since the last
# periodic checkpoint are not lost. Skipped when no training happened.
if iterations > 0:
    vnet.save_weights(weights_file, overwrite=True)