# Inference Pipeline

### Imports

In [1]:
import numpy as np
import h5py
import sys
sys.path.append('../../d-script/')

# Neural network stuff
from fielutil import load_verbatimnet
from featextractor import extract_imfeats

Using Theano backend.


Using gpu device 0: GeForce GTX 980


### Parameters

In [17]:
# Feature source switch: True loads previously saved image features from
# `featurefile`; False extracts them with the network and saves them there.
load_features = False
# HDF5 file holding every image that needs feature extraction.
# Alternative (benchmarking set) -- swap together with `featurefile` below:
# hdf5images='icdar13data/benchmarking-processed/icdar_be.hdf5'
hdf5images = 'icdar13data/experimental-processed/icdar13_ex.hdf5'
# .npy file the image features are loaded from (load_features = True) or
# saved to (load_features = False).
# Alternative (benchmarking set):
# featurefile = 'icdar13data/benchmarking-processed/icdar13be_fiel657.npy'
featurefile = 'icdar13data/experimental-processed/icdar13ex_fiel657.npy'
# Network parameter file handed to load_verbatimnet.
# NOTE(review): this is an absolute path, while the two data paths above are
# relative to the working directory -- confirm it exists on the machine used.
paramsfile = '/fileserver/iam/iam-processed/models/fiel_657.hdf5'

### Full image HDF5 file

Each entry in the HDF5 file is one full image (a form or document). The cell below collects the entry keys as the image labels.

In [18]:
# Open the image file read-only: without an explicit mode, older h5py releases
# open it read/write and would create an empty file if the path were wrong.
# The `with` block closes the handle as soon as the keys have been read.
with h5py.File(hdf5images, 'r') as hdf5file:
    # One key per stored image. Copy the keys into a plain list so `labels`
    # stays usable after the file is closed.
    labels = list(hdf5file.keys())

### Load feature extractor neural network

In [None]:
# Build the feature-extractor network with the parameters in `paramsfile`.
# 'fc7' selects how far the network is built: the recorded output of this cell
# reports "Compiled neural network up to FC7 layer".
# NOTE(review): this runs even when load_features is True, although `vnet` is
# only passed to extract_imfeats on the extraction path -- consider skipping it.
vnet = load_verbatimnet( 'fc7', paramsfile=paramsfile )

Establishing Fiel's verbatim network
Compiled neural network up to FC7 layer


### Image features

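The feature vector of an image is currently taken as the average of the features of all shards in that image. You can either load the image features from the saved `.npy` array, if you already have it, or extract them from the HDF5 images (they are then saved to that `.npy` file).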

In [None]:
if load_features:
    # Reuse the image features saved by an earlier run.
    imfeats = np.load(featurefile)
else:
    # The message previously lacked the space after "from", so the path ran
    # into the text. A parenthesised single-argument print behaves the same
    # under Python 2 and Python 3.
    print("Begin extracting features from " + hdf5images)
    imfeats = extract_imfeats(hdf5images, vnet)
    # Save the result so a later run can set load_features = True instead.
    np.save(featurefile, imfeats)

Begin extracting features fromicdar13data/experimental-processed/icdar13_ex.hdf5
Loaded 1176 shards in and predicting on image 027_1.tif
Loaded 1344 shards in and predicting on image 027_2.tif
Loaded 1428 shards in and predicting on image 027_3.tif
Loaded 1596 shards in and predicting on image 027_4.tif
Loaded 840 shards in and predicting on image 028_1.tif
Loaded 924 shards in and predicting on image 028_2.tif
Loaded 756 shards in and predicting on image 028_3.tif
Loaded 1008 shards in and predicting on image 028_4.tif
Loaded 756 shards in and predicting on image 029_1.tif
Loaded 840 shards in and predicting on image 029_2.tif
Loaded 924 shards in and predicting on image 029_3.tif
Loaded 1008 shards in and predicting on image 029_4.tif
Loaded 924 shards in and predicting on image 030_1.tif
Loaded 1008 shards in and predicting on image 030_2.tif
Loaded 1092 shards in and predicting on image 030_3.tif
Loaded 1428 shards in and predicting on image 030_4.tif
Loaded 1008 shards in and pred

### Build classifier

In [None]:
# Scale each row of imfeats (one image's feature vector) to unit L2 norm by
# broadcasting the per-row norms across the columns.
imfeats = imfeats / np.linalg.norm(imfeats, axis=1)[:, np.newaxis]
# With unit-length rows, entry (a, b) of this matrix is the cosine similarity
# between image a and image b.
F = np.dot(imfeats, imfeats.T)
# Overwrite each image's similarity to itself with -1, the lowest value a
# cosine similarity can take, so self-matches sort last in a ranking.
np.fill_diagonal(F, -1)

### Evaluate classifier on HDF5 file (ICDAR 2013)

In [None]:
# Top k (soft criteria): a query counts as correct if at least one of its k
# most similar images belongs to the same group as the query.
k = 10
# Max top (hard criteria): of the maxtop most similar images, the fraction
# that belongs to the same group as the query.
maxtop = 2
# Row index // images_per_writer is used as the group id.
# NOTE(review): assumes the rows of F are ordered so that every consecutive
# run of 4 rows is one group (presumably one writer), as the recorded file
# names 027_1.tif ... 027_4.tif suggest -- TODO confirm against the HDF5 keys.
images_per_writer = 4

# Run through the adjacency matrix, one query image (row) at a time
softcorrect = 0
hardcorrect = 0
totalnum = 0
for query, similarities in enumerate(F):
    # Indices of the k largest similarities, in ascending order, so the best
    # match is last.
    topk = similarities.argsort()[-k:]
    # Floor division (//) keeps the group ids integral under both Python 2 and
    # Python 3; plain / becomes true division on Python 3 and breaks the match.
    same_group = (topk // images_per_writer) == (query // images_per_writer)
    # Soft criteria
    if same_group.any():
        softcorrect += 1
    totalnum += 1
    # Hard criteria: count the matches among the maxtop best neighbours
    hardcorrect += int(same_group[-maxtop:].sum())

# Print out results. The hard-criteria line reports maxtop, the cut-off it is
# actually computed with (it previously printed k).
print("Top %d (soft criteria) = %f" % (k, (softcorrect + 0.0) / totalnum))
print("Top %d (hard criteria) = %f" % (maxtop, (hardcorrect + 0.0) / totalnum / maxtop))