In [None]:
import time
import random
import numpy as np
from collections import defaultdict
from optparse import OptionParser
import pickle
import h5py

# Required neural network libraries
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
from keras.layers.normalization import BatchNormalization as BN

# Plotting (which probably won't work due to X11 issues)
import matplotlib.pylab as plt
import sys

# d-script imports
sys.path.append('../')
import data_iters
from data_iters.hdf5_iterator import Hdf5MiniBatcher
from data_iters.archive.iam_iterator import IAM_MiniBatcher
from data_iters.minibatcher import MiniBatcher

# Denoising stuff
from data_iters.CoffeeStainer import *
from data_iters.NoiseAdder import *

%pdb off

### Define which dataset we'd like to use

In [None]:
# Which training dataset do we want to train from?
train_dataset='iam-binlines'
noise_dataset='noisedir/bin'

# Dataset name -> (hdf5authors path, hdf5images path).
# Every image to be processed must already be stored in these HDF5 files.
# The second entry is None where no flat image file is known for the dataset,
# so that hdf5images is ALWAYS rebound when this cell is re-run (previously it
# was either undefined or silently kept the value from an earlier run).
dataset_files = {
    'nmec':         ('/fileserver/nmec-handwriting/nmec_scaled_author_form.hdf5',
                     'nmecdata/nmec_scaled_flat.hdf5'),
    'nmec-bin':     ('nmecdata/author_nmec_bin_uint8.hdf5',
                     'nmecdata/flat_nmec_bin_uint8.hdf5'),
    'icdar-ex':     ('/fileserver/icdar13/experimental-processed/author_icdar13_ex.hdf5',
                     '/fileserver/icdar13/experimental-processed/icdar13_ex.hdf5'),
    'icdar-be':     ('/fileserver/icdar13/benchmarking-processed/author_icdar_be.hdf5',
                     '/fileserver/icdar13/benchmarking-processed/icdar13_be.hdf5'),
    'iam-words':    ('/fileserver/iam/iam-processed/words/author_words.hdf5',
                     None),
    'iam-binwords': ('/fileserver/iam/iam-binary/iam_author_words_bin.hdf5',
                     '/fileserver/iam/iam-binary/iam_flat_words_bin.hdf5'),
    'iam-lines':    ('/fileserver/iam/iam-processed/lines/author_lines.hdf5',
                     None),
    'iam-binlines': ('/fileserver/iam/iam-binary/iam_author_lines_bin.hdf5',
                     '/fileserver/iam/iam-binary/iam_flat_lines_bin.hdf5'),
    'iam-binforms': ('/fileserver/iam/iam-binary/iam_author_forms_bin.hdf5',
                     None),
}
# The original spelling used an underscore; keep it working alongside the
# hyphenated form that matches every other dataset name.
dataset_files['iam_binforms'] = dataset_files['iam-binforms']

# Any other name falls back to the (non-binarized) IAM forms, as before.
default_files = ('/fileserver/iam/iam-processed/forms/author_forms.hdf5', None)
hdf5authors, hdf5images = dataset_files.get(train_dataset, default_files)

# Setup HDF5 Files
# The handle is opened read-only; it is only needed by the commented-out
# len(labels) alternative below.
labels = h5py.File(hdf5authors, 'r')
# num_authors=len(labels)
# num_forms_per_author=-1
num_authors=300
num_forms_per_author=10
shingle_dim=(120,120)        # (height, width) of each shingle -- TODO confirm axis order
batch_size=32                # mini-batch size passed to model.fit
load_size=batch_size*1000    # shingles requested from the minibatcher per load
iterations = 10000           # number of outer load/fit rounds in the training loop
lr = 0.001

### Get both the minibatcher and the noisebatcher

In [None]:
# Source of clean shingles drawn from the selected author HDF5 file.
mini_m = IAM_MiniBatcher(
    hdf5authors, num_authors, num_forms_per_author,
    shingle_dim=shingle_dim,
    default_mode=MiniBatcher.TRAIN,
    batch_size=load_size)
# Noise source built from the configured noise directory.
noise_m = NoiseAdder(noise_dataset)

# Fixed-seed generator handed to the noise adder (also reused by later cells).
rng = np.random.RandomState(100)

# Ten clean examples (Y) and their noisy counterparts (X) for a visual check;
# the second return value of get_batch is not used here.
Y, Dummy = mini_m.get_batch(10)
X = noise_m.add_noise_batch(Y, rng=rng, shingle_dim=shingle_dim)

### Plot example noise images

In [None]:
import matplotlib.pylab as plt
%matplotlib inline
for i in xrange(4):
    plt.subplots(1,2)
    plt.subplot(1,2,1)
    plt.imshow(Y[i], cmap='gray')
    plt.subplot(1,2,2)
    plt.imshow(X[i], cmap='gray')

### Define the Neural Network

In [None]:
# Import the project-local network definitions, then reload the module so
# edits to noisenet are picked up without restarting the kernel.
import noisenet
reload(noisenet)
# Build the model through noisenet.conv4p2c_model for the configured shingle size.
model = noisenet.conv4p2c_model(shingle_dim=shingle_dim)

# Counter bumped once per outer iteration of the training loop; the loop
# itself does not read it, so it is informational only.
batchtrack = 0  # Keep track of the batches for start & stop

In [None]:
import sys
from IPython.display import clear_output

load_size=32000

X_nonthresh = np.zeros( (load_size,)+shingle_dim )
for batch_iter in xrange(iterations):
    
    print "Getting batch number "+str(batch_iter)+"."
    sys.stdout.flush()
    (X_image, Y_train) = mini_m.get_batch(load_size)
    # print "Thresholding the image"
    # X_nonthresh[:] = X_image
    # for im in X_image:
    #     X_image[ X_image < (0.75*im.max()) ] = 0
    #     X_image[ X_image >=(0.75*im.max()) ] = 255
    print "Adding noise to batch"
    sys.stdout.flush()
        
    X_noise = noise_m.add_noise_batch(X_image, rng=rng, shingle_dim=shingle_dim)
    print "Finished adding noise to batch"
    sys.stdout.flush()
    
    X_noise = 1.0 - X_noise/255.0    
    X_image = 1.0 - X_image/255.0
    print "Bounded image pixel values"
    
    X_image = np.expand_dims(X_image, 1)
    X_noise = np.expand_dims(X_noise, 1)
    print "Reshape and preprocessed to scale between 0 and 1"
    sys.stdout.flush()

    model.fit(X_noise, X_image, batch_size=batch_size, nb_epoch=1, show_accuracy=True, verbose=1) #, validation_data=(X_test, Y_test))
    
    ############################# VISUALIZATION #############################
    # Blank image with random noise at the beginning (for the first image)
    X_clean_noise = noise_m.add_noise_np( 255*np.ones(shingle_dim), rng=rng, shingle_dim=shingle_dim )
    X_clean_noise = 1.0 - np.expand_dims( np.expand_dims(X_clean_noise,0), 0 )/255.0
    X_clean_noise_output = model.predict( X_clean_noise )
    X_clean_noise_output = X_clean_noise_output.reshape( X_clean_noise.shape )
    plt.subplots(1,2)
    plt.subplot(1,2,1)
    plt.imshow( 1.0 - X_clean_noise.squeeze(), cmap='gray' )
    plt.title('Noise Image')
    plt.subplot(1,2,2)
    plt.imshow(1.0 - X_clean_noise_output.squeeze(), cmap='gray')

    X_example = model.predict( X_noise[:5] )
    X_example = X_example.reshape( (5,)+X_noise.shape[2:] )
    clear_output()
    for i, exim in enumerate(X_example):
        plt.subplots(1,3,figsize=(5,5))
        plt.subplot(1,3,3)
        plt.imshow(1.0-X_example[i], cmap='gray')
        plt.title('Predicted Image')
        plt.subplot(1,3,2)
        plt.imshow(1.0-X_image[i].reshape(X_noise.shape[2:]), cmap='gray')
        plt.title('Targeted Image')
        plt.subplot(1,3,1)
        plt.imshow(1.0-X_noise[i][0], cmap='gray')
        plt.title('Input Noisy Image')
    plt.show()
        
    if (batch_iter % 25) == 0:
        model.save_weights('conv4p2_linet120-iambin-tifs.hdf5', overwrite=True)
    batchtrack += 1

In [None]:
(X_image, Y_train) = mini_m.get_batch(64)
print X_image.shape
X_noise = noise_m.add_noise_batch(X_image, rng=rng, shingle_dim=shingle_dim)
X_noise = 1.0 - X_noise/255.0    
print X_noise.shape
X_image = X_image.reshape( (64, np.prod(X_image.shape[1:])))
X_image = 1.0 - X_image/255.0
print X_image.shape
X_noise = np.expand_dims(X_noise, 1)

shingle_dim

model.save_weights('conv4p2_linet120-iambin-tifs.hdf5')
# X_noise = X_noise.reshape( (load_size, np.prod(X_noise.shape[1:])))
# print X_noise.reshape((load_size, np.prod(X_noise.shape[1:])))

In [None]:
f = h5py.File('/fileserver/iam/iam-binary/iam_author_lines_bin.hdf5','r')
print f['001']['a01-000x-00'].value
plt.imshow(f['001']['a01-000x-00'])
batchtrack = 0

In [None]:
# Display the current value of the batch counter as this cell's output.
batchtrack