In [1]:
# Directory that is listed to obtain the sample names; each sample is read
# from PRIMUS_PATH/<sample>/<sample>.<extension>.
PRIMUS_PATH = "/primus_data/"
# Checkpoint path prefix; the training-set size is appended to it after the split.
SAVE_MODEL = "/models/semantic_1-"

# Vocabulary file, read one symbol per line when building word2int / int2word.
VOCABULARY_PATH = "/tf-end-to-end/Data/vocabulary_semantic.txt"
# NOTE(review): not referenced by the code visible in this notebook, which
# uses word_separator() instead — confirm whether this constant is still needed.
WORD_DELIMITER = '\t'
# True -> ground truth is read from '.semantic' files, False -> '.agnostic'.
SEMANTIC = True
# True -> training images are read from '<sample>_distorted.jpg' instead of '<sample>.png'.
DISTORTIONS = False

# Height every image is resized to before being fed to the network.
IMG_HEIGHT = 128
# Number of iterations of the training loop (each iteration trains on one batch).
MAX_EPOCHS = 100
# Value fed to the `keep_prob` placeholder during training, i.e. a KEEP probability.
DROPOUT = 0.5

In [2]:
import os
import numpy as np
import cv2
import multiprocessing

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops

# Split Training / Testing Samples

In [3]:
# List every sample of the corpus and shuffle the order in place.
data_samples = os.listdir(PRIMUS_PATH)
np.random.shuffle(data_samples)

# Split the shuffled samples into a training part and a validation part.
# NOTE: despite its name, VAL_SPLIT is the fraction of samples that goes to
# TRAINING: with 0.4 the first 40% are used for training and the remaining
# 60% for validation (the previous "80% / 20%" comment did not match the code).
VAL_SPLIT = 0.4
split_index = int(len(data_samples) * VAL_SPLIT)
training_list = data_samples[:split_index]
validation_list = data_samples[split_index:]
print("Train: ", len(training_list))
print("Test: ", len(validation_list))

# Tag the checkpoint prefix with the training-set size. The guard keeps the
# cell idempotent: re-running it no longer appends the size a second time.
size_suffix = str(len(training_list))
if not SAVE_MODEL.endswith("-" + size_suffix):
    SAVE_MODEL += size_suffix
print(SAVE_MODEL)

('Train: ', 35071)
('Test: ', 52607)
/models/semantic_1-35071


# Create Vocabulary
The vocabulary is the set of possible output symbols; each symbol is mapped to an integer index and back.

In [37]:
# Two-way mapping between vocabulary symbols and consecutive integer ids.
word2int = {}
int2word = {}

# Read one symbol per line; `with` guarantees the file is closed even if
# reading fails.
with open(VOCABULARY_PATH, 'r') as vocab_file:
    vocab_list = vocab_file.read().splitlines()

# Ids are assigned in order of first appearance; duplicated lines are ignored.
for word in vocab_list:
    if word not in word2int:
        word_idx = len(word2int)
        word2int[word] = word_idx
        int2word[word_idx] = word

vocabulary_size = len(word2int)

# Parameterization

In [38]:
# Hyper-parameters consumed by the graph-building cell and by the batch loaders.
params = {
    "img_height":IMG_HEIGHT,      # images are resized to this height
    "img_width":None,             # None -> the width axis of the input placeholder is dynamic
    "batch_size":16,              # samples read per call to get_batch
    "img_channels":1,             # images are loaded as single-channel arrays
    "conv_blocks":4,              # number of conv/pool blocks; the three lists below hold one entry per block
    "conv_filter_n":[32,64,128,256],
    "conv_filter_size":[ [3,3], [3,3], [3,3], [3,3] ],
    "conv_pooling_size":[ [2,2], [2,2], [2,2], [2,2] ],  # [height, width] pooling per block
    "rnn_units":512,              # hidden units of each LSTM cell
    "rnn_layers":2,               # stacked LSTM layers per direction
    "vocabulary_size": vocabulary_size  # the output layer adds one extra class for the CTC blank
}

# Setup TF

In [39]:
# Session configuration: allow_growth is enabled on the GPU options.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
# Start from an empty default graph so that re-running the notebook does not
# add the model a second time, then open the session used by every later cell.
tf.reset_default_graph()
sess = tf.InteractiveSession(config=config)

# Feed parameters to the model

In [40]:
def leaky_relu(features, alpha=0.2, name=None):
    """Leaky ReLU activation: element-wise max(alpha * features, features).

    Args:
        features: input values; converted to a tensor.
        alpha: slope applied to the scaled branch; converted to a tensor.
        name: optional name for the enclosing name scope (default scope
            name is "LeakyRelu").

    Returns:
        The tensor produced by the element-wise maximum.
    """
    with ops.name_scope(name, "LeakyRelu", [features, alpha]):
        features = ops.convert_to_tensor(features, name="features")
        alpha = ops.convert_to_tensor(alpha, name="alpha")
        scaled = alpha * features
        return math_ops.maximum(scaled, features)

In [41]:
# TODO Assert parameters
# Image batch placeholder. The batch axis is dynamic, and so is the width axis
# because params['img_width'] is None.
# NOTE(review): the name `input` shadows the Python builtin; it is kept
# because the training loop feeds this placeholder under that name.
input = tf.placeholder(shape=(None,
                               params['img_height'],
                               params['img_width'],
                               params['img_channels']),  # [batch, height, width, channels]
                        dtype=tf.float32,
                        name='model_input')
input_shape = tf.shape(input)
# Accumulated pooling factors along each axis, updated once per conv block.
width_reduction = 1
height_reduction = 1

# Convolutional blocks: conv -> batch norm -> leaky ReLU -> max pooling
x = input
for i in range(params['conv_blocks']):
    x = tf.layers.conv2d(
        inputs=x,
        filters=params['conv_filter_n'][i],
        kernel_size=params['conv_filter_size'][i],
        padding="same",
        activation=None)

    # NOTE(review): no `training` argument is passed here — confirm that the
    # layer's default mode is the one intended while training.
    x = tf.layers.batch_normalization(x)
    x = leaky_relu(x)
    # Pooling uses a stride equal to the pool size.
    x = tf.layers.max_pooling2d(inputs=x,
                                pool_size=params['conv_pooling_size'][i],
                                strides=params['conv_pooling_size'][i])

    width_reduction = width_reduction * params['conv_pooling_size'][i][1]
    height_reduction = height_reduction * params['conv_pooling_size'][i][0]

# Prepare output of conv block for recurrent blocks
# perm=[2, 0, 3, 1] applied to [batch, height, width, channels] moves the
# width axis to the front so that it acts as the time axis.
features = tf.transpose(x, perm=[2, 0, 3, 1])  # -> [width, batch, channels, height] (time_major=True)
# Size of one time step: filters of the last conv block times the reduced height.
feature_dim = params['conv_filter_n'][-1] * (params['img_height'] / height_reduction)
# Number of time steps: input width divided by the accumulated width pooling.
feature_width = input_shape[2] / width_reduction
# Both quotients above are cast to int32 before being used as shape entries.
features = tf.reshape(features, tf.stack([tf.cast(feature_width,'int32'), input_shape[0], tf.cast(feature_dim,'int32')]))  # -> [width, batch, features]
# Named constants added to the graph; their return values are not used here.
# Presumably they are looked up by name when the graph is restored — confirm
# against the prediction script.
tf.constant(params['img_height'],name='input_height')
tf.constant(width_reduction,name='width_reduction')

# Recurrent block
# Bidirectional stack of LSTM cells; each cell is wrapped with dropout on its
# input, controlled by the `keep_prob` placeholder.
rnn_keep_prob = tf.placeholder(dtype=tf.float32, name="keep_prob")
rnn_hidden_units = params['rnn_units']
rnn_hidden_layers = params['rnn_layers']
rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    tf.contrib.rnn.MultiRNNCell(
        [tf.nn.rnn_cell.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(rnn_hidden_units),input_keep_prob=rnn_keep_prob)
         for _ in range(rnn_hidden_layers)]),
    tf.contrib.rnn.MultiRNNCell(
        [tf.nn.rnn_cell.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(rnn_hidden_units),input_keep_prob=rnn_keep_prob)
         for _ in range(rnn_hidden_layers)]),
    features,
    dtype=tf.float32,
    time_major=True,
)

# Concatenate the forward and backward outputs along the feature axis and
# project every time step onto the vocabulary plus one extra class.
rnn_outputs = tf.concat(rnn_outputs, 2)
logits = tf.contrib.layers.fully_connected(
    rnn_outputs,
    params['vocabulary_size'] + 1,  # BLANK
    activation_fn=None,
)

tf.add_to_collection("logits",logits) # for restoring purposes
# CTC Loss computation
# seq_len holds one length per batch item; targets is a sparse placeholder
# fed with the (indices, values, shape) tuple built by sparse_tuple_from.
seq_len = tf.placeholder(tf.int32, [None], name='seq_lengths')
targets = tf.sparse_placeholder(dtype=tf.int32, name='target')
ctc_loss = tf.nn.ctc_loss(labels=targets, inputs=logits, sequence_length=seq_len,time_major=True)
loss = tf.reduce_mean(ctc_loss)
# CTC decoding
decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, seq_len)
# Alias of the input placeholder; the commented-out validation code in the
# training loop refers to it as `inputs`.
inputs = input
# decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits,seq_len,beam_width=50,top_paths=1, merge_repeated=True)

In [42]:
# Saver used by the training loop to write checkpoints; max_to_keep=None is passed explicitly.
# NOTE(review): the saver is created before the optimizer cell below runs —
# confirm whether the optimizer's own variables are meant to be part of the checkpoints.
saver = tf.train.Saver(max_to_keep=None)

In [43]:
# Training op: Adam with its default settings, minimising the mean CTC loss.
train_opt = tf.train.AdamOptimizer().minimize(loss)
# Initialise every global variable in the session before training starts.
sess.run(tf.global_variables_initializer())

# CTC Utils

In [44]:
def convert_inputs_to_ctc_format(target_text):
    """Turn a text string into a sparse CTC target made of character codes.

    The text is stripped, lower-cased and cleaned of the characters
    . ? , ' ! and -, then printed. Every remaining character becomes its
    `ord` value; every space becomes SPACE_INDEX.

    Args:
        target_text: the text to encode.

    Returns:
        (train_targets, original): the sparse tuple built by
        `sparse_tuple_from` for this single sequence, and the cleaned text.
    """
    SPACE_TOKEN = '-'
    SPACE_INDEX = 4
    FIRST_INDEX = 0

    original = ' '.join(target_text.strip().lower().split(' '))
    for unwanted in ('.', '?', ',', "'", '!', '-'):
        original = original.replace(unwanted, '')
    print(original)

    # Doubling each space makes split(' ') yield an empty string wherever a
    # space was, which is then replaced by the space token.
    pieces = original.replace(' ', '  ').split(' ')
    symbols = np.hstack([SPACE_TOKEN if piece == '' else list(piece)
                         for piece in pieces])

    # Map each symbol to its integer code.
    targets = np.asarray([SPACE_INDEX if symbol == SPACE_TOKEN else ord(symbol) - FIRST_INDEX
                          for symbol in symbols])

    # Sparse representation to feed the target placeholder.
    train_targets = sparse_tuple_from([targets])

    return train_targets, original

def sparse_tuple_from(sequences, dtype=np.int32):
    """Build the (indices, values, shape) triple describing a sparse batch.

    Args:
        sequences: list of label sequences, one per batch item.
        dtype: dtype of the returned `values` array.

    Returns:
        indices: int64 array of shape (total_labels, 2) holding
            (batch_index, position) for every label.
        values: 1-D array with every label, in the same order as `indices`.
        shape: int64 array [number_of_sequences, longest_sequence_length].

    Unlike the previous implementation, a batch in which every sequence is
    empty (or an empty batch) no longer raises: it yields a (0, 2) `indices`
    array and a second shape entry of 0.
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)

    # reshape keeps the (N, 2) layout even when there is no label at all.
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=dtype)
    # Longest sequence length == largest position index + 1.
    max_len = max([len(seq) for seq in sequences]) if len(sequences) > 0 else 0
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)

    return indices, values, shape

def sparse_tensor_to_strs(sparse_tensor):
    """Regroup the first sparse result into one list of values per batch row.

    Args:
        sparse_tensor: sequence whose first element exposes, by position,
            the indices, the values and the dense shape of a sparse tensor.

    Returns:
        A list with dense_shape[0] entries; entry r holds the values whose
        index row is r, in the order they appear. Rows without values stay
        as empty lists.
    """
    first = sparse_tensor[0]
    indices = first[0]
    values = first[1]
    dense_shape = first[2]

    decoded_rows = [[] for _ in range(dense_shape[0])]

    row = 0
    symbols = []
    for position, entry in enumerate(indices):
        entry_row = entry[0]
        if entry_row != row:
            # A new batch row starts: store what was collected so far.
            decoded_rows[row] = symbols
            symbols = []
            row = entry_row
        symbols.append(values[position])

    # Store the symbols of the last row that was being collected.
    decoded_rows[row] = symbols
    return decoded_rows


def pad_sequences(sequences, maxlen=None, dtype=np.float32,
                  padding='post', truncating='post', value=0.):
    """Pad (and truncate) sequences to a common length.

    Args:
        sequences: list of sequences.
        maxlen: target length; defaults to the longest sequence.
        dtype: dtype of the returned array.
        padding: 'post' writes each sequence at the start of its row,
            'pre' writes it at the end.
        truncating: 'post' keeps the first `maxlen` items of a sequence,
            'pre' keeps the last `maxlen` items.
        value: fill value for the padded positions.

    Returns:
        (x, lengths): the padded array of shape
        (n_sequences, maxlen) + per-item shape, and the int64 array of the
        original sequence lengths.

    Raises:
        ValueError: on an unknown `padding` / `truncating` value, or when a
            sequence's per-item shape differs from the first non-empty one.
    """
    lengths = np.asarray([len(seq) for seq in sequences], dtype=np.int64)

    n_sequences = len(sequences)
    if maxlen is None:
        maxlen = np.max(lengths)

    # Per-item shape, taken from the first non-empty sequence; every other
    # sequence is checked against it in the loop below.
    sample_shape = next(
        (np.asarray(seq).shape[1:] for seq in sequences if len(seq) > 0),
        tuple())

    padded = np.ones((n_sequences, maxlen) + sample_shape)
    padded = (padded * value).astype(dtype)

    for row, seq in enumerate(sequences):
        if len(seq) == 0:
            continue  # nothing to copy for an empty sequence

        if truncating == 'post':
            kept = seq[:maxlen]
        elif truncating == 'pre':
            kept = seq[-maxlen:]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        # The kept part must have the expected per-item shape.
        kept = np.asarray(kept, dtype=dtype)
        item_shape = kept.shape[1:]
        if item_shape != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (item_shape, row, sample_shape))

        if padding == 'post':
            padded[row, :len(kept)] = kept
        elif padding == 'pre':
            padded[row, -len(kept):] = kept
        else:
            raise ValueError('Padding type "%s" not understood' % padding)

    return padded, lengths


def word_separator():
    """Return the character used to split a ground-truth line into symbols (a tab)."""
    separator = '\t'
    return separator

def levenshtein(a,b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    Only two rows of the dynamic-programming table are kept, and the shorter
    sequence is used for the row so that the rows stay as small as possible.
    """
    shorter, longer = (b, a) if len(a) > len(b) else (a, b)
    width = len(shorter)

    row = list(range(width + 1))
    for i, long_item in enumerate(longer, 1):
        above = row
        row = [i] + [0] * width
        for j, short_item in enumerate(shorter, 1):
            insertion = above[j] + 1
            deletion = row[j - 1] + 1
            substitution = above[j - 1]
            if short_item != long_item:
                substitution += 1
            row[j] = min(insertion, deletion, substitution)

    return row[width]


def edit_distance(a,b,EOS=-1,PAD=-1):
    """Levenshtein distance between a and b, ignoring EOS and PAD symbols.

    Args:
        a, b: sequences of symbols.
        EOS, PAD: symbol values removed from both sequences before comparing.

    Returns:
        The distance computed by `levenshtein` on the filtered sequences.
    """
    def _is_real_symbol(symbol):
        return symbol != EOS and symbol != PAD

    filtered_a = list(filter(_is_real_symbol, a))
    filtered_b = list(filter(_is_real_symbol, b))
    return levenshtein(filtered_a, filtered_b)


def normalize(image):
    """Invert and rescale pixel values: 255 maps to 0.0 and 0 maps to 1.0."""
    inverted = 255. - image
    return inverted / 255.


def resize(image, height):
    """Resize `image` to the given height, scaling the width by the same ratio.

    Args:
        image: array whose shape starts with (rows, columns).
        height: target number of rows.

    Returns:
        The image returned by cv2.resize for the size (width, height).
    """
    source_height = image.shape[0]
    source_width = image.shape[1]
    # Truncate to an integer width that keeps the aspect ratio.
    width = int(float(height * source_width) / source_height)
    return cv2.resize(image, (width, height))

# Helper Functions

In [45]:
def read_files(pid):
    """Load the validation sample at position `pid` of `validation_list`.

    The previous version stored its results in `images` / `labels`, names
    that are only local variables of `getValidation`; calling it therefore
    failed, and anything written from a worker process would have been lost
    anyway. The sample is now returned instead, which also makes the function
    usable with `Pool.map`.

    Args:
        pid: index into `validation_list`.

    Returns:
        (image, label_ids): the resized, normalised image and the list of
        vocabulary ids read from the first line of the ground-truth file.

    Raises:
        Exception: if the image cannot be loaded.
        KeyError: if a ground-truth symbol is not in `word2int`.
    """
    sample_filepath = validation_list[pid]
    sample_full_filepath = PRIMUS_PATH + '/' + sample_filepath + '/' + sample_filepath

    # Image (loaded with the same flag as in get_batch)
    image_path = sample_full_filepath + '.png'
    sample_img = cv2.imread(image_path, False)
    if sample_img is None:
        # cv2.imread signals failure by returning None, as handled in get_batch.
        raise Exception('Error loading sample: ' + image_path)
    sample_img = normalize(resize(sample_img, params['img_height']))

    # Ground truth: one line of symbols separated by word_separator()
    if SEMANTIC:
        gt_path = sample_full_filepath + '.semantic'
    else:
        gt_path = sample_full_filepath + '.agnostic'

    with open(gt_path, 'r') as sample_gt_file:
        sample_gt_plain = sample_gt_file.readline().rstrip().split(word_separator())

    return sample_img, [word2int[lab] for lab in sample_gt_plain]

In [46]:
# Value used to fill the padded area of a batch of images.
PAD_COLUMN = 0

class static_counter:
    """Integer counter that wraps back to 0 when it reaches `top`.

    The class used to define `__init__` twice; Python keeps only the last
    definition, so the one-argument form was silently unavailable. Both call
    styles are now supported by a single constructor:

        static_counter(value, top)  -> starts at `value`
        static_counter(top)         -> starts at 0
    """

    def __init__(self, value, top=None):
        if top is None:
            # Single-argument form: the only argument is the upper bound.
            value, top = 0, value
        self.value = value
        self.top = top

    def incr(self):
        """Advance by one, wrapping around modulo `top`."""
        self.value = (self.value+1) % (self.top)

def get_batch(params, img_counter):
    """Read the next `batch_size` training samples and pack them into a batch.

    Args:
        params: model parameters; 'batch_size', 'img_height', 'img_channels',
            'conv_blocks' and 'conv_pooling_size' are read.
        img_counter: object with a `value` attribute (index of the next
            sample in `training_list`) and an `incr()` method, called once
            per sample read.

    Returns:
        dict with
            'inputs': float32 array [batch, height, max_width, channels];
                images narrower than the widest one are padded with PAD_COLUMN,
            'seq_lengths': integer array, one entry per sample, all equal to
                the padded width divided by the total width pooling,
            'targets': list of label-id lists, one per sample.

    Raises:
        Exception: if an image cannot be loaded.
    """
    height = params['img_height']
    image_suffix = '_distorted.jpg' if DISTORTIONS else '.png'
    gt_suffix = '.semantic' if SEMANTIC else '.agnostic'

    images = []
    labels = []
    # Read files
    for _ in range(params['batch_size']):
        sample_filepath = training_list[img_counter.value]
        sample_full_filepath = PRIMUS_PATH + '/' + sample_filepath + '/' + sample_filepath

        # IMAGE (grayscale is assumed)
        image_path = sample_full_filepath + image_suffix
        sample_img = cv2.imread(image_path, False)
        if sample_img is None:
            # Report the path that was actually tried (the message used to
            # name the '.png' file even when distorted images were requested).
            raise Exception('Error loading sample: ' + image_path)
        images.append(normalize(resize(sample_img, height)))

        # GROUND TRUTH: first line of the file, split into symbols
        with open(sample_full_filepath + gt_suffix, 'r') as sample_gt_file:
            sample_gt_plain = sample_gt_file.readline().rstrip().split(word_separator())
        labels.append([word2int[lab] for lab in sample_gt_plain])

        img_counter.incr()

    # Transform to batch: pad every image to the widest one
    max_image_width = max(img.shape[1] for img in images)
    batch_images = np.ones(shape=[params['batch_size'],
                                   params['img_height'],
                                   max_image_width,
                                   params['img_channels']], dtype=np.float32)*PAD_COLUMN
    for i, img in enumerate(images):
        batch_images[i, 0:img.shape[0], 0:img.shape[1], 0] = img

    # LENGTH: number of time steps left after the width pooling of every block
    width_reduction = 1
    for i in range(params['conv_blocks']):
        width_reduction = width_reduction * params['conv_pooling_size'][i][1]
    # Floor division keeps the lengths integral under Python 3 as well; they
    # are fed to an int32 placeholder.
    lengths = [ batch_images.shape[2] // width_reduction ] * batch_images.shape[0]

    return {
        'inputs': batch_images,
        'seq_lengths': np.asarray(lengths),
        'targets': labels,
    }

In [47]:
def _load_validation_sample(job):
    """Load one validation sample; `job` is a (sample_name, image_height) pair.

    Defined at module level so that it can be sent to pool workers.
    Returns (image, label_ids).
    """
    sample_name, height = job
    sample_base = PRIMUS_PATH + '/' + sample_name + '/' + sample_name

    image_path = sample_base + '.png'
    sample_img = cv2.imread(image_path, False)
    if sample_img is None:
        raise Exception('Error loading sample: ' + image_path)

    gt_suffix = '.semantic' if SEMANTIC else '.agnostic'
    with open(sample_base + gt_suffix, 'r') as gt_file:
        symbols = gt_file.readline().rstrip().split(word_separator())

    return normalize(resize(sample_img, height)), [word2int[s] for s in symbols]


def getValidation(params, validation_dict):
    """Return the whole validation set packed as one batch (built once).

    Fixes over the previous version:
      * the pool was created with one process per validation sample; it is
        now sized by multiprocessing's default (the CPU count),
      * the loaded samples were never collected (`images` stayed a list of
        None and `labels` stayed empty); the results of `pool.map` are now
        used, which also keeps images and labels in the same order,
      * sequence lengths use floor division so they stay integers.

    Args:
        params: model parameters; 'img_height', 'img_channels',
            'conv_blocks' and 'conv_pooling_size' are read.
        validation_dict: a previously built result, or None to build it.

    Returns:
        (validation_dict, size): dict with 'inputs', 'seq_lengths' and
        'targets' (same layout as get_batch) and the number of validation
        samples.

    NOTE(review): every validation image is held in a single array, so memory
    use grows with len(validation_list) times the widest image.
    """
    if not (validation_dict is None):
        return validation_dict, len(validation_list)

    jobs = [(name, params['img_height']) for name in validation_list]
    pool = multiprocessing.Pool()
    try:
        samples = pool.map(_load_validation_sample, jobs)
    finally:
        # close/join instead of a `with` block: works on Python 2 as well and
        # lets the workers finish cleanly.
        pool.close()
        pool.join()

    images = [img for img, _ in samples]
    labels = [label_ids for _, label_ids in samples]

    # Transform to batch: pad every image to the widest one
    image_widths = [img.shape[1] for img in images]
    max_image_width = max(image_widths) if image_widths else 0

    batch_images = np.ones(shape=[len(validation_list),
                                    params['img_height'],
                                    max_image_width,
                                    params['img_channels']], dtype=np.float32)*PAD_COLUMN

    for i, img in enumerate(images):
        batch_images[i, 0:img.shape[0], 0:img.shape[1], 0] = img

    # LENGTH: number of time steps left after the width pooling of every block
    width_reduction = 1
    for i in range(params['conv_blocks']):
        width_reduction = width_reduction * params['conv_pooling_size'][i][1]

    lengths = [ batch_images.shape[2] // width_reduction ] * batch_images.shape[0]

    validation_dict = {
        'inputs': batch_images,
        'seq_lengths': np.asarray(lengths),
        'targets': labels,
    }

    return validation_dict, len(validation_list)

# Training Loop

In [48]:
 current_batch_counter = static_counter(0, len(training_list))  # index of the next training sample; starts at 0 and wraps at len(training_list)
# NOTE(review): the assignment above begins with a stray leading space; it is
# left untouched here, but it is an indentation error outside of a notebook cell.
# validation_dict = None
# validation_batch, validation_size = getValidation(params, validation_dict)

In [49]:
# Training loop.
# NOTE: every iteration fetches ONE batch with get_batch and runs one
# optimisation step on it, so `epoch` counts training steps here, not full
# passes over training_list.
for epoch in range(MAX_EPOCHS):
    batch = get_batch(params, current_batch_counter)

    # One optimisation step; DROPOUT is fed as the keep probability.
    _, loss_value = sess.run([train_opt, loss],
                             feed_dict={
                                input: batch['inputs'],
                                seq_len: batch['seq_lengths'],
                                targets: sparse_tuple_from(batch['targets']),
                                rnn_keep_prob: DROPOUT,
                            })

    # Every 10th iteration: report the loss and write a checkpoint.
    if epoch % 10 == 0:
        # VALIDATION
        # NOTE(review): 'Validating...' is printed but the validation code
        # below is commented out, so no validation actually runs.
        print ('Loss value at epoch ' + str(epoch) + ':' + str(loss_value))
        print ('Validating...')

#         validation_batch, validation_size = getValidation(params, validation_dict)
#         print("exit")
#         val_idx = 0
        
#         val_ed = 0
#         val_len = 0
#         val_count = 0
            
#         while val_idx < validation_size:
#             mini_batch_feed_dict = {
#                 inputs: validation_batch['inputs'][val_idx:val_idx+params['batch_size']],
#                 seq_len: validation_batch['seq_lengths'][val_idx:val_idx+params['batch_size']],
#                 rnn_keep_prob: 1.0            
#             }            
                        
            
#             prediction = sess.run(decoded,
#                                   mini_batch_feed_dict)
    
#             str_predictions = sparse_tensor_to_strs(prediction)
    

#             for i in range(len(str_predictions)):
#                 ed = edit_distance(str_predictions[i], validation_batch['targets'][val_idx+i])
#                 val_ed = val_ed + ed
#                 val_len = val_len + len(validation_batch['targets'][val_idx+i])
#                 val_count = val_count + 1
                
#             val_idx = val_idx + params['batch_size']
#             print(val_idx)
    
#        print ('[Epoch ' + str(epoch) + '] ' + str(1. * val_ed / val_count) + ' (' + str(100. * val_ed / val_len) + ' SER) from ' + str(val_count) + ' samples.')        
        # The checkpoint is written to SAVE_MODEL with the iteration number
        # passed as global_step.
        print ('Saving the model...')
        saver.save(sess,SAVE_MODEL,global_step=epoch)
        print ('------------------------------')
        
    print("EPOCH " +str(epoch) + " COMPLETE")


Loss value at epoch 0:909.47
Validating...
Saving the model...
INFO:tensorflow:/models/semantic_1-35071-0 is not in all_model_checkpoint_paths. Manually adding it.
------------------------------
EPOCH 0 COMPLETE
EPOCH 1 COMPLETE
EPOCH 2 COMPLETE
EPOCH 3 COMPLETE
EPOCH 4 COMPLETE
EPOCH 5 COMPLETE
EPOCH 6 COMPLETE
EPOCH 7 COMPLETE
EPOCH 8 COMPLETE
EPOCH 9 COMPLETE
Loss value at epoch 10:118.53384
Validating...
Saving the model...
INFO:tensorflow:/models/semantic_1-35071-10 is not in all_model_checkpoint_paths. Manually adding it.
------------------------------
EPOCH 10 COMPLETE
EPOCH 11 COMPLETE
EPOCH 12 COMPLETE
EPOCH 13 COMPLETE
EPOCH 14 COMPLETE
EPOCH 15 COMPLETE
EPOCH 16 COMPLETE
EPOCH 17 COMPLETE
EPOCH 18 COMPLETE
EPOCH 19 COMPLETE
Loss value at epoch 20:114.56581
Validating...
Saving the model...
INFO:tensorflow:/models/semantic_1-35071-20 is not in all_model_checkpoint_paths. Manually adding it.
------------------------------
EPOCH 20 COMPLETE
EPOCH 21 COMPLETE
EPOCH 22 COMPLETE
E

In [50]:
import tensorflow as tf

# Report whether TensorFlow can see a GPU.
print("GPU is available" if tf.test.is_gpu_available() else "GPU is not available")

GPU is not available


# Test the Accuracy

In [12]:
import os
import subprocess
import random

In [13]:
# Checkpoint '.meta' file passed to the prediction script through its -model option.
path_to_model = "/models/semantic_model.meta"
# Dataset root used to build the image and ground-truth paths of the samples under test.
primus_path = "/primus_data/"
# Alternative checkpoint (disabled):
#path_to_model = "/models/semantic_1-1-90.meta"

In [14]:
# Prediction script that make_prediction runs as a separate process.
path_to_ctc_predict = "/tf-end-to-end/ctc_predict.py"
# Interpreter used to run the prediction script.
python_path = "/usr/bin/python"
# Vocabulary file passed to the prediction script through its -vocabulary option.
vocabulary_path = "/tf-end-to-end/Data/vocabulary_semantic.txt"

In [15]:
#python ctc_predict.py -image Data/Example/000051652-1_2_1.png -model Models/semantic_model.meta -vocabulary Data/vocabulary_semantic.txt
def make_prediction(path_input):
    """Run the prediction script on one image and store its output in /log.txt.

    Fixes over the previous version:
      * the command was launched twice (a fire-and-forget Popen followed by
        check_output), so two processes wrote to /log.txt at the same time;
        it now runs exactly once and is waited for,
      * the command is passed as an argument list instead of a concatenated
        shell string, so paths containing spaces or shell characters are safe.

    Args:
        path_input: path of the image to transcribe.

    Returns:
        The text written to /log.txt (the script's standard output).
        Callers that read /log.txt themselves keep working.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status; `output` holds what the script wrote to standard error.
    """
    command = [python_path, path_to_ctc_predict,
               "-image", path_input,
               "-model", path_to_model,
               "-vocabulary", vocabulary_path]

    # Standard output goes to the log file, standard error is captured so it
    # can be attached to the exception.
    with open("/log.txt", "w") as log_file:
        process = subprocess.Popen(command, stdout=log_file, stderr=subprocess.PIPE)
        _, errors = process.communicate()

    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command, output=errors)

    with open("/log.txt", "r") as log_file:
        return log_file.read()

In [29]:
def compare_semantic(s1, s2):
    """Fraction of positions at which two tab-separated sequences agree.

    Both strings are split on tabs and compared position by position
    (ignoring surrounding whitespace of each symbol) over the length of the
    shorter one; symbols beyond that length are not taken into account.

    The division is done in floating point explicitly: under Python 2 the
    previous integer division floored every score to 0 or 1.

    Args:
        s1, s2: tab-separated symbol sequences.

    Returns:
        A float between 0.0 and 1.0.
    """
    symbols_1 = s1.split('\t')
    symbols_2 = s2.split('\t')

    # str.split always returns at least one element, so `compared` is >= 1.
    compared = min(len(symbols_1), len(symbols_2))
    # zip stops at the shorter sequence, i.e. after `compared` pairs.
    matches = sum(1 for first, second in zip(symbols_1, symbols_2)
                  if first.strip() == second.strip())

    return matches / float(compared)
        

In [30]:
# Evaluate the first n samples of the (shuffled) validation list.
# (Original note: a run across the whole test set used n = 8767.)
n = 1
accuracy = 0
# NOTE: shuffles validation_list in place, so re-running the cell evaluates
# a different subset.
random.shuffle(validation_list)
for i in range(n):
    #make the prediction (its output is written to /log.txt)
    sample_base = primus_path + validation_list[i] + "/" + validation_list[i]
    fp = sample_base + ".png"
    make_prediction(fp)
    # `with` closes both files even if reading or comparing fails.
    with open("/log.txt", "r") as f:
        s1 = f.read()
    with open(sample_base + ".semantic") as o:
        s2 = o.read()
    accuracy += compare_semantic(s1, s2)
    # Running SUM of the per-sample scores (normalised after the loop).
    print(accuracy)

#normalize the accuracy (float division so the mean is not floored on Python 2)
accuracy = accuracy / float(n)

hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
hit
1
1
1
1
