In [1]:
import os
from datetime import datetime
import time
import numpy as np
import h5py
import random
from PIL import Image
import tensorflow as tf
import json
from model import Model

In this part, we define the evaluator that measures validation accuracy. First, we extract the image information from the validation set. We reshape each image into the required shape and build batches for validation. We then pass the batches through the model, compute the predictions and their accuracy, and report the validation accuracy.

In [5]:
def write_summary(path_to_eval_log_dir):
    """Create the summary writer used for evaluation logs.

    Args:
        path_to_eval_log_dir: Directory handed to ``tf.summary.FileWriter``.

    Returns:
        The newly constructed ``tf.summary.FileWriter``.
    """
    return tf.summary.FileWriter(path_to_eval_log_dir)

def evaluate(writer, path_to_checkpoint, path_to_tfrecords_file, num_examples, global_step):
    """Restore a checkpoint and measure digit-sequence accuracy on a TFRecords file.

    A fresh graph is built on every call: records are read from
    ``path_to_tfrecords_file``, batched, passed through ``Model.inference`` and
    compared with the labels. The five digit positions of each example are
    joined into one string before comparison, so an example only counts as
    correct when every position matches.

    Args:
        writer: Summary writer; receives the merged summary via ``add_summary``.
        path_to_checkpoint: Checkpoint path passed to ``Saver.restore``.
        path_to_tfrecords_file: TFRecords file with 'image', 'length' and
            'digits' features.
        num_examples: Number of examples in the file. Only
            ``num_examples // 128`` full batches are evaluated.
        global_step: Step value attached to the written summary.

    Returns:
        The accumulated accuracy after all evaluation batches.
    """
    batch_size = 128
    num_batches = num_examples // batch_size

    with tf.Graph().as_default():
        filename_queue_val = tf.train.string_input_producer([path_to_tfrecords_file], num_epochs=None)

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue_val)
        features_val = tf.parse_single_example(
                serialized_example,
                features={
                    'image': tf.FixedLenFeature([], tf.string),
                    'length': tf.FixedLenFeature([], tf.int64),
                    'digits': tf.FixedLenFeature([5], tf.int64)
                })
        # The image is stored as raw uint8 bytes; decode it to a flat tensor.
        image_val = tf.decode_raw(features_val['image'], tf.uint8)
        # Convert to float32 (convert_image_dtype scales uint8 into [0, 1]),
        # then shift and scale so values lie in [-1, 1].
        image_val = tf.image.convert_image_dtype(image_val, dtype=tf.float32)
        image_val = tf.multiply(tf.subtract(image_val, 0.5), 2)
        image_val = tf.reshape(image_val, [64, 64, 3])
        # NOTE(review): a random crop makes the reported validation accuracy vary
        # between calls; a deterministic crop may be preferable -- confirm intent.
        image_val = tf.random_crop(image_val, [54, 54, 3])
        # Labels: number of digits and the five digit slots.
        length_val = tf.cast(features_val['length'], tf.int32)
        digits_val = tf.cast(features_val['digits'], tf.int32)

        min_queue_examples_val = int(0.4 * num_examples)
        # Build validation batches. The length labels are batched alongside the
        # others but are not used in the accuracy computed below.
        image_batch_val, _, digits_batch_val = tf.train.batch([image_val, length_val, digits_val],
                                                              batch_size=batch_size,
                                                              num_threads=2,
                                                              capacity=min_queue_examples_val + 3 * batch_size)
        # drop_rate=0.0 presumably disables dropout for evaluation -- verify against Model.
        _, digits_logits_val = Model.inference(image_batch_val, drop_rate=0.0)
        # Pick the highest-scoring class for every digit position.
        digits_predictions_val = tf.argmax(digits_logits_val, axis=2)

        # Join the per-position values into one string per example so that the
        # accuracy metric compares whole sequences rather than single digits.
        labels_string_val = tf.reduce_join(tf.as_string(digits_batch_val), axis=1)
        predictions_string_val = tf.reduce_join(tf.as_string(digits_predictions_val), axis=1)

        accuracy_vali, update_accuracy_vali = tf.metrics.accuracy(
            labels=labels_string_val,
            predictions=predictions_string_val
        )

        tf.summary.image('image', image_batch_val)
        tf.summary.scalar('accuracy', accuracy_vali)
        tf.summary.histogram('variables',
                             tf.concat([tf.reshape(var, [-1]) for var in tf.trainable_variables()], axis=0))
        summary = tf.summary.merge_all()

        with tf.Session() as sess:
            # Local variables hold the running totals of tf.metrics.accuracy.
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                restorer = tf.train.Saver()
                restorer.restore(sess, path_to_checkpoint)

                # Each run of the update op folds one more batch into the metric.
                for _ in range(num_batches):
                    sess.run(update_accuracy_vali)

                accuracy_val, summary_val = sess.run([accuracy_vali, summary])
                writer.add_summary(summary_val, global_step=global_step)
            finally:
                # Always stop the queue-runner threads, even when restoring or
                # evaluating raised, so they do not outlive this call.
                coord.request_stop()
                coord.join(threads)

    return accuracy_val




In this part, we define the main training function. We first extract the image information from the TFRecords file and build batches. Then we pass each batch through the model, obtain the predictions, and compute the loss; the validation accuracy is evaluated periodically. We use this function either to train for a limited number of steps on the same dataset under different hyperparameter settings, or to train the final model once the best setting has been selected. Because training takes a long time, we did not use early stopping.

In [6]:
#Build the training process

def train(path_to_train_tfrecords_file, num_train_examples, path_to_val_tfrecords_file, num_val_examples,
           path_to_train_log_dir, training_options, history_file_path):
    """Train the model, evaluating on the validation set at regular intervals.

    Every 10 global steps the current loss is printed. Every 100 global steps
    the training summary is written, a 'latest.ckpt' checkpoint is saved and
    evaluated with ``evaluate``, and a 'model.ckpt-<step>' checkpoint is saved
    whenever the validation accuracy beats the best value seen so far. After
    the loop the recorded validation accuracies are written to
    ``history_file_path`` as CSV and the elapsed wall-clock time is printed.

    Args:
        path_to_train_tfrecords_file: TFRecords file with the training examples.
        num_train_examples: Number of training examples; only used to size the
            batching queue.
        path_to_val_tfrecords_file: TFRecords file passed to ``evaluate``.
        num_val_examples: Number of validation examples passed to ``evaluate``.
        path_to_train_log_dir: Directory for training summaries and checkpoints;
            evaluation summaries go to its 'eval/val' sub-directory.
        training_options: Dict with 'batch_size', 'learning_rate', 'decay_steps'
            and 'decay_rate'. An optional 'num_steps' entry overrides the number
            of training iterations (default 100000).
        history_file_path: Destination of the accuracy-history CSV file.
    """
    batch_size = training_options['batch_size']
    # Number of optimisation steps; the default matches the previously hard-coded value.
    num_steps = training_options.get('num_steps', 100000)
    num_steps_to_show_loss = 10   # print the loss every this many global steps
    num_steps_to_check = 100      # checkpoint and validate every this many global steps
    run_start_time = time.time()
    with tf.Graph().as_default():
        filename_queue_train = tf.train.string_input_producer([path_to_train_tfrecords_file], num_epochs=None)

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue_train)
        features_train = tf.parse_single_example(
                serialized_example,
                features={
                    'image': tf.FixedLenFeature([], tf.string),
                    'length': tf.FixedLenFeature([], tf.int64),
                    'digits': tf.FixedLenFeature([5], tf.int64)
                })
        # The image is stored as raw uint8 bytes; decode it to a flat tensor.
        image_train = tf.decode_raw(features_train['image'], tf.uint8)
        # Convert to float32 (convert_image_dtype scales uint8 into [0, 1]),
        # then shift and scale so values lie in [-1, 1].
        image_train = tf.image.convert_image_dtype(image_train, dtype=tf.float32)
        image_train = tf.multiply(tf.subtract(image_train, 0.5), 2)
        image_train = tf.reshape(image_train, [64, 64, 3])
        # Take a random 54x54 crop of the 64x64 image.
        image_train = tf.random_crop(image_train, [54, 54, 3])
        # Labels: number of digits and the five digit slots.
        length_train = tf.cast(features_train['length'], tf.int32)
        digits_train = tf.cast(features_train['digits'], tf.int32)

        min_queue_examples_train = int(0.4 * num_train_examples)
        # Build training batches.
        image_batch_train, length_batch_train, digits_batch_train = tf.train.batch(
            [image_train, length_train, digits_train],
            batch_size=batch_size,
            num_threads=2,
            capacity=min_queue_examples_train + 3 * batch_size)
        length_logits_train, digits_logits_train = Model.inference(image_batch_train, drop_rate=0.2)
        loss = Model.loss(length_logits_train, digits_logits_train, length_batch_train, digits_batch_train)

        # Plain gradient descent with a staircase exponential learning-rate decay.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(training_options['learning_rate'], global_step=global_step,
                                                   decay_steps=training_options['decay_steps'],
                                                   decay_rate=training_options['decay_rate'], staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)

        tf.summary.image('image', image_batch_train)
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('learning_rate', learning_rate)
        summary = tf.summary.merge_all()

        with tf.Session() as sess:
            summary_writer = tf.summary.FileWriter(path_to_train_log_dir, sess.graph)
            evaluator = write_summary(os.path.join(path_to_train_log_dir, 'eval/val'))

            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            saver = tf.train.Saver()

            print ('Start training')
            best_accuracy = 0.0   # best validation accuracy seen so far
            accuracy_history = []
            try:
                for _ in range(num_steps):
                    _, loss_val, summary_val, global_step_val, learning_rate_val = sess.run(
                        [train_op, loss, summary, global_step, learning_rate])

                    if global_step_val % num_steps_to_show_loss == 0:
                        print ('%s: step %d, loss = %f ' % (
                            datetime.now(), global_step_val, loss_val))

                    if global_step_val % num_steps_to_check != 0:
                        continue

                    summary_writer.add_summary(summary_val, global_step=global_step_val)

                    # Save the current weights so evaluate() can restore them in its own graph.
                    path_to_latest_checkpoint_file = saver.save(sess, os.path.join(path_to_train_log_dir, 'latest.ckpt'))
                    accuracy = evaluate(evaluator, path_to_latest_checkpoint_file, path_to_val_tfrecords_file,
                                        num_val_examples,
                                        global_step_val)
                    print ('Validation accuracy is= %f, best accuracy %f' % (accuracy, best_accuracy))
                    # Record every evaluation. This used to be gated on a separate
                    # Python counter, which only lined up with the check above when
                    # the fetched global_step already included the increment.
                    accuracy_history.append(accuracy)
                    if accuracy > best_accuracy:
                        path_to_checkpoint_file = saver.save(sess, os.path.join(path_to_train_log_dir, 'model.ckpt'),
                                                             global_step=global_step_val)
                        print ('Save file to: %s' % path_to_checkpoint_file)
                        best_accuracy = accuracy
            finally:
                # Stop the input threads and flush/close both event files even if
                # training was interrupted or raised.
                try:
                    coord.request_stop()
                    coord.join(threads)
                finally:
                    summary_writer.close()
                    evaluator.close()

            # The header text is kept exactly as before in case downstream
            # analysis reads the column by name.
            with open(history_file_path, 'w', encoding='utf-8') as f:
                f.write('times, acuuracy\n')
                for i, d in enumerate(accuracy_history):
                    f.write(str(i) + ',' + str(d) + '\n')
            end_time = time.time()
            print(end_time - run_start_time)
            print ('Training progess is finished')

In the next few parts, we train on the same dataset with different hyperparameter settings for a limited number of training steps, in order to choose the best model we can get in limited time. The first parameter set is: batch size 16, learning rate 1e-2.

In [10]:
# Hyperparameter run 1: batch size 16, initial learning rate 1e-2.
# Input files and output locations for this run.
train_tfrecords_file = 'data/train.tfrecords'
val_tfrecords_file = 'data/val.tfrecords'
tfrecords_meta_file = 'data/meta.json'
log_dir = 'logs_hyper/train16_1e2'
history_file_path = 'data/history_hyper1e2.csv'  # accuracy history kept for later analysis

# Options consumed by train(); the values are compared across runs later.
opt = dict(
    batch_size=16,        # candidate values noted for the search: 16, 32, 64
    learning_rate=1e-2,   # candidate values noted for the search: 1e-2, 5e-3, 1e-3
    decay_steps=10000,
    decay_rate=0.9,
)

# The metadata JSON stores the sizes of the training, validation and test sets.
with open(tfrecords_meta_file, 'r') as f:
    content = json.load(f)
num_train_examples, num_val_examples, num_test_examples = (
    content['num_examples'][split] for split in ('train', 'val', 'test')
)

# Train the model.
train(path_to_train_tfrecords_file=train_tfrecords_file,
      num_train_examples=num_train_examples,
      path_to_val_tfrecords_file=val_tfrecords_file,
      num_val_examples=num_val_examples,
      path_to_train_log_dir=log_dir,
      training_options=opt,
      history_file_path=history_file_path)

Start training
2019-12-08 02:28:51.611761: step 10, loss = 7.985003 
2019-12-08 02:28:52.306031: step 20, loss = 7.750267 
2019-12-08 02:28:53.008751: step 30, loss = 7.571653 
2019-12-08 02:28:53.700669: step 40, loss = 7.759266 
2019-12-08 02:28:54.402763: step 50, loss = 7.156721 
2019-12-08 02:28:55.098114: step 60, loss = 7.069119 
2019-12-08 02:28:55.795767: step 70, loss = 8.144591 
2019-12-08 02:28:56.493320: step 80, loss = 7.414077 
2019-12-08 02:28:57.196634: step 90, loss = 7.039929 
2019-12-08 02:28:57.903247: step 100, loss = 6.138819 
INFO:tensorflow:Restoring parameters from logs_hyper/train16_1e2/latest.ckpt
Validation accuracy is= 0.020733, best accuracy 0.000000
Save file to: logs_hyper/train16_1e2/model.ckpt-100
2019-12-08 02:29:10.525785: step 110, loss = 7.653413 
2019-12-08 02:29:11.190284: step 120, loss = 6.843325 
2019-12-08 02:29:11.865132: step 130, loss = 6.132644 
2019-12-08 02:29:12.514873: step 140, loss = 7.426774 
2019-12-08 02:29:13.185178: step 150, 

The second parameter set is: batch size 16, learning rate 5e-3.

In [11]:
# Hyperparameter run 2: batch size 16, initial learning rate 5e-3.
# Input files and output locations for this run.
train_tfrecords_file = 'data/train.tfrecords'
val_tfrecords_file = 'data/val.tfrecords'
tfrecords_meta_file = 'data/meta.json'
log_dir = 'logs_hyper/train16_5e3'
history_file_path = 'data/history_hyper5e3.csv'  # accuracy history kept for later analysis

# Options consumed by train(); the values are compared across runs later.
opt = dict(
    batch_size=16,
    learning_rate=5e-3,
    decay_steps=10000,
    decay_rate=0.9,
)

# The metadata JSON stores the sizes of the training, validation and test sets.
with open(tfrecords_meta_file, 'r') as f:
    content = json.load(f)
num_train_examples, num_val_examples, num_test_examples = (
    content['num_examples'][split] for split in ('train', 'val', 'test')
)

# Train the model.
train(path_to_train_tfrecords_file=train_tfrecords_file,
      num_train_examples=num_train_examples,
      path_to_val_tfrecords_file=val_tfrecords_file,
      num_val_examples=num_val_examples,
      path_to_train_log_dir=log_dir,
      training_options=opt,
      history_file_path=history_file_path)

Start training
2019-12-08 03:43:00.640014: step 10, loss = 13.527200 
2019-12-08 03:43:01.318497: step 20, loss = 7.835879 
2019-12-08 03:43:01.997730: step 30, loss = 7.761820 
2019-12-08 03:43:02.701307: step 40, loss = 7.873653 
2019-12-08 03:43:03.389936: step 50, loss = 7.714021 
2019-12-08 03:43:04.078993: step 60, loss = 7.663630 
2019-12-08 03:43:04.776865: step 70, loss = 8.297626 
2019-12-08 03:43:05.467240: step 80, loss = 7.453154 
2019-12-08 03:43:06.159729: step 90, loss = 7.044896 
2019-12-08 03:43:06.852391: step 100, loss = 6.121518 
INFO:tensorflow:Restoring parameters from logs_hyper/train16_5e3/latest.ckpt
Validation accuracy is= 0.020733, best accuracy 0.000000
Save file to: logs_hyper/train16_5e3/model.ckpt-100
2019-12-08 03:43:18.712105: step 110, loss = 7.745437 
2019-12-08 03:43:19.341483: step 120, loss = 6.874719 
2019-12-08 03:43:20.006115: step 130, loss = 6.247143 
2019-12-08 03:43:20.671000: step 140, loss = 7.419309 
2019-12-08 03:43:21.308242: step 150,

The third parameter set is: batch size 16, learning rate 1e-3.

In [12]:
# Hyperparameter run 3: batch size 16, initial learning rate 1e-3.
# Input files and output locations for this run.
train_tfrecords_file = 'data/train.tfrecords'
val_tfrecords_file = 'data/val.tfrecords'
tfrecords_meta_file = 'data/meta.json'
log_dir = 'logs_hyper/train16_1e3'
history_file_path = 'data/history_hyper1e3.csv'  # accuracy history kept for later analysis

# Options consumed by train().
opt = dict(
    batch_size=16,
    learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=0.9,
)

# The metadata JSON stores the sizes of the training, validation and test sets.
with open(tfrecords_meta_file, 'r') as f:
    content = json.load(f)
num_train_examples, num_val_examples, num_test_examples = (
    content['num_examples'][split] for split in ('train', 'val', 'test')
)

# Train the model.
train(path_to_train_tfrecords_file=train_tfrecords_file,
      num_train_examples=num_train_examples,
      path_to_val_tfrecords_file=val_tfrecords_file,
      num_val_examples=num_val_examples,
      path_to_train_log_dir=log_dir,
      training_options=opt,
      history_file_path=history_file_path)

Start training
2019-12-08 04:56:55.771349: step 10, loss = 13.848204 
2019-12-08 04:56:56.426378: step 20, loss = 13.748087 
2019-12-08 04:56:57.116889: step 30, loss = 13.646866 
2019-12-08 04:56:57.812463: step 40, loss = 13.444175 
2019-12-08 04:56:58.513376: step 50, loss = 13.102096 
2019-12-08 04:56:59.204788: step 60, loss = 11.489511 
2019-12-08 04:56:59.894455: step 70, loss = 9.130874 
2019-12-08 04:57:00.589227: step 80, loss = 7.952252 
2019-12-08 04:57:01.291907: step 90, loss = 6.993955 
2019-12-08 04:57:01.975065: step 100, loss = 6.353624 
INFO:tensorflow:Restoring parameters from logs_hyper/train16_1e3/latest.ckpt
Validation accuracy is= 0.020733, best accuracy 0.000000
Save file to: logs_hyper/train16_1e3/model.ckpt-100
2019-12-08 04:57:13.672267: step 110, loss = 7.717138 
2019-12-08 04:57:14.312777: step 120, loss = 7.748930 
2019-12-08 04:57:14.948573: step 130, loss = 6.438725 
2019-12-08 04:57:15.612141: step 140, loss = 7.281817 
2019-12-08 04:57:16.239347: step