## Columbia University
### ECBM E4040 Neural Networks and Deep Learning. Fall 2019.

# Test

In this file, we are going to:

(1) Define some basic functions to read the data (which has already been processed and converted to a .tfrecords file), including get_features, get_image, get_length, and get_digits.

(2) Build the batch loader and the evaluator used for testing.

(3) Build test process.

(4) Test.

In [1]:
import os
import tensorflow as tf
from model import Model
import json

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Basic functions to extract features from data

In [None]:
def get_features(path_to_tfrecords_file):
    """Read one serialized example from a TFRecords file and parse it.

    The file is fed through a string input producer with ``num_epochs=None``
    and read with a ``tf.TFRecordReader``.

    Args:
        path_to_tfrecords_file: Path of the .tfrecords file to read.

    Returns:
        The result of ``tf.parse_single_example`` for the keys ``'image'``
        (scalar string), ``'length'`` (scalar int64) and ``'digits'``
        (int64 of shape ``[5]``).
    """
    # Layout of a single record as it was written to the .tfrecords file.
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'length': tf.FixedLenFeature([], tf.int64),
        'digits': tf.FixedLenFeature([5], tf.int64)
    }

    file_queue = tf.train.string_input_producer([path_to_tfrecords_file], num_epochs=None)
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(file_queue)
    return tf.parse_single_example(serialized, features=feature_spec)

In [None]:
def get_image(features_image):
    """Decode a raw image feature into a normalized, randomly cropped tensor.

    Args:
        features_image: The raw ``'image'`` feature (bytes of uint8 pixels).

    Returns:
        A float32 tensor of shape ``[54, 54, 3]``.
    """
    raw_pixels = tf.decode_raw(features_image, tf.uint8)
    as_float = tf.image.convert_image_dtype(raw_pixels, dtype=tf.float32)
    # Shift by 0.5 and scale by 2 to center the values around zero.
    centered = tf.multiply(tf.subtract(as_float, 0.5), 2)
    full_image = tf.reshape(centered, [64, 64, 3])
    # Take a random 54x54 window out of the 64x64 image.
    return tf.random_crop(full_image, [54, 54, 3])

In [None]:
def get_length(features_length):
    """Return the ``'length'`` feature cast to ``tf.int32``."""
    return tf.cast(features_length, tf.int32)

In [None]:
def get_digits(features_digits):
    """Return the ``'digits'`` feature cast to ``tf.int32``."""
    return tf.cast(features_digits, tf.int32)

### Build three_batch
#### image_batch, length_batch, digit_batch

In [None]:
def three_batch(path_to_tfrecords_file, num_examples, batch_size, shuffled):
    """Build batched image, length and digits tensors from a TFRecords file.

    Args:
        path_to_tfrecords_file: Path of the .tfrecords file to read; it must
            exist.
        num_examples: Number of examples in the file. Used only to size the
            batching queue.
        batch_size: Number of examples per batch.
        shuffled: If true, examples are drawn through
            ``tf.train.shuffle_batch``; otherwise they are batched in read
            order with ``tf.train.batch``.

    Returns:
        A tuple ``(image_batch, length_batch, digits_batch)``.

    Raises:
        AssertionError: If ``path_to_tfrecords_file`` does not exist.
    """
    assert tf.gfile.Exists(path_to_tfrecords_file), '%s not found' % path_to_tfrecords_file

    features = get_features(path_to_tfrecords_file)
    image = get_image(features['image'])
    length = get_length(features['length'])
    digits = get_digits(features['digits'])

    min_queue_examples = int(0.4 * num_examples)
    capacity = min_queue_examples + 3 * batch_size
    tensors = [image, length, digits]

    if shuffled:
        # Previously `shuffled` was ignored and batches always came out in
        # read order; honor the flag with a shuffling queue.
        image_batch, length_batch, digits_batch = tf.train.shuffle_batch(
            tensors,
            batch_size=batch_size,
            num_threads=2,
            capacity=capacity,
            min_after_dequeue=min_queue_examples)
    else:
        image_batch, length_batch, digits_batch = tf.train.batch(
            tensors,
            batch_size=batch_size,
            num_threads=2,
            capacity=capacity)

    return image_batch, length_batch, digits_batch

### Build normal evaluator for test process
This evaluator follows a common pattern; refer to the previous homework for details.

In [3]:
# Used to evaluate the accuracy of the model
class Evaluator(object):
    """Evaluates a saved model checkpoint on a TFRecords dataset.

    Summaries produced during evaluation are written through a
    ``tf.summary.FileWriter`` created on ``path_to_eval_log_dir``.
    """

    def __init__(self, path_to_eval_log_dir):
        """Create the summary writer for ``path_to_eval_log_dir``."""
        self.summary_writer = tf.summary.FileWriter(path_to_eval_log_dir)

    def evaluate(self, path_to_checkpoint, path_to_tfrecords_file, num_examples, global_step):
        """Restore a checkpoint and measure its accuracy on a dataset.

        Args:
            path_to_checkpoint: Checkpoint path passed to ``Saver.restore``.
            path_to_tfrecords_file: Path of the .tfrecords file to evaluate on.
            num_examples: Number of examples in the file.
            global_step: Step value recorded with the written summary.

        Returns:
            The accuracy value accumulated over all evaluated batches.
        """
        batch_size = 128
        # Only full batches are evaluated.
        num_batches = num_examples // batch_size
        # When True, the predicted length is compared along with the digits.
        needs_include_length = False

        with tf.Graph().as_default():
            image_batch, length_batch, digits_batch = three_batch(path_to_tfrecords_file,
                                                                  num_examples=num_examples,
                                                                  batch_size=batch_size,
                                                                  shuffled=False)
            length_logits, digits_logits = Model.inference(image_batch, drop_rate=0.0)
            length_predictions = tf.argmax(length_logits, axis=1)
            digits_predictions = tf.argmax(digits_logits, axis=2)

            if needs_include_length:
                labels = tf.concat([tf.reshape(length_batch, [-1, 1]), digits_batch], axis=1)
                predictions = tf.concat([tf.reshape(length_predictions, [-1, 1]), digits_predictions], axis=1)
            else:
                labels = digits_batch
                predictions = digits_predictions

            # Join each row into one string so that an example counts as
            # correct only when its whole sequence matches.
            labels_string = tf.reduce_join(tf.as_string(labels), axis=1)
            predictions_string = tf.reduce_join(tf.as_string(predictions), axis=1)

            accuracy, update_accuracy = tf.metrics.accuracy(
                labels=labels_string,
                predictions=predictions_string
            )

            tf.summary.image('image', image_batch)
            tf.summary.scalar('accuracy', accuracy)
            tf.summary.histogram('variables',
                                 tf.concat([tf.reshape(var, [-1]) for var in tf.trainable_variables()], axis=0))
            summary = tf.summary.merge_all()

            with tf.Session() as sess:
                sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                try:
                    restorer = tf.train.Saver()
                    restorer.restore(sess, path_to_checkpoint)

                    for _ in range(num_batches):
                        sess.run(update_accuracy)

                    accuracy_val, summary_val = sess.run([accuracy, summary])
                    self.summary_writer.add_summary(summary_val, global_step=global_step)
                finally:
                    # Stop the queue-runner threads even if restoring or
                    # running the graph failed.
                    coord.request_stop()
                    coord.join(threads)

        return accuracy_val

### Test
Pass in the file paths and start testing the existing models.<br>
The test accuracy of every model exceeds 84%, and the gap between train and test accuracy is quite small.

In [5]:
# Start the testing process: evaluate every saved checkpoint on the test set.
path_to_train_tfrecords_file = './data/train.tfrecords'
path_to_val_tfrecords_file = './data/val.tfrecords'
path_to_test_tfrecords_file = './data/test.tfrecords'
path_to_tfrecords_meta_file = './data/meta.json'
path_to_checkpoint_dir = 'logs_hyper/train'
path_to_train_eval_log_dir = './logs/eval/train'
path_to_val_eval_log_dir = './logs/eval/val'
path_to_test_eval_log_dir = './logs/eval/test'

# The meta file stores the number of examples in each split.
with open(path_to_tfrecords_meta_file, 'r') as f:
    content = json.load(f)
num_train_examples = content['num_examples']['train']
num_val_examples = content['num_examples']['val']
num_test_examples = content['num_examples']['test']

evaluator = Evaluator(path_to_test_eval_log_dir)

# get_checkpoint_state returns None when the directory holds no checkpoint
# state; fail with a clear message instead of an AttributeError.
checkpoint_state = tf.train.get_checkpoint_state(path_to_checkpoint_dir)
if checkpoint_state is None:
    raise IOError('No checkpoint found in %s' % path_to_checkpoint_dir)
checkpoint_paths = checkpoint_state.all_model_checkpoint_paths

for path_to_checkpoint in checkpoint_paths:
    # The text after the last '-' in the checkpoint path is taken as the step.
    global_step = path_to_checkpoint.split('-')[-1]
    try:
        global_step_val = int(global_step)
    except ValueError:
        # Skip checkpoints whose suffix is not a number.
        continue

    accuracy = evaluator.evaluate(path_to_checkpoint, path_to_test_tfrecords_file, num_test_examples,
                                  global_step_val)
    print ('Evaluate the model %s on %s, test accuracy is = %f' % (path_to_checkpoint, path_to_test_tfrecords_file, accuracy))

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecor