In [2]:
import tensorflow as tf
from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging

from preprocessing.preprocessing_factory import get_preprocessing
from nets import nets_factory

import os
import time
from tensorflow.contrib import slim

from datasets import dataset_utils
from checkpoints_downloader import ckpt_maker
from dataset_preparation import get_split, load_batch

In [3]:
MODEL = 'resnet_v2_50'

In [4]:
checkpoint_file = ckpt_maker(MODEL)

Checkpoint for resnet_v2_50 is ready!
File name: checkpoints/resnet_v2_50.ckpt


In [5]:
#================ DATASET INFORMATION ======================
#State dataset directory where the tfrecord files are located
dataset_dir = 'drivers_data'

#State where your log file is at. If it doesn't exist, create it.
log_dir = 'log/' + MODEL

if not tf.gfile.Exists(log_dir):
    tf.gfile.MakeDirs(log_dir)

#State the number of classes to predict:
num_classes = 10

# #State the labels file and read it
# labels_file = 'drivers_data/labels.txt'
# labels = open(labels_file, 'r')

# #Create a dictionary to refer each label to their string name
# labels_to_name = {}
# for line in labels:
#     label, string_name = line.split(':')
#     string_name = string_name[:-1] #Remove newline
#     labels_to_name[int(label)] = string_name

# Create the file pattern of your TFRecord files so that it could be recognized later on
# file_pattern = 'drivers_%s_*.tfrecord'
file_pattern = 'drivers_{}_*.tfrecord'

#Create a dictionary that will help people understand your dataset better. This is required by the Dataset class later.
items_to_descriptions = {
    'image': 'A 3-channel RGB coloured driver image.',
    'label': 'A label from 0 to 9.'
}

In [6]:
# The model for training
model_train = nets_factory.get_network_fn(MODEL, num_classes, is_training=True)

# The model for evaluation
model_eval = nets_factory.get_network_fn(MODEL, num_classes, is_training=False)

In [7]:
# State the image size you're resizing your images to. 
image_size = model_train.default_image_size

In [10]:
#================= TRAINING INFORMATION ==================
# State the number of epochs to train
num_epochs = 20

# State your batch size
batch_size = 32

# Learning rate information and configuration (Up to you to experiment)
initial_learning_rate = 0.001
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 3

In [None]:
with tf.Graph().as_default() as graph:
    tf.logging.set_verbosity(tf.logging.INFO)
    
    # creat dataset and load batches
    dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
    images, _, labels = load_batch(dataset, 
                                   batch_size=batch_size, 
                                   MODEL=MODEL, 
                                   height=image_size, 
                                   width=image_size, 
                                   is_training=True)
    
    num_batches_per_epoch = int(dataset.num_samples / batch_size)
    num_steps_per_epoch = num_batches_per_epoch
    decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
    
    # creat the model
    logits, end_points = model_train(images)
    
    # define scopes to excluded
    exclude = ['resnet_v2_50/logits']
    variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
    
    # one-hot-encodeing of the labels
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
    
    # calculate loss
    loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
    total_loss = tf.losses.get_total_loss()
    
    # create global step for mornitoring
    global_step = get_or_create_global_step()
    
    # define decaying learning rate
    learning_rate = tf.train.exponential_decay(learning_rate=initial_learning_rate, 
                                               global_step=global_step, 
                                               decay_steps=decay_steps, 
                                               decay_rate=learning_rate_decay_factor, 
                                               staircase=True)
    
    # optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    
    # create training operator
    train_op = slim.learning.create_train_op(total_loss, optimizer)
    
    # the predictions
    probabilities = end_points['predictions']
    predictions = tf.argmax(end_points['predictions'], 1)
    accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
    metrics_op = tf.group(accuracy_update, probabilities)
    
    # summaries
    tf.summary.scalar('losses/Total_Loss', total_loss)
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('learning_rate', learning_rate)
    my_summary_op = tf.summary.merge_all()
    
    # define training step function
    def train_step(sess, train_op, global_step):
        start_time = time.time()
        total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
        time_elapsed = time.time() - start_time
        
        # logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
        
        return total_loss, global_step_count
    
    # create saver function to restore variables from a checkpoint file
    saver = tf.train.Saver(variables_to_restore)
    def restore_fn(sess):
        return saver.restore(sess, checkpoint_file)
    
    sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)
    
    with sv.managed_session() as sess:
        for step in range(num_steps_per_epoch * num_epochs):
            if step % num_batches_per_epoch == 0:
                logging.info('Epoch {}/{}'.format(step/num_batches_per_epoch + 1, num_epochs))
                learning_rate_value, accuracy_value = sess.run([learning_rate, accuracy])
                logging.info('Current Learning Rate: {}'.format(learning_rate_value))
                logging.info('Current Streaming Accuracy: {}'.format(accuracy_value))
                
            if step % 10 == 0:
                loss, step = train_step(sess, train_op, sv.global_step)
                summaries = sess.run(my_summary_op)
                sv.summary_computed(sess, summaries)
                logging.info('global step {}: loss: {}'.format(step, loss))
            else:
                loss, _ = train_step(sess, train_op, sv.global_step)
                
        logging.info('Final Loss: {}'.format(loss))
        logging.info('Final Accuracy: {}'.format(sess.run(accuracy)))
        
        logging.info('Training finished! Saving model to disk.')
        sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from log/resnet_v2_50/model.ckpt-0
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Epoch 1.0/20
INFO:tensorflow:Current Learning Rate: 0.0010000000474974513
INFO:tensorflow:Current Streaming Accuracy: 0.0
INFO:tensorflow:global step 1: loss: 2.7010669708251953
INFO:tensorflow:global step 11: loss: 2.287412643432617
INFO:tensorflow:global step 21: loss: 2.1404876708984375
INFO:tensorflow:global step 31: loss: 2.220679759979248
INFO:tensorflow:global step 41: loss: 1.9459900856018066
INFO:tensorflow:global step 51: loss: 1.9816895723342896
INFO:tensorflow:global step 61: loss: 2.299049139022827
INFO:tensorflow:global step 71: loss: 1.8380577564239502
INFO:tensorflow:global step 81: loss: 2.0770466327667236
INFO:tensorflow:global step 91: loss: 1.9054882526397705
INFO:tensorflow:global step 101: loss: 1.7761282920837402
INFO:tensorflow:global step 111: loss: 1.7421108484268188
INFO:tensorflow:global_s

INFO:tensorflow:global step 1291: loss: 0.605961799621582
INFO:tensorflow:global step 1301: loss: 0.8691591024398804
INFO:tensorflow:global step 1311: loss: 1.0709218978881836
INFO:tensorflow:global step 1321: loss: 0.6336425542831421
INFO:tensorflow:global step 1331: loss: 1.1135046482086182
INFO:tensorflow:global step 1341: loss: 0.5950260162353516
INFO:tensorflow:global step 1351: loss: 0.43409600853919983
INFO:tensorflow:global step 1361: loss: 0.5135654211044312
INFO:tensorflow:global step 1371: loss: 0.8169287443161011
INFO:tensorflow:global step 1381: loss: 0.5764526724815369
INFO:tensorflow:global step 1391: loss: 0.4198648929595947
INFO:tensorflow:global step 1401: loss: 0.6768785715103149
INFO:tensorflow:global step 1411: loss: 0.7326477766036987
INFO:tensorflow:global step 1421: loss: 0.5228776335716248
INFO:tensorflow:global step 1431: loss: 0.7308359146118164
INFO:tensorflow:global step 1441: loss: 0.29514288902282715
INFO:tensorflow:global step 1451: loss: 0.7205291986465

INFO:tensorflow:global step 2601: loss: 0.5691405534744263
INFO:tensorflow:global step 2611: loss: 0.7814098596572876
INFO:tensorflow:global step 2621: loss: 0.3365347981452942
INFO:tensorflow:global step 2631: loss: 0.5081354975700378
INFO:tensorflow:global step 2641: loss: 0.29210275411605835
INFO:tensorflow:global step 2651: loss: 0.6340328454971313
INFO:tensorflow:global step 2661: loss: 0.2110297828912735
INFO:tensorflow:global step 2671: loss: 0.6439464688301086
INFO:tensorflow:global step 2681: loss: 0.6286782026290894
INFO:tensorflow:global step 2691: loss: 0.39846399426460266
INFO:tensorflow:global step 2701: loss: 0.43718764185905457
INFO:tensorflow:global step 2711: loss: 0.39723721146583557
INFO:tensorflow:global step 2721: loss: 0.5350221395492554
INFO:tensorflow:global step 2731: loss: 0.5095523595809937
INFO:tensorflow:global step 2741: loss: 0.5508580207824707
INFO:tensorflow:global step 2751: loss: 0.36904364824295044
INFO:tensorflow:global step 2761: loss: 0.586905300

INFO:tensorflow:Current Streaming Accuracy: 0.7735889554023743
INFO:tensorflow:global step 3921: loss: 0.31021684408187866
INFO:tensorflow:global step 3931: loss: 0.23663437366485596
INFO:tensorflow:global step 3941: loss: 0.2136058360338211
INFO:tensorflow:global step 3951: loss: 0.07016070187091827
INFO:tensorflow:global step 3961: loss: 0.17607435584068298
INFO:tensorflow:global step 3971: loss: 0.2485649585723877
INFO:tensorflow:global step 3981: loss: 0.24196681380271912
INFO:tensorflow:global step 3991: loss: 0.4374406337738037
INFO:tensorflow:global step 4001: loss: 0.2860034108161926
INFO:tensorflow:global step 4011: loss: 0.15632441639900208
INFO:tensorflow:global step 4021: loss: 0.29424941539764404
INFO:tensorflow:global step 4031: loss: 0.087167888879776
INFO:tensorflow:global step 4041: loss: 0.1455511748790741
INFO:tensorflow:global step 4051: loss: 0.10207526385784149
INFO:tensorflow:global step 4061: loss: 0.20898160338401794
INFO:tensorflow:global step 4071: loss: 0.49

INFO:tensorflow:global step 5241: loss: 0.28981131315231323
INFO:tensorflow:global step 5251: loss: 0.18980799615383148
INFO:tensorflow:global step 5261: loss: 0.3947662115097046
INFO:tensorflow:global step 5271: loss: 0.14007477462291718
INFO:tensorflow:global step 5281: loss: 0.2925427258014679
INFO:tensorflow:global step 5291: loss: 0.2715737223625183
INFO:tensorflow:global step 5301: loss: 0.12420842796564102
INFO:tensorflow:global step 5311: loss: 0.21002492308616638
INFO:tensorflow:global step 5321: loss: 0.17345836758613586
INFO:tensorflow:global step 5331: loss: 0.07284979522228241
INFO:tensorflow:global step 5341: loss: 0.3172074854373932
INFO:tensorflow:global step 5351: loss: 0.2049252986907959
INFO:tensorflow:global step 5361: loss: 0.0750163272023201
INFO:tensorflow:global step 5371: loss: 0.15402069687843323
INFO:tensorflow:global step 5381: loss: 0.2939661145210266
INFO:tensorflow:Epoch 12.0/20
INFO:tensorflow:Current Learning Rate: 0.0003429999924264848
INFO:tensorflow:

INFO:tensorflow:global step 6541: loss: 0.17182523012161255
INFO:tensorflow:global step 6551: loss: 0.18364068865776062
INFO:tensorflow:global step 6561: loss: 0.12671512365341187
INFO:tensorflow:global step 6571: loss: 0.1992679238319397
INFO:tensorflow:global step 6581: loss: 0.30502477288246155
INFO:tensorflow:global step 6591: loss: 0.09186349809169769
INFO:tensorflow:global step 6601: loss: 0.07803516089916229
INFO:tensorflow:global step 6611: loss: 0.20431888103485107
INFO:tensorflow:global step 6621: loss: 0.1755988895893097
INFO:tensorflow:global step 6631: loss: 0.07074548304080963
INFO:tensorflow:global step 6641: loss: 0.033739976584911346
INFO:tensorflow:global step 6651: loss: 0.2837105989456177
INFO:tensorflow:global step 6661: loss: 0.05832181125879288
INFO:tensorflow:global step 6671: loss: 0.25328466296195984
INFO:tensorflow:global step 6681: loss: 0.21483010053634644
INFO:tensorflow:global step 6691: loss: 0.24620047211647034
INFO:tensorflow:global step 6701: loss: 0.

INFO:tensorflow:global step 7841: loss: 0.021370816975831985
INFO:tensorflow:global step 7851: loss: 0.048058487474918365
INFO:tensorflow:global step 7861: loss: 0.33606523275375366
INFO:tensorflow:global step 7871: loss: 0.11072741448879242
INFO:tensorflow:global step 7881: loss: 0.22538797557353973
INFO:tensorflow:global step 7891: loss: 0.1278384029865265
INFO:tensorflow:global step 7901: loss: 0.3855028450489044
INFO:tensorflow:global step 7911: loss: 0.17396655678749084
INFO:tensorflow:global step 7921: loss: 0.18913233280181885
INFO:tensorflow:global step 7931: loss: 0.18583139777183533
INFO:tensorflow:global step 7941: loss: 0.014751686714589596
INFO:tensorflow:global step 7951: loss: 0.4238993227481842
INFO:tensorflow:global step 7961: loss: 0.2755163908004761
INFO:tensorflow:global step 7971: loss: 0.14180059731006622
INFO:tensorflow:global step 7981: loss: 0.04638020694255829
INFO:tensorflow:global step 7991: loss: 0.3159186840057373
INFO:tensorflow:global step 8001: loss: 0.

INFO:tensorflow:global step 9161: loss: 0.19427905976772308
INFO:tensorflow:global step 9171: loss: 0.046800147742033005
INFO:tensorflow:global step 9181: loss: 0.08285640925168991
INFO:tensorflow:global step 9191: loss: 0.011358639225363731
INFO:tensorflow:global step 9201: loss: 0.238752081990242
INFO:tensorflow:global step 9211: loss: 0.07011732459068298
INFO:tensorflow:global step 9221: loss: 0.18245309591293335
INFO:tensorflow:global step 9231: loss: 0.12021559476852417
INFO:tensorflow:global step 9241: loss: 0.1927252858877182
INFO:tensorflow:global step 9251: loss: 0.05305855721235275
INFO:tensorflow:global step 9261: loss: 0.02759583853185177
INFO:tensorflow:global step 9271: loss: 0.2624382972717285
INFO:tensorflow:global step 9281: loss: 0.10492975264787674
INFO:tensorflow:global step 9291: loss: 0.2531639635562897
INFO:tensorflow:global step 9301: loss: 0.04645455628633499
INFO:tensorflow:Epoch 20.0/20
INFO:tensorflow:Current Learning Rate: 0.00011764899682020769
INFO:tensor

In [7]:
#================= TRAINING INFORMATION ==================
# State the number of epochs to train
num_epochs = 10

# State your batch size
batch_size = 32

# Learning rate information and configuration (Up to you to experiment)
initial_learning_rate = 0.001
learning_rate_decay_factor = 0.8
num_epochs_before_decay = 2

In [8]:
with tf.Graph().as_default() as graph:
    tf.logging.set_verbosity(tf.logging.INFO)
    
    # creat dataset and load batches
    dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
    images, _, labels = load_batch(dataset, 
                                   batch_size=batch_size, 
                                   MODEL=MODEL, 
                                   height=image_size, 
                                   width=image_size, 
                                   is_training=True)
    
    num_batches_per_epoch = int(dataset.num_samples / batch_size)
    num_steps_per_epoch = num_batches_per_epoch
    decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
    
    # creat the model
    logits, end_points = model_train(images)
    
    # define scopes to excluded
    exclude = ['resnet_v2_50/logits']
    variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
    
    # one-hot-encodeing of the labels
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
    
    # calculate loss
    loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
    total_loss = tf.losses.get_total_loss()
    
    # create global step for mornitoring
    global_step = get_or_create_global_step()
    
    # define decaying learning rate
    learning_rate = tf.train.exponential_decay(learning_rate=initial_learning_rate, 
                                               global_step=global_step, 
                                               decay_steps=decay_steps, 
                                               decay_rate=learning_rate_decay_factor, 
                                               staircase=True)
    
    # optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    
    # create training operator
    train_op = slim.learning.create_train_op(total_loss, optimizer)
    
    # the predictions
    probabilities = end_points['predictions']
    predictions = tf.argmax(end_points['predictions'], 1)
    accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
    metrics_op = tf.group(accuracy_update, probabilities)
    
    # summaries
    tf.summary.scalar('losses/Total_Loss', total_loss)
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('learning_rate', learning_rate)
    my_summary_op = tf.summary.merge_all()
    
    # define training step function
    def train_step(sess, train_op, global_step):
        start_time = time.time()
        total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
        time_elapsed = time.time() - start_time
        
        # logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
        
        return total_loss, global_step_count
    
    # create saver function to restore variables from a checkpoint file
    saver = tf.train.Saver(variables_to_restore)
    def restore_fn(sess):
        return saver.restore(sess, checkpoint_file)
    
    sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)
    
    with sv.managed_session() as sess:
        for step in range(num_steps_per_epoch * num_epochs):
            if step % num_batches_per_epoch == 0:
                logging.info('Epoch {}/{}'.format(step/num_batches_per_epoch + 1, num_epochs))
                learning_rate_value, accuracy_value = sess.run([learning_rate, accuracy])
                logging.info('Current Learning Rate: {}'.format(learning_rate_value))
                logging.info('Current Streaming Accuracy: {}'.format(accuracy_value))
                
            if step % 10 == 0:
                loss, step = train_step(sess, train_op, sv.global_step)
                summaries = sess.run(my_summary_op)
                sv.summary_computed(sess, summaries)
                logging.info('global step {}: loss: {}'.format(step, loss))
            else:
                loss, _ = train_step(sess, train_op, sv.global_step)
                
        logging.info('Final Loss: {}'.format(loss))
        logging.info('Final Accuracy: {}'.format(sess.run(accuracy)))
        
        logging.info('Training finished! Saving model to disk.')
        sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from log/resnet_v2_50/model.ckpt-9800
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Epoch 1.0/10
INFO:tensorflow:Current Learning Rate: 0.00010737420234363526
INFO:tensorflow:Current Streaming Accuracy: 0.0
INFO:tensorflow:global step 9801: loss: 0.11239162087440491
INFO:tensorflow:global step 9811: loss: 0.2036435902118683
INFO:tensorflow:global step 9821: loss: 0.1451779156923294
INFO:tensorflow:global step 9831: loss: 0.10372335463762283
INFO:tensorflow:global step 9841: loss: 0.17279523611068726
INFO:tensorflow:global step 9851: loss: 0.09582329541444778
INFO:tensorflow:global step 9861: loss: 0.12841318547725677
INFO:tensorflow:global step 9871: loss: 0.006579454988241196
INFO:tensorflow:global step 9881: loss: 0.16483469307422638
INFO:tensorflow:global step 9891: loss: 0.2140132486820221
INFO:tensorflow:global step 9901: loss: 0.16665053367614746
INFO:tensorflow:global step 9911: loss: 0.18

INFO:tensorflow:global step 10991: loss: 0.013937974348664284
INFO:tensorflow:global step 11001: loss: 0.0773518830537796
INFO:tensorflow:global step 11011: loss: 0.1207181066274643
INFO:tensorflow:global step 11021: loss: 0.13215963542461395
INFO:tensorflow:global step 11031: loss: 0.044077254831790924
INFO:tensorflow:global step 11041: loss: 0.1611233502626419
INFO:tensorflow:global step 11051: loss: 0.11028660833835602
INFO:tensorflow:global step 11061: loss: 0.042675334960222244
INFO:tensorflow:global step 11071: loss: 0.1659296602010727
INFO:tensorflow:global step 11081: loss: 0.08837480843067169
INFO:tensorflow:global step 11091: loss: 0.22959238290786743
INFO:tensorflow:global step 11101: loss: 0.04864426702260971
INFO:tensorflow:global step 11111: loss: 0.09180988371372223
INFO:tensorflow:global step 11121: loss: 0.23906193673610687
INFO:tensorflow:global step 11131: loss: 0.032339297235012054
INFO:tensorflow:global step 11141: loss: 0.05703498423099518
INFO:tensorflow:global s

INFO:tensorflow:global step 12261: loss: 0.0293864868581295
INFO:tensorflow:global step 12271: loss: 0.08436721563339233
INFO:tensorflow:global step 12281: loss: 0.03391912207007408
INFO:tensorflow:global step 12291: loss: 0.046401891857385635
INFO:tensorflow:global step 12301: loss: 0.22163325548171997
INFO:tensorflow:global step 12311: loss: 0.1736372709274292
INFO:tensorflow:global step 12321: loss: 0.046420078724622726
INFO:tensorflow:global step 12331: loss: 0.037171635776758194
INFO:tensorflow:global step 12341: loss: 0.07835444808006287
INFO:tensorflow:global step 12351: loss: 0.16407790780067444
INFO:tensorflow:global step 12361: loss: 0.028274673968553543
INFO:tensorflow:global step 12371: loss: 0.0446656234562397
INFO:tensorflow:global step 12381: loss: 0.23483949899673462
INFO:tensorflow:global step 12391: loss: 0.14981389045715332
INFO:tensorflow:global step 12401: loss: 0.07477543503046036
INFO:tensorflow:global step 12411: loss: 0.1405162513256073
INFO:tensorflow:global s

INFO:tensorflow:global step 13561: loss: 0.14851707220077515
INFO:tensorflow:global step 13571: loss: 0.0381922721862793
INFO:tensorflow:global step 13581: loss: 0.18522082269191742
INFO:tensorflow:global step 13591: loss: 0.21377897262573242
INFO:tensorflow:global step 13601: loss: 0.038877420127391815
INFO:tensorflow:global step 13611: loss: 0.01790587604045868
INFO:tensorflow:global step 13621: loss: 0.0764993280172348
INFO:tensorflow:global step 13631: loss: 0.13400134444236755
INFO:tensorflow:global step 13641: loss: 0.011753655038774014
INFO:tensorflow:global step 13651: loss: 0.10055723041296005
INFO:tensorflow:global step 13661: loss: 0.10250502079725266
INFO:tensorflow:global step 13671: loss: 0.04580989480018616
INFO:tensorflow:global step 13681: loss: 0.18813548982143402
INFO:tensorflow:global step 13691: loss: 0.0607784166932106
INFO:tensorflow:global step 13701: loss: 0.17721781134605408
INFO:tensorflow:global step 13711: loss: 0.15731123089790344
INFO:tensorflow:Epoch 9.0

In [11]:
#Create a new evaluation log directory to visualize the validation process
log_eval = 'log_val'

#State the batch_size to evaluate each time, which can be a lot more than the training batch
batch_size = 50

#State the number of epochs to evaluate
num_epochs = 3

#Get the latest checkpoint file
checkpoint_file = tf.train.latest_checkpoint(log_dir)

In [13]:
with tf.Graph().as_default() as graph:
    tf.logging.set_verbosity(tf.logging.INFO)
    
    # creat dataset and load batches
    dataset = get_split('validation', dataset_dir, file_pattern=file_pattern)
    images, _, labels = load_batch(dataset, 
                                   batch_size=batch_size, 
                                   MODEL=MODEL, 
                                   height=image_size, 
                                   width=image_size, 
                                   is_training=False)
    
    num_batches_per_epoch = int(dataset.num_samples / batch_size)
    num_steps_per_epoch = num_batches_per_epoch
    decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
    
    # creat the model
    logits, end_points = model_eval(images)
    
    # define scopes 
    variables_to_restore = slim.get_variables_to_restore()
    # create saver function to restore variables from a checkpoint file
    saver = tf.train.Saver(variables_to_restore)
    def restore_fn(sess):
        return saver.restore(sess, checkpoint_file)
    
    # one-hot-encodeing of the labels
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
    
    # calculate loss
    loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
    total_loss = tf.losses.get_total_loss()
    
    # create global step for mornitoring
    global_step = get_or_create_global_step()
    global_step_op = tf.assign(global_step, global_step+1)
    
    
    # the predictions
    probabilities = end_points['predictions']
    predictions = tf.argmax(end_points['predictions'], 1)
    accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
    metrics_op = tf.group(accuracy_update, probabilities)
    
    # summaries
    tf.summary.scalar('losses/Total_Loss', total_loss)
    tf.summary.scalar('Validation_accuracy', accuracy)
    my_summary_op = tf.summary.merge_all()
    
    # define training step function
    def eval_step(sess, total_loss, metrics_op, global_step):
        start_time = time.time()
        total_loss_value, global_step_count, _, accuracy_value = sess.run([total_loss, global_step_op, metrics_op, accuracy])
        time_elapsed = time.time() - start_time
        
        # logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
        
        return total_loss_value, global_step_count, accuracy_value
    
    
    
    sv = tf.train.Supervisor(logdir=log_eval, summary_op=None, saver=None, init_fn=restore_fn)
    
    with sv.managed_session() as sess:
        for step in range(num_steps_per_epoch * num_epochs):
            sess.run(sv.global_step)
            
            if step % 10 == 0:
                loss, step, accuracy_value = eval_step(sess, total_loss, metrics_op, sv.global_step)
                summaries = sess.run(my_summary_op)
                sv.summary_computed(sess, summaries)
                logging.info('global step {}: loss: {}, accuracy: {}'.format(step, loss, accuracy_value))
            else:
                loss, _, accuracy_value = eval_step(sess, total_loss, metrics_op, sv.global_step)
            
            if step % num_batches_per_epoch == 0:
                logging.info('Epoch {}/{}'.format(step/num_batches_per_epoch + 1, num_epochs))
                logging.info('Current Loss: {}'.format(loss))
                logging.info('Current Streaming Accuracy: {}'.format(sess.run(accuracy)))
                
        logging.info('Final Loss: {}'.format(loss))
        logging.info('Final Accuracy: {}'.format(sess.run(accuracy)))

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from log/resnet_v2_50/model.ckpt-14700
INFO:tensorflow:global step 1: loss: 0.0025438577868044376, accuracy: 0.0
INFO:tensorflow:global step 11: loss: 0.0010007957462221384, accuracy: 0.9940000176429749
INFO:tensorflow:global step 21: loss: 0.0007120220107026398, accuracy: 0.996999979019165
INFO:tensorflow:global step 31: loss: 0.0015493235550820827, accuracy: 0.9973333477973938
INFO:tensorflow:global step 41: loss: 0.006649021524935961, accuracy: 0.9965000152587891
INFO:tensorflow:global step 51: loss: 0.0017664923798292875, accuracy: 0.9972000122070312
INFO:tensorflow:global step 61: loss: 0.03675095736980438, accuracy: 0.9973333477973938
INFO:tensorflow:global step 71: loss: 0.0004656332603190094, accuracy: 0.9974285960197449
INFO:tensorflow:global step 81: loss: 0.001735733007080853, accuracy: 0.996999979019165
INFO:tensorflow:global step 91: loss: 0.020356064662337303, accuracy: 0.995999991893768