## Digit recognizer - CNN model trained on MNIST database using TF

In [1]:
import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Load, transform data and split it into train-val sets

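No separate test set is split off here: Kaggle provides its own test set and scores submissions against it, so the training data only needs to be divided into training and validation sets.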

In [2]:
# Read the Kaggle training CSV: a 'label' column plus the pixel columns of each image.
df_train = pd.read_csv('data/train.csv')

# Targets come from the first column; features are every column except 'label'.
train_labels = df_train[df_train.columns[0]]
df_train_values = df_train.drop(['label'], axis=1)

# Pixels as float32; labels one-hot encoded over the 10 digit classes.
X_train = df_train_values.values.astype('float32')
y_train = np_utils.to_categorical(train_labels.values, 10)

# Hold out 10% of the rows for validation (fixed seed keeps the split repeatable).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=2)

## Putting data into pipeline

In [3]:
# Discard any previously built graph before constructing the input pipeline.
tf.reset_default_graph()


def reshape(X, y):
    """Reshape one flat example into a 28x28 single-channel image; the label passes through."""
    return (tf.reshape(X, [28, 28, 1]), y)


# One dataset element per (pixels, one-hot label) row of the training split.
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_ds_reshaped = train_ds.map(reshape)

## Transformations

def zoom_out(X):
    """Shrink the image to a random square side in [24, 27] and pad it back to 28x28.

    X is a single image tensor (the pipeline feeds it as 28x28x1). The padding
    uses tf.pad's default constant fill and leaves the channel axis untouched.
    """
    side = tf.random_uniform(shape=(), minval=24, maxval=28, dtype=tf.int32)
    shrunk = tf.image.resize_images(X, tf.stack([side, side]))
    # Split the missing rows/columns between both edges; when the gap is odd
    # the trailing edge receives the extra pixel so the total is always 28.
    pad_before = (28 - side) // 2
    pad_after = (29 - side) // 2
    paddings = tf.stack([[pad_before, pad_after], [pad_before, pad_after], [0, 0]])
    return tf.pad(shrunk, paddings)

def zoom_in(X):
    """Enlarge the image to a random square side in [29, 32] and crop a 28x28 window.

    The crop window starts (side - 28) // 2 pixels from the top-left corner on
    both axes, i.e. it is approximately centred.
    """
    side = tf.random_uniform(shape=(), minval=29, maxval=33, dtype=tf.int32)
    enlarged = tf.image.resize_images(X, tf.stack([side, side]))
    start = (side - 28) // 2
    return tf.image.crop_to_bounding_box(enlarged, start, start, 28, 28)

def rotate(X):
    """Rotate the image by a random angle drawn uniformly from [-0.5, 0.5).

    The angle is passed straight to tf.contrib.image.rotate, which interprets
    it in radians.
    """
    angle = tf.random_uniform(shape=(), minval=-0.5, maxval=0.5)
    return tf.contrib.image.rotate(X, angle)

def random_transform(X, y):
    """Augment one example: random rotation followed by a random zoom out or zoom in.

    A fair coin (integer 0 or 1) selects the zoom direction; the label is
    returned unchanged.
    """
    def _rotated_then_zoomed_out():
        return zoom_out(rotate(X))

    def _rotated_then_zoomed_in():
        return zoom_in(rotate(X))

    coin = tf.random_uniform(shape=(), minval=0, maxval=2, dtype=tf.int32)
    pick_zoom_out = tf.equal(coin, tf.constant(0))
    augmented = tf.cond(pick_zoom_out, _rotated_then_zoomed_out, _rotated_then_zoomed_in)
    return (augmented, y)

# Training pipeline: augment each image, shuffle, then batch.
train_ds_transformed = train_ds_reshaped.map(random_transform)
# A shuffle buffer as large as the training split; derived from the data so it
# stays correct if the split size changes.
train_ds_shuffled = train_ds_transformed.shuffle(len(X_train))
train_ds_batched = train_ds_shuffled.batch(100)
# The iterator is re-initialised at the start of every epoch, so it must be an
# initializable iterator: one-shot iterators are documented as not supporting
# re-initialisation.
train_iterator = train_ds_batched.make_initializable_iterator()
initialise_train = train_iterator.initializer
train_element = train_iterator.get_next()

# Validation pipeline: no augmentation, the whole split delivered as one batch.
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_ds_reshaped = val_ds.map(reshape)

val_ds_one_batch = val_ds_reshaped.batch(len(X_val))
val_iterator = val_ds_one_batch.make_initializable_iterator()
initialise_val = val_iterator.initializer
val_element = val_iterator.get_next()

## Complete the computational graph

In [4]:
def _weights_and_bias(suffix, weight_shape):
    """Create variables `W_<suffix>` / `b_<suffix>` and a histogram summary for each.

    The bias has one entry per output unit (the last dimension of weight_shape).
    No initializer is passed, so tf.get_variable's default applies.
    Returns (W, b, [W_histogram, b_histogram]).
    """
    W = tf.get_variable("W_" + suffix, weight_shape)
    b = tf.get_variable("b_" + suffix, [weight_shape[-1]])
    histograms = [tf.summary.histogram("W_" + suffix, W),
                  tf.summary.histogram("b_" + suffix, b)]
    return W, b, histograms


def _conv_relu(inputs, suffix, kernel_shape):
    """Stride-1 'SAME' convolution + bias + ReLU; returns (activation, histograms)."""
    W, b, histograms = _weights_and_bias(suffix, kernel_shape)
    conv = tf.add(tf.nn.conv2d(inputs, W, strides=[1, 1, 1, 1], padding='SAME'), b)
    return tf.nn.relu(conv), histograms


def _pool_dropout(inputs, keep_prob):
    """2x2 max-pooling with stride 2 followed by dropout with the given keep probability."""
    pooled = tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.dropout(pooled, keep_prob)


x = tf.placeholder(tf.float32, [None, 28, 28, 1], name='InputData')
y = tf.placeholder(tf.float32, [None, 10], name='LabelData')

# Dropout keep probabilities; they default to 1.0 (dropout disabled) unless fed.
prob_conv = tf.placeholder_with_default(1.0, shape=())
prob_dense = tf.placeholder_with_default(1.0, shape=())

# Histogram summaries of every weight/bias, in layer order.
weight_histograms = []

with tf.name_scope('Model'):
    with tf.name_scope('Input_L'):
        input_tsr = tf.reshape(x, [-1, 28, 28, 1], name='InputReshaped')
    with tf.name_scope('Conv1_L'):
        h_conv1_1, histograms = _conv_relu(input_tsr, "conv1_1", [5, 5, 1, 32])
        weight_histograms += histograms
        h_conv1_2, histograms = _conv_relu(h_conv1_1, "conv1_2", [4, 4, 32, 32])
        weight_histograms += histograms
        dropout1 = _pool_dropout(h_conv1_2, prob_conv)   # 28x28 -> 14x14
    with tf.name_scope('Conv2_L'):
        h_conv2_1, histograms = _conv_relu(dropout1, "conv2_1", [3, 3, 32, 64])
        weight_histograms += histograms
        h_conv2_2, histograms = _conv_relu(h_conv2_1, "conv2_2", [2, 2, 64, 64])
        weight_histograms += histograms
        dropout2 = _pool_dropout(h_conv2_2, prob_conv)   # 14x14 -> 7x7
    with tf.name_scope('Dense_L'):
        W_dense, b_dense, histograms = _weights_and_bias("dense", [7 * 7 * 64, 256])
        weight_histograms += histograms
        flat_tsr = tf.reshape(dropout2, [-1, 7 * 7 * 64])
        h_dense = tf.nn.relu(tf.add(tf.matmul(flat_tsr, W_dense), b_dense))
        dropout3 = tf.nn.dropout(h_dense, prob_dense)
    with tf.name_scope('Output_L'):
        W_out, b_out, histograms = _weights_and_bias("out", [256, 10])
        weight_histograms += histograms
        # Raw logits; the softmax is applied inside the loss.
        pred = tf.add(tf.matmul(dropout3, W_out), b_out)

with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

with tf.name_scope('Optimizer'):
    optimizer = tf.train.AdamOptimizer()
    train_step = optimizer.minimize(loss)

with tf.name_scope('Accuracy'):
    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    acc = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

# Summaries evaluated on the last training batch of each epoch.
loss_last_batch = tf.summary.scalar("Loss_on_last_batch", loss)
acc_last_batch = tf.summary.scalar("Accuracy_on_last_batch", acc)
merged_summaries_from_last_batch = tf.summary.merge([loss_last_batch, acc_last_batch])

# Summaries evaluated on the validation set: accuracy plus all weight histograms.
validation_set_acc = tf.summary.scalar("Validation_set_accuracy", acc)
merged_validation = tf.summary.merge([validation_set_acc] + weight_histograms)

logs_path = 'logs'

summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

# Created after the optimizer so that the optimizer's own variables are
# checkpointed together with the model weights.
saver = tf.train.Saver()

## Parameters

In [5]:
training_epochs = 50
# Must match the batch size used when the training dataset is batched.
batch_size = 100
# Number of full batches per epoch, derived from the training split instead of
# being hard-coded (37800 rows / 100 = 378 for the Kaggle data with a 10% hold-out).
total_batch = len(X_train) // batch_size
save_path = "saved_models/saved_model.ckpt"

## Run session and save the model

In [None]:
# Saver.save() requires the checkpoint directory to exist; create it before
# training so a long run cannot fail at the very last step.
save_dir = os.path.dirname(save_path)
if save_dir and not os.path.isdir(save_dir):
    os.makedirs(save_dir)

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(training_epochs):
        time_start = time.time()
        avg_cost = 0.
        avg_acc = 0.
        # Restart the training iterator at the beginning of every epoch.
        sess.run(initialise_train)
        for i in range(total_batch):
            # Pull one augmented batch out of the pipeline, then feed it to the model.
            batch_xs, batch_ys = sess.run(train_element)
            _, c, ac = sess.run([train_step, loss, acc], feed_dict={x: batch_xs, y: batch_ys,
                                                                    prob_conv: 0.75, prob_dense: 0.5})
            avg_cost += c / total_batch
            avg_acc += ac / total_batch

        # Loss/accuracy summaries are re-evaluated on the last batch of the epoch
        # (dropout still active, weights already updated by that batch).
        summary_last_batch = sess.run(merged_summaries_from_last_batch, feed_dict={x: batch_xs, y: batch_ys,
                                                                                   prob_conv: 0.75, prob_dense: 0.5})
        summary_writer.add_summary(summary_last_batch, epoch + 1)

        # Validation summaries: whole validation set in one batch, dropout disabled.
        sess.run(initialise_val)
        X_val_pl, y_val_pl = sess.run(val_element)
        summary_validation = sess.run(merged_validation, feed_dict={x: X_val_pl, y: y_val_pl,
                                                                    prob_conv: 1., prob_dense: 1.})
        summary_writer.add_summary(summary_validation, epoch + 1)
        time_elapsed = time.time() - time_start
        print("Epoch: {:04} done in {:.2f} sec, avg_cost = {:.9f}, avg_acc = {:.4f}".format(epoch + 1, time_elapsed, avg_cost, avg_acc ))

    print("Optimization Finished!")
    # The writer is shared with later cells, so flush rather than close it.
    summary_writer.flush()
    saver.save(sess, save_path)
    print("Model saved")

Epoch: 0001 done in 124.63 sec, avg_cost = 0.994784897, avg_acc = 0.7322
Epoch: 0002 done in 109.07 sec, avg_cost = 0.244456088, avg_acc = 0.9259
Epoch: 0003 done in 107.40 sec, avg_cost = 0.187155464, avg_acc = 0.9436
Epoch: 0004 done in 108.16 sec, avg_cost = 0.155999397, avg_acc = 0.9534
Epoch: 0005 done in 116.12 sec, avg_cost = 0.138635470, avg_acc = 0.9603
Epoch: 0006 done in 110.85 sec, avg_cost = 0.131608163, avg_acc = 0.9605
Epoch: 0007 done in 111.54 sec, avg_cost = 0.119466926, avg_acc = 0.9641
Epoch: 0008 done in 108.88 sec, avg_cost = 0.119532470, avg_acc = 0.9638
Epoch: 0009 done in 109.47 sec, avg_cost = 0.113237058, avg_acc = 0.9661
Epoch: 0010 done in 110.56 sec, avg_cost = 0.106664859, avg_acc = 0.9688
Epoch: 0011 done in 110.45 sec, avg_cost = 0.103215413, avg_acc = 0.9701
Epoch: 0012 done in 110.97 sec, avg_cost = 0.094025335, avg_acc = 0.9722
Epoch: 0013 done in 109.92 sec, avg_cost = 0.100626250, avg_acc = 0.9698
Epoch: 0014 done in 108.64 sec, avg_cost = 0.091275

## Restore the model and continue training

In [7]:
continue_training_epochs = 50
# Must match the batch size used when the training dataset is batched.
batch_size = 100
# Number of full batches per epoch, derived from the training split instead of
# being hard-coded (37800 rows / 100 = 378 for the Kaggle data with a 10% hold-out).
total_batch = len(X_train) // batch_size
save_path = "saved_models/restored_model.ckpt"

In [None]:
# Saver.save() requires the checkpoint directory to exist; make sure it does
# before spending time on training.
save_dir = os.path.dirname(save_path)
if save_dir and not os.path.isdir(save_dir):
    os.makedirs(save_dir)

with tf.Session() as sess:

    # Restoring replaces variable initialisation: the checkpoint supplies every saved variable.
    saver.restore(sess, "saved_models/saved_model.ckpt")

    for epoch in range(continue_training_epochs):
        time_start = time.time()
        avg_cost = 0.
        avg_acc = 0.
        # Restart the training iterator at the beginning of every epoch.
        sess.run(initialise_train)
        for i in range(total_batch):
            # Pull one augmented batch out of the pipeline, then feed it to the model.
            batch_xs, batch_ys = sess.run(train_element)
            _, c, ac = sess.run([train_step, loss, acc], feed_dict={x: batch_xs, y: batch_ys,
                                                                    prob_conv: 0.75, prob_dense: 0.5})
            avg_cost += c / total_batch
            avg_acc += ac / total_batch

        # Epoch numbering (summaries and log line) continues after the
        # `training_epochs` epochs of the initial run.
        summary_last_batch = sess.run(merged_summaries_from_last_batch, feed_dict={x: batch_xs, y: batch_ys,
                                                                                   prob_conv: 0.75, prob_dense: 0.5})
        summary_writer.add_summary(summary_last_batch, epoch + training_epochs + 1)

        # Validation summaries: whole validation set in one batch, dropout disabled.
        sess.run(initialise_val)
        X_val_pl, y_val_pl = sess.run(val_element)
        summary_validation = sess.run(merged_validation, feed_dict={x: X_val_pl, y: y_val_pl,
                                                                    prob_conv: 1., prob_dense: 1.})
        summary_writer.add_summary(summary_validation, epoch + training_epochs + 1)
        time_elapsed = time.time() - time_start
        print("Epoch: {:04} done in {:.2f} sec, avg_cost = {:.9f}, avg_acc = {:.4f}".format(epoch + training_epochs + 1, time_elapsed, avg_cost, avg_acc ))

    print("Optimization Finished!")
    # Push buffered summary events to disk; the writer itself stays open.
    summary_writer.flush()
    saver.save(sess, save_path)
    print("Model saved")

INFO:tensorflow:Restoring parameters from saved_models/saved_model.ckpt
Epoch: 0051 done in 129.89 sec, avg_cost = 0.064966585, avg_acc = 0.9808
Epoch: 0052 done in 109.32 sec, avg_cost = 0.067310688, avg_acc = 0.9812
