In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import prettytensor as pt
from sklearn.metrics import confusion_matrix
import time 
from datetime import timedelta
import math
import os
%matplotlib inline

In [2]:
# Load the MNIST dataset through TensorFlow's tutorial helper, reading from data/MNIST/.
# one_hot=True is requested here; later cells take an argmax over the label rows
# to turn them back into integer class numbers.
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST/', one_hot=True)

Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# Attach integer class numbers to the test and validation splits.
# The label matrices hold one row per image, so the position of the largest
# entry along axis 1 is the class. Both splits use the same vectorised
# np.argmax call instead of mixing it with a per-row Python loop.
data.test.cls = np.argmax(data.test.labels, axis=1)
data.validation.cls = np.argmax(data.validation.labels, axis=1)
# Bare expression so the notebook displays the first ten test classes.
data.test.cls[0:10]

array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9])

In [4]:
# Image geometry and label-space constants used when building the graph.
img_size = 28                         # side length of a (square) image, in pixels
num_channels = 1                      # channels per image
num_classes = 10                      # number of output classes

# Derived from img_size so the three values cannot drift apart.
img_shape = (img_size, img_size)      # (height, width)
img_size_flat = img_size * img_size   # length of one flattened image row

In [5]:
# Graph inputs.
# x receives flattened images: one row of img_size_flat floats per image.
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
# The same data viewed as [batch, img_size, img_size, num_channels]; -1 lets
# TensorFlow infer the batch dimension.
x_image = tf.reshape(x, shape=[-1, img_size, img_size, num_channels])
# y_true receives the labels: one row of num_classes floats per image.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes])
# Integer class per row. tf.argmax(axis=...) replaces the deprecated
# tf.arg_max(dimension=...) spelling; the computed values are the same.
y_true_cls = tf.argmax(y_true, axis=1)

In [6]:
# Wrap the reshaped image tensor with PrettyTensor; the model cell below chains
# its layer-building methods off this wrapped object.
x_pretty = pt.wrap(x_image)

In [7]:
# Build the CNN as a single PrettyTensor method chain:
#   conv(5x5, 16) -> max-pool -> conv(5x5, 36) -> max-pool -> flatten
#   -> fully connected (128) -> softmax classifier.
# tf.nn.relu is installed as the default activation for the layers created
# inside this scope. softmax_classifier yields both the predictions and the loss.
with pt.defaults_scope(activation_fn=tf.nn.relu):
    y_pred, loss = (
        x_pretty
        .conv2d(kernel=5, depth=16, name='layer_conv1')
        .max_pool(kernel=2, stride=1)
        .conv2d(kernel=5, depth=36, name='layer_conv2')
        .max_pool(kernel=2, stride=1)
        .flatten()
        .fully_connected(size=128, name='layer_fc1')
        .softmax_classifier(num_classes=num_classes, labels=y_true)
    )

In [8]:
# Predicted class, training op and accuracy metric.
# Predicted class = index of the largest entry in each row of y_pred.
# tf.argmax(axis=...) replaces the deprecated tf.arg_max(dimension=...).
y_pred_cls = tf.argmax(y_pred, axis=1)
# One Adam step (learning rate 1e-4) that minimises the classifier loss.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)
# Per-example boolean: does the predicted class equal the true class?
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
# Mean of those booleans (cast to float32) is the fraction classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# --- Training hyper-parameters -------------------------------------------
# NOTE: batch_size is assigned again (256) in the prediction-helper cell
# further down; whichever cell ran last decides what optimize() uses.
batch_size = 64
# Stop training once this many iterations pass without a validation improvement.
require_improvement = 1000

# --- Mutable training state, updated by optimize() ------------------------
total_iterations = 0
last_improvement = 0
best_validation_accuracy = 0.0

# --- Checkpointing of the best model so far -------------------------------
save_dir = 'checkpoints/'
save_path = os.path.join(save_dir, 'best_validation')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()

In [10]:
# Batched class prediction, used to score the validation set.
# NOTE: this rebinds the global `batch_size` that the training-setup cell set
# to 64, so once this cell has run optimize() also draws training batches of 256.
batch_size = 256
def predict_cls(images, labels, cls_true):
    """Predict a class for every image, feeding the graph in batches.

    Args:
        images: 2-D array of flattened images, one row per image.
        labels: 2-D array of labels with one row per image; fed to `y_true`.
        cls_true: 1-D array with the true integer class of each image.

    Returns:
        (correct, cls_pred): `correct` is the element-wise comparison
        `cls_true == cls_pred`; `cls_pred` is the integer array of predictions.

    Reads the globals `session`, `x`, `y_true`, `y_pred_cls` and `batch_size`.
    """
    num_images = len(images)
    # Builtin int instead of the np.int alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError. The
    # resulting dtype is the same.
    cls_pred = np.zeros(shape=num_images, dtype=int)
    # Walk the data in chunks of batch_size; the last chunk may be shorter.
    for start in range(0, num_images, batch_size):
        end = min(start + batch_size, num_images)
        feed_dict = {x: images[start:end, :], y_true: labels[start:end, :]}
        cls_pred[start:end] = session.run(y_pred_cls, feed_dict=feed_dict)
    correct = (cls_true == cls_pred)
    return correct, cls_pred
def cls_accuracy(correct):
    """Summarise an array of per-example correctness flags.

    Args:
        correct: array whose entries are truthy where the prediction was right.

    Returns:
        (accuracy, num_correct): the fraction of correct entries as a float,
        and their count as returned by `correct.sum()`.
    """
    num_correct = correct.sum()
    return float(num_correct) / len(correct), num_correct
def validation_accuracy():
    """Score the current model on the validation split.

    Runs predict_cls over data.validation and passes the correctness array to
    cls_accuracy, returning its (accuracy, num_correct) result unchanged.
    """
    val = data.validation
    correct, _ = predict_cls(images=val.images, labels=val.labels, cls_true=val.cls)
    return cls_accuracy(correct)

In [11]:
# Training loop with periodic validation, best-model checkpointing and early stopping.
def optimize(num_iterations):
    """Run up to `num_iterations` further training steps.

    Every 100th global iteration, and on the final step of this call, the
    function reports accuracy on the current training batch and on the
    validation set. Whenever validation accuracy beats the best seen so far,
    the model is saved to `save_path`. Training stops early once more than
    `require_improvement` iterations have passed since the last improvement.

    Args:
        num_iterations: maximum number of optimisation steps for this call.

    Updates the globals `total_iterations`, `best_validation_accuracy` and
    `last_improvement`, so repeated calls continue where the last one stopped.
    """
    global total_iterations
    global best_validation_accuracy
    global last_improvement
    start_time = time.time()
    # Value the loop index takes on the last requested step. The index starts
    # at the running global counter, so comparing it with num_iterations - 1
    # (as before) only matched on the very first call; later calls never
    # reported or validated their final step.
    final_index = total_iterations + num_iterations - 1
    for i in range(total_iterations, total_iterations + num_iterations):
        total_iterations += 1
        x_batch, y_true_batch = data.train.next_batch(batch_size)
        feed_dict_train = {x: x_batch, y_true: y_true_batch}
        session.run(optimizer, feed_dict=feed_dict_train)
        if total_iterations % 100 == 0 or i == final_index:
            # Training accuracy is measured on the batch that was just used.
            acc_train = session.run(accuracy, feed_dict=feed_dict_train)
            acc_validation, _ = validation_accuracy()
            if acc_validation > best_validation_accuracy:
                best_validation_accuracy = acc_validation
                last_improvement = total_iterations
                saver.save(sess=session, save_path=save_path)
                improved_str = '*'  # marks a new best in the log line
            else:
                improved_str = ''
            msg = "Iter: {0:>6}, Train-Batch Accuracy: {1:>6.1%}, Validation Acc: {2:>6.1%} {3}"
            print(msg.format(i + 1, acc_train, acc_validation, improved_str))
        # Early stopping: give up after too long without a validation improvement.
        if total_iterations - last_improvement > require_improvement:
            print("model cannot improove any futhher")
            break
    end_time = time.time()
    time_dif = end_time - start_time
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

In [12]:
# Number of test images fed to the graph per session.run call.
test_batch_size = 256
def print_test_accuracy(show_confusion_matrix=False):
    """Print classification accuracy on the test split.

    Predicts every image in data.test in chunks of `test_batch_size`, compares
    the predictions with data.test.cls and prints the accuracy together with
    the correct/total counts.

    Args:
        show_confusion_matrix: when true, also print the confusion matrix of
            true versus predicted classes. Previously this flag was accepted
            but ignored.

    Reads the globals `data`, `session`, `x`, `y_true` and `y_pred_cls`.
    """
    num_test = len(data.test.images)
    # Builtin int instead of the np.int alias, which was deprecated in NumPy
    # 1.20 and removed in 1.24 (AttributeError). The dtype is unchanged.
    cls_pred = np.zeros(shape=num_test, dtype=int)
    # Walk the test set in chunks; the last chunk may be shorter.
    for start in range(0, num_test, test_batch_size):
        end = min(start + test_batch_size, num_test)
        feed_dict = {x: data.test.images[start:end, :],
                     y_true: data.test.labels[start:end, :]}
        cls_pred[start:end] = session.run(y_pred_cls, feed_dict=feed_dict)
    cls_true = data.test.cls
    correct = (cls_true == cls_pred)
    correct_sum = correct.sum()
    acc = float(correct_sum) / num_test
    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, num_test))
    if show_confusion_matrix:
        # confusion_matrix is the sklearn.metrics function imported at the top.
        print("Confusion Matrix:")
        print(confusion_matrix(y_true=cls_true, y_pred=cls_pred))

In [13]:
# Create the TensorFlow session used by every helper above, then run the
# initializer op for all global variables in the graph.
session=tf.Session()
session.run(tf.global_variables_initializer())

In [14]:
# Baseline: test-set accuracy with freshly initialised weights, before any
# call to optimize().
print_test_accuracy()

Accuracy on Test-Set: 10.2% (1023 / 10000)


In [15]:
# Train the CNN for up to 10000 iterations. optimize() stops earlier if
# validation accuracy has not improved within require_improvement iterations.
optimize(10000)

Iter:    100, Train-Batch Accuracy:  94.5%, Validation Acc:  94.7% *
Iter:    200, Train-Batch Accuracy:  96.1%, Validation Acc:  96.7% *
Iter:    300, Train-Batch Accuracy:  98.4%, Validation Acc:  97.4% *
Iter:    400, Train-Batch Accuracy:  99.2%, Validation Acc:  97.8% *
Iter:    500, Train-Batch Accuracy:  98.4%, Validation Acc:  98.3% *
Iter:    600, Train-Batch Accuracy:  98.0%, Validation Acc:  98.4% *
Iter:    700, Train-Batch Accuracy:  99.6%, Validation Acc:  98.5% *
Iter:    800, Train-Batch Accuracy:  98.4%, Validation Acc:  98.3% 
Iter:    900, Train-Batch Accuracy: 100.0%, Validation Acc:  98.8% *
Iter:   1000, Train-Batch Accuracy:  99.6%, Validation Acc:  98.5% 
Iter:   1100, Train-Batch Accuracy:  99.2%, Validation Acc:  98.8% 
Iter:   1200, Train-Batch Accuracy:  98.8%, Validation Acc:  98.8% *
Iter:   1300, Train-Batch Accuracy:  99.2%, Validation Acc:  98.7% 
Iter:   1400, Train-Batch Accuracy: 100.0%, Validation Acc:  98.8% *
Iter:   1500, Train-Batch Accuracy:  9

In [16]:
# Re-run the variable initializer, discarding the weights trained above, so
# that the restore in the next cell is what supplies the model's weights.
session.run(tf.global_variables_initializer())

In [17]:
# Load the checkpoint at save_path, which optimize() writes each time
# validation accuracy reaches a new best.
saver.restore(sess=session, save_path=save_path)

INFO:tensorflow:Restoring parameters from checkpoints/best_validation


In [18]:
# Test-set accuracy of the restored best-validation model.
print_test_accuracy()

Accuracy on Test-Set: 98.8% (9883 / 10000)
