In [None]:
import pandas as pd
import numpy as np 

import tensorflow as tf

from sklearn.model_selection import train_test_split  #for doing train/validation split of training data

from mnist import MNIST  #for reading mnist full test data to test the model

In [None]:
# Load the training set from train.csv (relative to the working directory) into a DataFrame.
train_df = pd.read_csv("train.csv")

In [None]:
# Preview the first rows of the raw frame; at this point it still contains the "label" column.
train_df.head()

In [None]:
# Keep the target column aside before it is dropped from the feature frame.
labels = train_df["label"]

In [None]:
# Remove the target column so only feature columns remain. `axis` is passed by
# keyword: pandas 2.0 no longer accepts it positionally in `DataFrame.drop`.
train_df = train_df.drop("label", axis=1)

In [None]:
# Preview again to confirm the "label" column is gone.
train_df.head()

In [None]:
# Convert the feature frame and the label series to plain NumPy arrays.
# `.as_matrix()` was removed in pandas 1.0; `.values` returns the same ndarray
# and is available in every pandas release.
train_data = train_df.values
labels = labels.values

In [None]:
# Sanity check: feature-array shape on the first line, label-array shape on the second.
print(train_data.shape, labels.shape, sep="\n")

In [None]:
# resizing images from 784 X 1 to 28 X 28 X 1

# A single vectorised reshape replaces the per-image Python loop. Dividing by
# 255.0 yields a float64 array scaled by 1/255, i.e. values in 0-1 for 8-bit
# pixel intensities (normalizing input data between 0-1).
resized_train_data = train_data.reshape(len(train_data), 28, 28, 1) / 255.0

In [None]:
# one-hot encoding the labels

# Fancy indexing sets one_hot_labels[i, labels[i]] = 1 for every row at once,
# replacing the per-row Python loop.
# NOTE: `labels` must still be the integer NumPy array here. The training cell
# later rebinds that name to a TensorFlow placeholder, so this cell cannot be
# re-run after it without re-creating the label array first.
one_hot_labels = np.zeros((len(train_data), 10))
one_hot_labels[np.arange(len(train_data)), labels] = 1

In [None]:
# performing train/validation split

# Hold out 10% of the samples for validation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    resized_train_data,
    one_hot_labels,
    test_size=0.1,
    random_state=42,
)

In [None]:
# building basic CNN model using tf.contrib.layers modules

def build_model(is_training=True):
    """Build the CNN graph (TF1 graph mode, tf.contrib.layers) for 28x28x1 inputs.

    Layer order: conv3x3(32) -> dropout -> maxpool2x2 -> conv3x3(64) -> dropout
    -> maxpool2x2 -> conv3x3(64) -> dropout -> flatten -> dense(64) ->
    dense(10, no activation).

    Args:
        is_training: Python bool or scalar boolean tensor forwarded to every
            dropout layer. The default (True) keeps dropout always active,
            which is what the graph did before this argument existed -- note
            that this also applies whenever `pred`, `loss` or `accuracy` are
            evaluated on validation/test data. To evaluate without dropout,
            pass a boolean placeholder (for example
            `tf.placeholder_with_default(False, shape=())`) and feed True only
            on training steps.

    Returns:
        Tuple `(features, labels, pred, loss, accuracy)`:
            features: float32 placeholder of shape (None, 28, 28, 1).
            labels: float32 placeholder of shape (None, 10).
            pred: softmax of the 10 logits.
            loss: softmax cross-entropy between `labels` and the logits.
            accuracy: 100 * mean of (argmax(pred) == argmax(labels)).
    """

    features = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name="features")
    labels = tf.placeholder(tf.float32, shape=(None, 10), name="labels")

    def conv3x3(inputs, num_outputs):
        # 3x3 convolution, stride 1, 'SAME' padding; other settings are the layer defaults.
        return tf.contrib.layers.conv2d(
            inputs=inputs,
            num_outputs=num_outputs,
            kernel_size=[3, 3],
            stride=1,
            padding='SAME')

    def dropout(inputs):
        # keep_prob 0.5; only applied when `is_training` evaluates to True.
        return tf.contrib.layers.dropout(inputs, keep_prob=0.5, is_training=is_training)

    def max_pool2x2(inputs):
        # 2x2 max pooling with stride 2 and 'VALID' padding.
        return tf.contrib.layers.max_pool2d(
            inputs=inputs,
            kernel_size=[2, 2],
            stride=2,
            padding='VALID')

    # Layers are created in the same order as before so that the automatically
    # generated variable scope names do not change.
    conv1 = conv3x3(features, 32)
    drop1 = dropout(conv1)
    pool1 = max_pool2x2(drop1)

    conv2 = conv3x3(pool1, 64)
    drop2 = dropout(conv2)
    pool2 = max_pool2x2(drop2)

    conv3 = conv3x3(pool2, 64)
    drop3 = dropout(conv3)

    flatten_layer = tf.contrib.layers.flatten(drop3)

    fc1 = tf.contrib.layers.fully_connected(flatten_layer, 64)

    # Raw logits: the loss applies softmax itself, so no activation here.
    fc2 = tf.contrib.layers.fully_connected(fc1, 10, activation_fn=None)

    pred = tf.nn.softmax(fc2)

    loss = tf.losses.softmax_cross_entropy(labels, fc2)

    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))
    accuracy = 100 * tf.reduce_mean(tf.cast(correct, tf.float32))

    return features, labels, pred, loss, accuracy

In [None]:
# Training schedule: number of passes over X_train, and samples per optimisation step.
num_epochs, batch_size = 100, 450

In [None]:
# training model and performing validation every 10 epochs

# NOTE: from here on the notebook-level name `labels` refers to the label
# placeholder returned by build_model(), no longer to the NumPy label array.
features, labels, pred, loss, accuracy = build_model()

optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

# The session is intentionally left open: later cells reuse `sess` for inference.
sess = tf.Session()

sess.run(tf.global_variables_initializer())

for i in range(num_epochs):

    # Stepping by batch_size (rather than counting whole batches) means a
    # trailing partial batch is trained on too, and a training set smaller
    # than batch_size still yields one batch.
    for start in range(0, len(X_train), batch_size):

        feed_dict = {features: X_train[start : start + batch_size],
                     labels: y_train[start : start + batch_size]}

        loss_value, acc, _ = sess.run([loss, accuracy, optimizer], feed_dict)

    # loss_value / acc are those of the epoch's last batch, not epoch averages.
    print("Epoch: {}, Loss: {:.4f}".format(i + 1, loss_value), ", Accuracy = {:.4f}".format(acc))

    if (i + 1) % 10 == 0:

        # Validation runs on the whole hold-out set in a single feed.
        feed_dict = {features: X_val, labels: y_val}

        val_loss_value, val_acc = sess.run([loss, accuracy], feed_dict)

        print("Epoch: {}: Validation Loss = {:.4f}".format(i + 1, val_loss_value), ", Validation Accuracy = {:.4f}".format(val_acc))

In [None]:
# reading full mnist test data to perform testing

# 'samples' is the location handed to the MNIST loader
# (presumably a directory holding the MNIST files -- TODO confirm).
mndata = MNIST('samples')

# load_testing() yields the test images and their labels as a pair.
t_images, t_labels = mndata.load_testing()

# Convert the images to a NumPy array so they can be reshaped and scaled below.
t_images = np.asarray(t_images)

In [None]:
# Reshape the flat test images to (N, 28, 28, 1) in one vectorised step and
# scale by 1/255, the same preprocessing the training images receive.
mnist_test = t_images.reshape(len(t_images), 28, 28, 1) / 255.0
mnist_test.shape

In [None]:
# One-hot encode the test labels: row i gets a 1 in column t_labels[i].
# np.asarray turns the labels into an integer index array regardless of the
# sequence type the loader returned, so the assignment is done in one step.
oh_mnist = np.zeros((len(t_images), 10))
oh_mnist[np.arange(len(t_images)), np.asarray(t_labels, dtype=np.intp)] = 1

In [None]:
accuracies = []
predictions = np.zeros((len(t_images), 10))

test_batch_size = 100  # inference on 100 batch size
batch_sizes = []       # number of rows in each batch, used to weight the mean

# Iterate over the actual number of test images instead of assuming exactly
# 100 batches, so a different test-set size (or a short final batch) is handled.
for start in range(0, len(mnist_test), test_batch_size):

    stop = start + test_batch_size

    feed_dict = {features: mnist_test[start:stop], labels: oh_mnist[start:stop]}

    acc, predicts = sess.run([accuracy, pred], feed_dict)

    accuracies.append(acc)
    batch_sizes.append(len(predicts))

    predictions[start:stop] = predicts

# Weight each batch accuracy by its size; with equal-sized batches this equals the plain mean.
print("MNIST Test Accuracy = {:.4f}".format(np.average(accuracies, weights=batch_sizes)))

In [None]:
# Collapse the (N, 10) score and one-hot matrices to 1-D class indices.
# Both names are rebound in place, so the ndim guards keep the cell re-runnable:
# a second np.argmax(..., 1) on the already 1-D arrays would raise.
if predictions.ndim > 1:
    predictions = np.argmax(predictions, 1)
if oh_mnist.ndim > 1:
    oh_mnist = np.argmax(oh_mnist, 1)

In [None]:
# Arguments are (true labels, predicted labels); by this point both have been
# reduced to 1-D class indices via argmax. This only builds the graph op.
conf_mat = tf.confusion_matrix(oh_mnist, predictions) # creating confusion matrix on full mnist test data

In [None]:
# Evaluate the confusion-matrix op in the existing session and display the result.
sess.run(conf_mat)