# Imports

In [101]:
# Fix NumPy's global random seed so NumPy-based randomness is repeatable.
# NOTE(review): the seed is set before TensorFlow is imported - presumably on purpose; keep this order.
from numpy.random import seed
seed(888)
# Fix TensorFlow's global random seed as well.
import tensorflow as tf
tf.random.set_seed(404)

In [102]:
import os
import numpy as np

# Constants

In [126]:
# CSV files holding the digit dataset: pixel features (x) and labels (y).
X_TRAIN_PATH, Y_TRAIN_PATH = 'Data/digit_xtrain.csv', 'Data/digit_ytrain.csv'
X_TEST_PATH, Y_TEST_PATH = 'Data/digit_xtest.csv', 'Data/digit_ytest.csv'

# Image geometry; a flattened image has one value per pixel per channel.
IMAGE_HEIGHT = IMAGE_WIDTH = 28
CHANNELS = 1
TOTAL_INPUTS = CHANNELS * IMAGE_WIDTH * IMAGE_HEIGHT

# Number of label classes, and how many training rows are held out for validation.
NR_CLASSES = 10
VALIDATION_SIZE = 10000

# Get the Data

In [104]:
%%time

# Read the training labels from their comma-separated file as integers.
y_train_all = np.loadtxt(fname=Y_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 109 ms
Wall time: 110 ms


In [105]:
# Sanity check: how many training labels were loaded.
y_train_all.shape

(60000,)

In [106]:
# Read the test labels as integers, then display how many were loaded.
y_test = np.loadtxt(fname=Y_TEST_PATH, dtype=int, delimiter=',')
y_test.shape

(10000,)

In [107]:
%%time

# Read the training pixel values as integers (the slowest load in this notebook).
x_train_all = np.loadtxt(fname=X_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 20.4 s
Wall time: 20.6 s


In [108]:
%%time

# Read the test pixel values as integers.
x_test = np.loadtxt(fname=X_TEST_PATH, dtype=int, delimiter=',')

CPU times: total: 3.05 s
Wall time: 3.05 s


In [109]:
# Report (rows, columns) of the training and test feature matrices, one per line.
print(x_train_all.shape, x_test.shape, sep='\n')

(60000, 784)
(10000, 784)


# Explore the Data

In [110]:
# One row per training image, one column per pixel.
x_train_all.shape

(60000, 784)

In [111]:
# A single sample is a flat vector of pixel values.
x_train_all[0].shape

(784,)

In [112]:
# Raw pixel intensities of the first training image.
# Author's note: 0 means completely white and 255 means completely black.
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [113]:
# The labels are still a 1-D vector here, one entry per training image.
y_train_all.shape

(60000,)

In [114]:
# Peek at the first few labels; each entry is the digit class of one image.
y_train_all[:5]  # shows the output classes

array([5, 0, 4, 1, 9])

# Data Preprocessing

In [115]:
# Re-scale the features from raw integer intensities (0-255) to floats in [0, 1].
# The arrays are overwritten under the same names, so only divide while they still
# hold the integer data produced by the loader. Re-running this cell then leaves the
# already-scaled values untouched instead of dividing them by 255 a second time.
if x_train_all.dtype.kind in 'iu':
    x_train_all = x_train_all / 255.0
if x_test.dtype.kind in 'iu':
    x_test = x_test / 255.0

In [116]:
# Identity matrix with one row per class: row i is the one-hot vector for class i.
# Uses NR_CLASSES rather than a literal so the demo matches the real encoding below.
np.eye(NR_CLASSES)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [117]:
# Picking row 2 of the identity matrix yields the one-hot encoding of class 2.
# Uses NR_CLASSES rather than a literal so the demo matches the real encoding below.
np.eye(NR_CLASSES)[2]

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

### Convert target values to one-hot encoding

In [118]:
# Replace every integer label with the matching row of the identity matrix.
# The result is stored under the same name, so convert only while the labels are
# still a 1-D vector: re-running this cell on the already-encoded 2-D float array
# would otherwise fail, because float arrays cannot be used as indices.
if y_train_all.ndim == 1:
    y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [119]:
# Show the first nine encoded labels; the position of the 1 in each row is the digit class.
y_train_all[:9]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [120]:
# After encoding there is one column per class.
y_train_all.shape

(60000, 10)

In [121]:
# One-hot encode the test labels by looking each one up in the identity matrix.
# Guarded on the labels still being 1-D so that re-running the cell is a no-op
# rather than an IndexError from indexing with the already-encoded float array.
if y_test.ndim == 1:
    y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create validation dataset from training data

In [122]:
# Cut each array once at VALIDATION_SIZE: the leading rows become the validation
# set and the remaining rows stay as the training set.
x_val, x_train = np.split(x_train_all, [VALIDATION_SIZE])
y_val, y_train = np.split(y_train_all, [VALIDATION_SIZE])

In [123]:
# Confirm the split sizes: features and labels of each subset on one line.
print(f'{x_val.shape} {y_val.shape}')
print(f'{x_train.shape} {y_train.shape}')

(10000, 784) (10000, 10)
(50000, 784) (50000, 10)


# Setup Tensorflow Graph

In [128]:
# tf.compat.v1.placeholder only works in graph mode; under TensorFlow 2's default
# eager execution it raises a RuntimeError. Switch to graph mode here, before any
# graph tensors are created, so the notebook runs from a fresh kernel.
tf.compat.v1.disable_eager_execution()

# Graph inputs. The leading dimension is None so the number of samples can be
# chosen each time data is fed in.
X = tf.compat.v1.placeholder(tf.float32, shape = [None, TOTAL_INPUTS])  # flattened image features
Y = tf.compat.v1.placeholder(tf.float32, shape = [None, NR_CLASSES])  # one-hot encoded labels

## Neural Network Architecture

### Hyperparameters

In [129]:
# Training schedule: number of epochs and the optimiser step size.
nr_epochs, learning_rate = 5, 0.0001

# Number of units in the first and second hidden layers.
n_hidden1, n_hidden2 = 512, 64

In [130]:
# Starting values for the first layer's weights: one row per input and one column
# per hidden unit, drawn from a truncated normal with a fixed op-level seed.
initial_w1 = tf.random.truncated_normal(
    shape=[TOTAL_INPUTS, n_hidden1],
    seed=42,
    stddev=0.1,
)

In [131]:
# Inspect the initial-weights tensor (shows its name, shape and dtype).
initial_w1

<tf.Tensor 'truncated_normal:0' shape=(784, 512) dtype=float32>

In [132]:
# Trainable weight matrix of the first hidden layer, starting from initial_w1.
w1 = tf.Variable(initial_w1)

In [133]:
# Trainable biases of the first hidden layer: one per hidden unit, all starting at zero.
initial_b1 = tf.constant(0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_b1)

In [134]:
# Pre-activation of the first hidden layer: inputs times weights, plus the biases.
layer1_in = tf.matmul(a=X, b=w1) + b1

In [135]:
# First hidden layer output: ReLU applied to the pre-activation values.
layer1_out = tf.nn.relu(features=layer1_in)

### Second Hidden Layer