# Imports

In [1415]:
from numpy.random import seed
seed(888)
import tensorflow as tf
tf.random.set_seed(404)

In [1416]:
import os
import numpy as np

from time import strftime

# Constants

In [1417]:
X_TRAIN_PATH = 'Data/digit_xtrain.csv'
X_TEST_PATH = 'Data/digit_xtest.csv'
Y_TRAIN_PATH = 'Data/digit_ytrain.csv'
Y_TEST_PATH = 'Data/digit_ytest.csv'

LOGGING_PATH = 'tesorboard_mnist_digit_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS

# Get the Data

In [1418]:
%%time

y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter = ',', dtype = int)

CPU times: total: 109 ms
Wall time: 117 ms


In [1419]:
y_train_all.shape

(60000,)

In [1420]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter= ',', dtype = int)
y_test.shape

(10000,)

In [1421]:
%%time

x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter = ',', dtype = int)

CPU times: total: 21.3 s
Wall time: 21.7 s


In [1422]:
%%time

x_test = np.loadtxt(X_TEST_PATH, delimiter = ',', dtype = int)

CPU times: total: 3.78 s
Wall time: 3.79 s


In [1423]:
print(x_train_all.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


# Explore the Data

In [1424]:
x_train_all.shape

(60000, 784)

In [1425]:
x_train_all[0].shape

(784,)

In [1426]:
# 0 means complete white and 255 means complete black
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [1427]:
y_train_all.shape

(60000,)

In [1428]:
#y_train_all[0] 
y_train_all[:5]  # shows the output classes

array([5, 0, 4, 1, 9])

# Data Preprocessing

In [1429]:
# Re-scale the features
x_train_all, x_test = x_train_all/255.0, x_test/255.0

In [1430]:
np.eye(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [1431]:
np.eye(10)[2]

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

### Convert target value to one-hot encoding 

In [1432]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [1433]:
y_train_all[:9] 

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [1434]:
y_train_all.shape

(60000, 10)

In [1435]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create validation dataset from training data

In [1436]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [1437]:
print(x_val.shape, y_val.shape)
print(x_train.shape, y_train.shape)

(10000, 784) (10000, 10)
(50000, 784) (50000, 10)


# Setup Tensorflow Graph

In [1485]:
 # number of sample will be decided later(None = ?)
X = tf.compat.v1.placeholder(tf.float32, shape = [None, TOTAL_INPUTS], name = 'X') 
Y = tf.compat.v1.placeholder(tf.float32, shape = [None, NR_CLASSES], name = 'Labels')

## Neural Network Architecture

### Hyperparameters

In [1486]:
nr_epochs = 50
learning_rate = 0.001

n_hidden1 = 512
n_hidden2 = 64

### Layer Setup

In [1487]:
def setup_layer(input, weight_dim, bias_dim, name):
    
    with tf.name_scope(name):
        # setting up of weights
        initial_w = tf.random.truncated_normal(shape = weight_dim, stddev= 0.1, seed = 42)
        w = tf.Variable(initial_value = initial_w, name = 'W') 

        # setting up of biases
        initial_b = tf.constant(value = 0.0, shape = bias_dim)
        b = tf.Variable(initial_value = initial_b, name = 'B')

        # output of Output layer
        layer_in = tf.matmul(input, w) + b
        
        if name == 'out':
            layer_out = tf.nn.softmax(layer_in)
        else:
            layer_out = tf.nn.relu(layer_in)
        
        tf.compat.v1.summary.histogram('weights', w)
        tf.compat.v1.summary.histogram('biases', b)
        
        return layer_out
            

In [1488]:
# Model without drop out
# layer_1 = setup_layer(X, weight_dim = [TOTAL_INPUTS, n_hidden1], bias_dim = [n_hidden1], name = 'layer_1')
# layer_2 = setup_layer(layer_1, weight_dim = [n_hidden1, n_hidden2], bias_dim = [n_hidden2], name = 'layer_2')
# output = setup_layer(layer_2, weight_dim = [n_hidden2, NR_CLASSES], bias_dim = [NR_CLASSES], name = 'out')


# model_name = f'{n_hidden1}-{n_hidden2} LR{learning_rate} E{nr_epochs}'

In [1489]:
layer_1 = setup_layer(X, weight_dim = [TOTAL_INPUTS, n_hidden1], bias_dim = [n_hidden1], name = 'layer_1')

# dropout layer
layer_drop = tf.compat.v1.nn.dropout(layer_1, keep_prob= 0.8, name= 'dropout_layer')

layer_2 = setup_layer(layer_drop, weight_dim = [n_hidden1, n_hidden2], bias_dim = [n_hidden2], name = 'layer_2')
output = setup_layer(layer_2, weight_dim = [n_hidden2, NR_CLASSES], bias_dim = [NR_CLASSES], name = 'out')


model_name = f'{n_hidden1}-DO-{n_hidden2} LR{learning_rate} E{nr_epochs}'

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


# Tensorboard Setup

In [1490]:
# Folder for tensorboard
folder_name = f'{model_name} at {strftime("%H-%M")}'

directory = os.path.join(LOGGING_PATH, folder_name)


try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror)
    
else:
    print('Successfully created directories!')

Successfully created directories!


# Loss, Optimisation & Metrics

### Defining the Loss Function

In [1491]:
with tf.name_scope('loss_calc'):
    #function to compute the average softmax cross-entropy loss between the predicted output and the true label
    loss = tf.reduce_mean(tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels= Y, logits=output))

### Defining the Optimizer

In [1492]:
with tf.name_scope('optimizer'):
    # declare the optimizer and step
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)

### Accuracy Metric

In [1493]:
with tf.name_scope('accuracy_calc'):
    correct_pred = tf.equal(tf.math.argmax(output, axis= 1), tf.math.argmax(Y, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [1494]:
with tf.name_scope('performance'):

    tf.compat.v1.summary.scalar('accuarcy', accuracy)

    #add summary for the loss
    tf.compat.v1.summary.scalar('cost', loss)

### Check Input Images in Tensorboard

In [1495]:

with tf.name_scope('show_image'):
    x_image = tf.reshape(X, [-1, 28, 28, 1])
    tf.compat.v1.summary.image('image_input', x_image, max_outputs= 4)

# Run Session

In [1496]:
sess = tf.compat.v1.Session()

### Setup Filewriter & Merge Summaries

In [1497]:
merged_summary = tf.compat.v1.summary.merge_all()

train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
train_writer.add_graph(sess.graph)

validation_writer = tf.compat.v1.summary.FileWriter(directory + '/validation')

In [1498]:
print(merged_summary)

Tensor("Merge/MergeSummary:0", shape=(), dtype=string)


### Initialise all the variables

In [1499]:
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

### Batching the Data

In [1500]:
size_of_batch = 1000
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

index_in_epoch = 0

In [1501]:
nr_iterations

50

In [1502]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch = index_in_epoch + batch_size
    
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

### Training Loop

In [1503]:
for epoch in range(nr_epochs):
    
    # --------------------Traning Dataset-----------------------#
    
    for i in range(nr_iterations):
        batch_x, batch_y = next_batch(batch_size= size_of_batch, data = x_train, labels= y_train)
        
        feed_dictionary = {X: batch_x, Y: batch_y}
        
        sess.run(train_step, feed_dict= feed_dictionary)
    s, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict= feed_dictionary)
    
    train_writer.add_summary(s, epoch)
        
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    
    # ===================Validation==============================
    
    summary = sess.run(fetches= merged_summary, feed_dict= {X: x_val, Y: y_val})
    validation_writer.add_summary(summary, epoch)

print('Done Training!')

Epoch 0 	| Training Accuracy = 0.8410000205039978
Epoch 1 	| Training Accuracy = 0.859000027179718
Epoch 2 	| Training Accuracy = 0.8640000224113464
Epoch 3 	| Training Accuracy = 0.871999979019165
Epoch 4 	| Training Accuracy = 0.9490000009536743
Epoch 5 	| Training Accuracy = 0.9769999980926514
Epoch 6 	| Training Accuracy = 0.9769999980926514
Epoch 7 	| Training Accuracy = 0.9789999723434448
Epoch 8 	| Training Accuracy = 0.9810000061988831
Epoch 9 	| Training Accuracy = 0.9789999723434448
Epoch 10 	| Training Accuracy = 0.984000027179718
Epoch 11 	| Training Accuracy = 0.9879999756813049
Epoch 12 	| Training Accuracy = 0.9850000143051147
Epoch 13 	| Training Accuracy = 0.9869999885559082
Epoch 14 	| Training Accuracy = 0.9860000014305115
Epoch 15 	| Training Accuracy = 0.9869999885559082
Epoch 16 	| Training Accuracy = 0.9879999756813049
Epoch 17 	| Training Accuracy = 0.9900000095367432
Epoch 18 	| Training Accuracy = 0.9900000095367432
Epoch 19 	| Training Accuracy = 0.9909999966

# Reset for the Next Run

In [1504]:
train_writer.close()
validation_writer.close()
sess.close()
tf.compat.v1.reset_default_graph()

# Code for 1st Part of the Module

1st Hidden Layer

In [1505]:

# with tf.name_scope('First_Hidden_Layer'):

#     # setting up weights
#     initial_w1 = tf.random.truncated_normal(shape = [TOTAL_INPUTS, n_hidden1], stddev= 0.1, seed = 42)

#     # create the weights of neurons
#     w1 = tf.Variable(initial_value = initial_w1, name = 'w1')

#     # create the biases 
#     initial_b1 = tf.constant(value = 0.0, shape = [n_hidden1])
#     b1 = tf.Variable(initial_value = initial_b1, name = 'b1')

#     # calculation of 1st layer input
#     layer1_in = tf.matmul(X, w1) + b1

#     # defining the activation function and layer1 output
#     layer1_out = tf.nn.relu(layer1_in)
   

2nd Hidden Layer

In [1506]:

# with tf.name_scope('Second_Hidden_Layer'):

#     # setting up of weights
#     initial_w2 = tf.random.truncated_normal(shape = [n_hidden1, n_hidden2], stddev= 0.1, seed = 42)
#     w2 = tf.Variable(initial_value = initial_w2, name = 'w2') 

#     # setting up of biases
#     initial_b2 = tf.constant(value = 0.0, shape = [n_hidden2])
#     b2 = tf.Variable(initial_value = initial_b2, name = 'b2')

#     # calculation of 2nd layer input
#     layer2_in = tf.matmul(layer1_out, w2) + b2

#     # defining the activation function and layer2 output
#     layer2_out = tf.nn.relu(layer2_in)


Output Layer

In [1507]:

# with tf.name_scope('output_layer'):

#     # setting up of weights
#     initial_w3 = tf.random.truncated_normal(shape = [n_hidden2, NR_CLASSES], stddev= 0.1, seed = 42)
#     w3 = tf.Variable(initial_value = initial_w3, name = 'w3') 

#     # setting up of biases
#     initial_b3 = tf.constant(value = 0.0, shape = [NR_CLASSES])
#     b3 = tf.Variable(initial_value = initial_b3, name = 'b3')

#     # output of Output layer
#     layer3_in = tf.matmul(layer2_out, w3) + b3
#     output = tf.nn.softmax(layer3_in)


In [1508]:
# w1.eval(sess)  shows all initial weights
# b1.eval(sess)  shows all initial biases

#b3.eval(sess)