## NOTEBOOK IMPORTS

In [66]:
from numpy.random import seed
seed(888)
from tensorflow.random import set_seed
set_seed(404)

In [67]:
import os
import numpy as np
import tensorflow as tf

from time import strftime

## CONSTANTS

In [68]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

LOGGING_PATH = 'tensorboard_mnist_digit_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

## GET DATA

In [69]:
y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

In [70]:
y_train_all.shape

(60000,)

In [71]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [72]:
x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

In [73]:
x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)

In [74]:
x_test[0].shape

(784,)

## EXPLORE DATA

In [75]:
x_train_all.shape

(60000, 784)

In [76]:
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [77]:
y_train_all[0]

5

In [78]:
y_train_all[:5]

array([5, 0, 4, 1, 9])

## DATA PRE-PROCESSING

In [79]:
x_train_all, x_test = x_train_all / 225.0, x_test / 255.0

## CONVERT TARGET  VALUES TO ONE-HOT ENCODING

In [80]:
values = y_train_all[:5]
np.eye(10)[values]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [81]:
values[4]

9

In [82]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [83]:
y_train_all.shape

(60000, 10)

In [84]:
y_test = np.eye(10)[y_test]

In [85]:
y_test.shape

(10000, 10)

## CREATE VALIDATION DATASET FROM TRAINING DATA

***Split the Training dataset into a smaller training dataset and a validation dataset for the features and labels.
Create Four arrays: x_val, y_val, x_train and y_train from x_train_all and y_train_all. Use the validation size of 10,000.***

In [86]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]

In [87]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [88]:
x_train.shape

(50000, 784)

In [89]:
x_val.shape

(10000, 784)

## SETUP TENSORFLOW GRAPH

In [119]:
tf.compat.v1.disable_eager_execution()
X = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, TOTAL_INPUTS])
Y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, NR_CLASSES])

## THE NEURAL NETWORK ARCHITECTURE 
### Hyperparameters

In [120]:
nr_epochs = 5
learning_rate = 1e-4

n_hidden1 = 512
n_hidden2 = 64

In [121]:
initial_w1 = tf.random.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], seed=42, stddev=0.1)
w1 = tf.Variable(initial_value=initial_w1)

In [122]:
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value=initial_b1)

In [123]:
layer1_in = tf.matmul(X, w1) + b1

In [124]:
layer1_out = tf.nn.relu(layer1_in)

***Set up second hidden layer. This layer has 64 neurons and needs to work off the output of the first hidden layer. Then the output layer. The output layer will use the softmax activation function.***

In [125]:
initial_w2 = tf.random.truncated_normal(shape=[n_hidden1, n_hidden2], seed=42, stddev=0.1)
w2 = tf.Variable(initial_value=initial_w2)

initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

In [126]:
initial_w3 = tf.random.truncated_normal(shape=[n_hidden2, NR_CLASSES], seed=42, stddev=0.1)
w3 = tf.Variable(initial_value=initial_w3)

initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])
b3 = tf.Variable(initial_value=initial_b3)

layer3_in = tf.matmul(layer2_out, w3) + b3
output = tf.nn.softmax(layer3_in)

## TENSORBOARD SETUP

In [127]:
# FOLDER FOR TENSORBOARD

#folder_name = f'Model 1 at {strftime("%H:%M")}'
folder_name = f'Model 1 at {strftime("%I %M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror)
else:
    print('Successfully Created Directories!')

Successfully Created Directories!



## LOSS, OPTIMIZATION & METRICS

### Defining Loss Function

In [128]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=output))

### Defining Optimizer

In [129]:
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(loss)

### Accuracy Metric

In [130]:
correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [131]:
tf.summary.scalar('accuracy', accuracy)

<tf.Tensor 'accuracy_2/write_summary/Const:0' shape=() dtype=bool>

## RUN SESSION

In [132]:
sess = tf.compat.v1.Session()

### Setup Filewriter and Merge summaries

In [133]:
merged_summary = tf.compat.v1.summary.merge_all()

train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
train_writer.add_graph(sess.graph)

In [134]:
# merged_summary = tf.compat.v1.summary.merge_all()

# train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
# train_writer.add_graph(sess.graph)

# validation_writer = tf.compat.v1.summary.FileWriter(directory + '/validation')

### INITIALIZE ALL VARIABLES

In [135]:
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

In [136]:
b2.eval(sess)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

## BATCHING DATA

In [137]:
size_of_batch = 1000

In [138]:
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

index_in_epoch = 0

In [139]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

## TRAINING LOOP

In [140]:
for epoch in range(nr_epochs):
    
    for i in range(nr_iterations):
        
        batch_x, batch_y = next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
        
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        sess.run(train_step, feed_dict=feed_dictionary)
        
    s, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict=feed_dictionary)

        #train_writer.add_summary(batch_accuracy, epoch)
    train_writer.add_summary(s, epoch)

    print(f'Epoch {epoch} \t|   Training Accuracy: {batch_accuracy}')
print('Done Training')

TypeError: Fetch argument None has invalid type <class 'NoneType'>

## RESET FOR NEXT RUN

In [None]:
train_writer.close()
sess.close()
tf.compat.v1.reset_default_graph()

In [None]:
type(merged_summary)