In [None]:
import os
import numpy as np
import time
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)
from tensorflow import keras
import matplotlib.pyplot as plt 
from sklearn.preprocessing import LabelBinarizer


In [None]:
# Fetch MNIST through Keras; load_data() yields a (train, test) pair, each of
# which is an (images, labels) pair.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
# Flatten every image into one feature vector per sample and scale the pixel
# intensities from [0, 255] to [0, 1].
# The work is guarded on the raw uint8 dtype so that re-running this cell does
# not divide already-normalised data by 255 a second time (the division yields
# floating-point arrays, which the guard then skips).
if X_train.dtype == np.uint8:
  X_train = np.reshape(X_train, (X_train.shape[0], -1))/255.
if X_test.dtype == np.uint8:
  X_test = np.reshape(X_test, (X_test.shape[0], -1))/255.

In [None]:
print('Train dimension:', X_train.shape, sep='\n')
print('Test dimension:', X_test.shape, sep='\n')
# One-hot encode the integer class labels so they fit a categorical
# cross-entropy loss.
# Only 1-D label vectors are converted: re-running this cell would otherwise
# feed the already one-hot matrix back into to_categorical and produce a
# 3-D tensor that no longer matches the model output.
if y_train.ndim == 1:
  y_train = tf.keras.utils.to_categorical(y_train)
if y_test.ndim == 1:
  y_test = tf.keras.utils.to_categorical(y_test)
print('Train labels dimension:', y_train.shape, sep='\n')
print('Test labels dimension:', y_test.shape, sep='\n')

Train dimension:
(60000, 784)
Test dimension:
(10000, 784)
Train labels dimension:
(60000, 10)
Test labels dimension:
(10000, 10)


In [None]:
# Layer widths of the network: input -> hidden 1 -> hidden 2 -> output.
size_input = 28 * 28   # one feature per pixel of a flattened 28x28 image
size_hidden01 = 128
size_hidden02 = 64
size_output = 10       # one unit per class

In [None]:
# Wrap the (features, labels) arrays as tf.data pipelines and cut them into
# fixed-size batches: 100 samples per training batch, 40 per test batch.
train_slices = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_slices = tf.data.Dataset.from_tensor_slices((X_test, y_test))
train_ds = train_slices.batch(100)
test_ds = test_slices.batch(40)

In [None]:
class MLP(tf.keras.Model):
  """
  Fully connected network with two ReLU hidden layers and a softmax output.

  The weights are plain tf.Variable objects managed by this class, and the
  training step (backward) is written by hand with tf.GradientTape.
  """

  def __init__(self, size_input, size_hidden01, size_hidden02, size_output, device=None,
               dropout_rate=0.5, learning_rate=0.001):
    """
    size_input: int, size of input layer
    size_hidden01: int, size of first hidden layer
    size_hidden02: int, size of second hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    dropout_rate: float, fraction of hidden activations dropped when training=True
    learning_rate: float, learning rate of the Adam optimizer used by backward()
    """
    super(MLP, self).__init__()

    self.size_input, self.size_hidden01, self.size_hidden02, self.size_output, self.device =\
    size_input, size_hidden01, size_hidden02, size_output, device
    self.dropout_rate = dropout_rate

    # Initialize weights between input layer and hidden layer 1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden01]))
    # Initialize biases for hidden layer 1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden01]))
    # Initialize weights between hidden layer 1 and hidden layer 2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden01, self.size_hidden02]))
    # Initialize biases for hidden layer 2
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden02]))
    # Initialize weights between hidden layer 2 and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden02, self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))

    # Define variables to be updated during backpropagation
    self.MLP_variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    # A single optimizer is kept for the lifetime of the model. Adam relies on
    # running moment estimates accumulated across steps; building a new
    # optimizer inside every backward() call would discard them each time.
    self.MLP_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

  def forward(self, X, training=False):
    """
    forward pass
    X: Tensor, inputs
    training: bool, apply dropout to the hidden layers when True. Defaults to
      False so that predictions made for evaluation are deterministic.
    Returns the softmax class probabilities, also stored on self.y.
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X, training=training)
    else:
      self.y = self.compute_output(X, training=training)

    return self.y

  def loss(self, y_pred, y_true):
    '''
    Categorical cross-entropy between predicted probabilities and one-hot targets.
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    # Both tensors are brought to float32 (and the targets to a 2-D layout)
    # before being handed to the loss, so the cast values are the ones used.
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)

  def backward(self, X_train, y_train):
    """
    backward pass
    Runs one optimisation step on the given batch with dropout enabled and
    returns the loss that was differentiated.
    """
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train, training=True)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.MLP_variables)
    self.MLP_optimizer.apply_gradients(zip(grads, self.MLP_variables))
    return current_loss

  def compute_output(self, X, training=False):
    """
    Custom method to obtain output tensor during forward pass
    X: Tensor or array of inputs, expected to be normalised by the caller
    training: bool, apply dropout after each hidden layer when True
    Returns the softmax of the output layer.
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    # Hidden layer 1
    hhat = tf.nn.relu(tf.matmul(X_tf, self.W1) + self.b1)
    if training:
      hhat = tf.nn.dropout(hhat, rate=self.dropout_rate)
    # Hidden layer 2
    hhat2 = tf.nn.relu(tf.matmul(hhat, self.W2) + self.b2)
    if training:
      hhat2 = tf.nn.dropout(hhat2, rate=self.dropout_rate)
    # Output layer; softmax is applied here because loss() uses
    # CategoricalCrossentropy with its default from_logits=False.
    out = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(out)
    return output

In [None]:
def find_accuracy(pred_val, true_val):
  """
  Fraction of rows whose arg-max along axis 1 in pred_val matches the
  arg-max along axis 1 in true_val, returned as a float32 scalar tensor.
  """
  predicted_class = tf.argmax(pred_val, 1)
  actual_class = tf.argmax(true_val, 1)
  hits = tf.cast(tf.equal(predicted_class, actual_class), tf.float32)
  return tf.reduce_mean(hits)


In [None]:
# Set number of epochs
NUM_EPOCHS = 11
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden01, size_hidden02, size_output, device='gpu')
time_start = time.time()
for epoch in range(NUM_EPOCHS):
  # Running sum of per-sample losses over the epoch
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  # Reshuffle with an epoch-dependent seed so each epoch sees a different order
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1000, seed=epoch*(1234)).batch(32)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs)
    # loss() returns the mean over the batch; weighting it by the batch size
    # turns it back into a sum over samples, so that dividing by the number of
    # training samples below yields the true mean loss per sample.
    batch_size = inputs.shape[0]
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs) * batch_size
    mlp_on_gpu.backward(inputs, outputs)

  print('Number of Epoch = {} - Categorical Loss:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  preds = mlp_on_gpu.compute_output(X_train)
  accuracy_train = find_accuracy(preds,y_train)
  # Express the accuracy as a percentage
  accuracy_train = accuracy_train * 100
  print ("Training Accuracy = {}".format(accuracy_train.numpy()))

preds_test = mlp_on_gpu.compute_output(X_test)
accuracy_test = find_accuracy(preds_test,y_test)
# Express the accuracy as a percentage
accuracy_test = accuracy_test * 100
print ("Test Accuracy = {}".format(accuracy_test.numpy()))

time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))