<a href="https://colab.research.google.com/github/AnkurMali/IST597_Spring_2022/blob/main/IST597_MLP_collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IST597:- Multi-Layer Perceptron

## Load the libraries

In [21]:
import os
import numpy as np
import time
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)

In [22]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

#Get number of Gpu's and id's in the system or else you can also use Nvidia-smi in command prompt.

## Generate random data

In [23]:
size_input = 32
size_hidden = 128
size_output = 1
number_of_train_examples = 1000
number_of_test_examples = 300

In [24]:
X_train = np.random.randn(number_of_train_examples , size_input)
y_train = np.random.randn(number_of_train_examples)
X_test = np.random.randn(number_of_test_examples, size_input)
y_test = np.random.randn(number_of_test_examples)

In [25]:
# Split dataset into batches
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(4)

## Build MLP using Eager Execution

In [26]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden]))
    # Initialize biases for hidden layer
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden]))
     # Initialize weights between hidden layer and output layer
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_output]))
    # Initialize biases for output layer
    self.b2 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.b1, self.b2]
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.losses.mean_squared_error(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer
    what = tf.matmul(X_tf, self.W1) + self.b1
    hhat = tf.nn.relu(what)
    # Compute output
    output = tf.matmul(hhat, self.W2) + self.b2
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

## Train Model

In [27]:
# Set number of epochs
NUM_EPOCHS = 10

In [28]:
# Initialize model using CPU
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='cpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_cpu.forward(inputs)
    loss_total = loss_total + mlp_on_cpu.loss(preds, outputs)
    lt = lt + mlp_on_cpu.loss(preds, outputs)
    mlp_on_cpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Average MSE:= {}'.format(epoch + 1, np.sum(loss_total) / X_train.shape[0]))
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - Average MSE:= 14860.577
Number of Epoch = 2 - Average MSE:= 91.625734375
Number of Epoch = 3 - Average MSE:= 41.9202265625
Number of Epoch = 4 - Average MSE:= 26.83350390625
Number of Epoch = 5 - Average MSE:= 19.01277734375
Number of Epoch = 6 - Average MSE:= 15.0563916015625
Number of Epoch = 7 - Average MSE:= 11.77468359375
Number of Epoch = 8 - Average MSE:= 10.0957353515625
Number of Epoch = 9 - Average MSE:= 8.9150615234375
Number of Epoch = 10 - Average MSE:= 7.2906474609375

Total time taken (in seconds): 4.86


In [29]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs)
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Average MSE:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Average MSE:= 4325.573
Number of Epoch = 2 - Average MSE:= 27.723109375
Number of Epoch = 3 - Average MSE:= 12.409765625
Number of Epoch = 4 - Average MSE:= 7.18881298828125
Number of Epoch = 5 - Average MSE:= 4.785166015625
Number of Epoch = 6 - Average MSE:= 3.5824267578125
Number of Epoch = 7 - Average MSE:= 2.8207646484375
Number of Epoch = 8 - Average MSE:= 2.42839990234375
Number of Epoch = 9 - Average MSE:= 2.052015625
Number of Epoch = 10 - Average MSE:= 1.837486572265625

Total time taken (in seconds): 4.07


In [30]:
#Default mode
mlp_on_default = MLP(size_input, size_hidden, size_output)

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_default.forward(inputs)
    loss_total_gpu = loss_total_gpu + mlp_on_default.loss(preds, outputs)
    lt = lt + mlp_on_default.loss(preds, outputs)
    mlp_on_default.backward(inputs, outputs)
  print('Number of Epoch = {} - Average MSE:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Average MSE:= 9692.274
Number of Epoch = 2 - Average MSE:= 50.85880859375
Number of Epoch = 3 - Average MSE:= 23.1451953125
Number of Epoch = 4 - Average MSE:= 14.2425986328125
Number of Epoch = 5 - Average MSE:= 10.015853515625
Number of Epoch = 6 - Average MSE:= 7.53853271484375
Number of Epoch = 7 - Average MSE:= 5.91586962890625
Number of Epoch = 8 - Average MSE:= 4.92961083984375
Number of Epoch = 9 - Average MSE:= 4.153177734375
Number of Epoch = 10 - Average MSE:= 3.6050009765625

Total time taken (in seconds): 3.76


In [31]:
#TPU mode
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='tpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs)
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Average MSE:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Average MSE:= 4296.372
Number of Epoch = 2 - Average MSE:= 40.05835546875
Number of Epoch = 3 - Average MSE:= 16.907794921875
Number of Epoch = 4 - Average MSE:= 10.4465498046875
Number of Epoch = 5 - Average MSE:= 6.9279326171875
Number of Epoch = 6 - Average MSE:= 5.11931396484375
Number of Epoch = 7 - Average MSE:= 3.897322265625
Number of Epoch = 8 - Average MSE:= 3.13264208984375
Number of Epoch = 9 - Average MSE:= 2.685675048828125
Number of Epoch = 10 - Average MSE:= 2.2973623046875

Total time taken (in seconds): 4.84


## One Step Inference

In [35]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
for inputs, outputs in test_ds:
  preds = mlp_on_default.forward(inputs)
  #b = mlp_on_default.loss(preds, outputs)
  test_loss_total = test_loss_total + mlp_on_default.loss(preds, outputs)
# a = (test_loss_total.numpy() / X_train.shape[0])
# print(X_train.shape[0])
# print(test_loss_total.numpy())
# print(b)
print('Test MSE: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_train.shape[0]))

Test MSE: 1.6883
