# IST597:- Multi-Layer Perceptron

## Load the libraries

In [1]:
import os
import numpy as np
import time
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()
tf.executing_eagerly()

True

In [37]:
tfe.num_gpus()

1

#Get number of Gpu's in the system or else you can also use Nvidia-smi in command prompt

## Generate random data

In [2]:
size_input = 32
size_hidden = 128
size_output = 1
number_of_train_examples = 1000
number_of_test_examples = 300

In [3]:
X_train = np.random.randn(number_of_train_examples , size_input)
y_train = np.random.randn(number_of_train_examples)
X_test = np.random.randn(number_of_test_examples, size_input)
y_test = np.random.randn(number_of_test_examples)

In [4]:
# Split dataset into batches
#train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(4)

## Build MLP using Eager Execution

In [5]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer
    self.W1 = tfe.Variable(tf.random_normal([self.size_input, self.size_hidden]))
    # Initialize biases for hidden layer
    self.b1 = tfe.Variable(tf.random_normal([1, self.size_hidden]))
     # Initialize weights between hidden layer and output layer
    self.W2 = tfe.Variable(tf.random_normal([self.size_hidden, self.size_output]))
    # Initialize biases for output layer
    self.b2 = tfe.Variable(tf.random_normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.b1, self.b2]
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.losses.mean_squared_error(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables),
                              global_step=tf.train.get_or_create_global_step())
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer
    what = tf.matmul(X_tf, self.W1) + self.b1
    hhat = tf.nn.relu(what)
    # Compute output
    output = tf.matmul(hhat, self.W2) + self.b2
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

## Train Model

In [6]:
# Set number of epochs
NUM_EPOCHS = 10

In [8]:
# Initialize model using CPU
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='cpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total = tfe.Variable(0, dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(20)
  print("original variables: {}".format(mlp_on_cpu.variables))
  for inputs, outputs in train_ds:
    preds = mlp_on_cpu.forward(inputs)
    loss_total = loss_total + mlp_on_cpu.loss(preds, outputs)
    mlp_on_cpu.backward(inputs, outputs)
  print("updated variables: {}".format(mlp_on_cpu.variables))  
  print('Number of Epoch = {} - Average MSE:= {:.4f}'.format(epoch + 1, loss_total.numpy() / X_train.shape[0]))

# Training accuracy
preds = mlp_on_cpu.forward(X_test)
correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y_test, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# To keep sizes compatible with model
print ("Accuracy = {}".format(accuracy.numpy()))

time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

original variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.12643534,  1.243619  ,  0.8793551 , ...,  1.9000742 ,
        -0.75000685, -1.2014854 ],
       [ 0.34021607,  0.42470688, -0.66460246, ...,  0.26425806,
         1.1993284 ,  0.82351494],
       [ 2.1894267 , -0.38412282, -0.6174463 , ...,  1.5634316 ,
        -0.38778022, -0.11753063],
       ...,
       [ 0.45774958, -0.717807  , -1.3340317 , ...,  0.96017706,
        -0.5696567 ,  0.93480414],
       [-0.8778115 ,  0.00995159, -0.96212864, ..., -0.4851143 ,
         0.06905562,  0.34049395],
       [ 2.0131698 ,  0.02866848, -0.510506  , ...,  1.0498365 ,
         0.5612748 ,  1.1952845 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-1.73602253e-01],
       [-2.98470825e-01],
       [ 4.27366287e-01],
       [ 1.17139876e+00],
       [ 1.80243099e+00],
       [ 1.09314954e+00],
       [ 1.77731204e+00],
       [ 1.19096744e+00],
       [-9.987

updated variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.12668046,  1.2398666 ,  0.8814668 , ...,  1.9023772 ,
        -0.7493385 , -1.193927  ],
       [ 0.36771888,  0.43908396, -0.6822055 , ...,  0.27955824,
         1.2482955 ,  0.7925207 ],
       [ 2.1962316 , -0.38503844, -0.621215  , ...,  1.5722224 ,
        -0.35114107, -0.13028765],
       ...,
       [ 0.44539508, -0.7191691 , -1.3273331 , ...,  0.9599127 ,
        -0.583806  ,  0.94939524],
       [-0.90555227,  0.00835578, -0.94709504, ..., -0.49851584,
         0.0522612 ,  0.36874068],
       [ 2.0252993 ,  0.03053573, -0.5177039 , ...,  1.0604298 ,
         0.5700552 ,  1.1659126 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-7.18534231e-01],
       [-3.32250267e-01],
       [ 2.61608899e-01],
       [ 1.15802312e+00],
       [ 1.39344013e+00],
       [ 7.54000485e-01],
       [ 1.48672462e+00],
       [ 1.01169670e+00],
       [-9.3551

updated variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.12685275,  1.2368864 ,  0.8825114 , ...,  1.9031608 ,
        -0.7472413 , -1.1894298 ],
       [ 0.3796901 ,  0.44194984, -0.68605924, ...,  0.27679262,
         1.251983  ,  0.79389286],
       [ 2.2006826 , -0.3871973 , -0.6216549 , ...,  1.5752052 ,
        -0.3397306 , -0.13193597],
       ...,
       [ 0.43821645, -0.7168216 , -1.326418  , ...,  0.9625655 ,
        -0.5838137 ,  0.953604  ],
       [-0.92502236,  0.01100397, -0.94329447, ..., -0.50190014,
         0.05716481,  0.37889403],
       [ 2.0270035 ,  0.02822578, -0.51914406, ...,  1.061128  ,
         0.56718296,  1.1582404 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-0.8157871 ],
       [-0.3245746 ],
       [ 0.21573481],
       [ 1.1467079 ],
       [ 1.2318608 ],
       [ 0.64876884],
       [ 1.3746575 ],
       [ 0.91838044],
       [-0.86216533],
       [-0.6072397 ],
   

updated variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.1255351 ,  1.234784  ,  0.88264185, ...,  1.9042217 ,
        -0.74403185, -1.1859375 ],
       [ 0.3852555 ,  0.4429207 , -0.6879167 , ...,  0.27139044,
         1.2497351 ,  0.79866236],
       [ 2.2042732 , -0.38917172, -0.62169397, ...,  1.5772629 ,
        -0.33296198, -0.13203871],
       ...,
       [ 0.43329397, -0.7145365 , -1.3261105 , ...,  0.9653765 ,
        -0.58330125,  0.9551994 ],
       [-0.9373568 ,  0.01455603, -0.94182247, ..., -0.50278115,
         0.06621353,  0.38439804],
       [ 2.0254362 ,  0.0253293 , -0.5195616 , ...,  1.0603964 ,
         0.5635289 ,  1.1548045 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-8.31937194e-01],
       [-3.16552281e-01],
       [ 1.93942919e-01],
       [ 1.12058294e+00],
       [ 1.11717033e+00],
       [ 5.90659678e-01],
       [ 1.28526998e+00],
       [ 8.47592473e-01],
       [-8.0045

updated variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.12292783,  1.2331107 ,  0.88251656, ...,  1.9056344 ,
        -0.74049264, -1.1833363 ],
       [ 0.3888643 ,  0.4434903 , -0.6892721 , ...,  0.26542753,
         1.2467362 ,  0.80367583],
       [ 2.2071414 , -0.3908447 , -0.62170845, ...,  1.5789443 ,
        -0.32775587, -0.13212049],
       ...,
       [ 0.4294516 , -0.7125401 , -1.3260245 , ...,  0.96785176,
        -0.5831644 ,  0.9558703 ],
       [-0.94669634,  0.01804579, -0.9410979 , ..., -0.502827  ,
         0.07594696,  0.387985  ],
       [ 2.0230374 ,  0.02270741, -0.51972526, ...,  1.0593909 ,
         0.56061876,  1.1530433 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-0.8263384 ],
       [-0.30804873],
       [ 0.180929  ],
       [ 1.0895039 ],
       [ 1.0268685 ],
       [ 0.5524411 ],
       [ 1.2057035 ],
       [ 0.7899228 ],
       [-0.74868786],
       [-0.49304757],
   

updated variables: [<tf.Variable 'Variable:0' shape=(32, 128) dtype=float32, numpy=
array([[-0.12039494,  1.2316257 ,  0.8823269 , ...,  1.9068758 ,
        -0.7374323 , -1.1809012 ],
       [ 0.39153236,  0.44399676, -0.69034946, ...,  0.25939268,
         1.2441847 ,  0.8082787 ],
       [ 2.209774  , -0.39230266, -0.6216964 , ...,  1.5804752 ,
        -0.32336766, -0.13227282],
       ...,
       [ 0.42678753, -0.7107387 , -1.3259767 , ...,  0.9704064 ,
        -0.58296674,  0.956187  ],
       [-0.9539476 ,  0.02139367, -0.94071907, ..., -0.5022862 ,
         0.08518866,  0.3902806 ],
       [ 2.020464  ,  0.02039579, -0.5198109 , ...,  1.0585312 ,
         0.5583709 ,  1.1517301 ]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(128, 1) dtype=float32, numpy=
array([[-8.1462353e-01],
       [-2.9981387e-01],
       [ 1.7079574e-01],
       [ 1.0567508e+00],
       [ 9.4902730e-01],
       [ 5.1945925e-01],
       [ 1.1334723e+00],
       [ 7.3903394e-01],
       [-7.0486283e-01]

InvalidArgumentError: Expected dimension in the range [-1, 1), but got 1 [Op:ArgMax]

In [44]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total = tfe.Variable(0, dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=(epoch*1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs)
    loss_total = loss_total + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Average MSE:= {:.4f}'.format(epoch + 1, loss_total.numpy() / X_train.shape[0]))  
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Average MSE:= 54.9124
Number of Epoch = 2 - Average MSE:= 24.0984
Number of Epoch = 3 - Average MSE:= 16.7387
Number of Epoch = 4 - Average MSE:= 13.9170
Number of Epoch = 5 - Average MSE:= 12.2832
Number of Epoch = 6 - Average MSE:= 11.1466
Number of Epoch = 7 - Average MSE:= 10.2290
Number of Epoch = 8 - Average MSE:= 9.4253
Number of Epoch = 9 - Average MSE:= 8.7257
Number of Epoch = 10 - Average MSE:= 8.1264

Total time taken (in seconds): 5.51


In [45]:
#Default mode
mlp_on_default = MLP(size_input, size_hidden, size_output)

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total = tfe.Variable(0, dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=(epoch*1234)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_default.forward(inputs)
    loss_total = loss_total + mlp_on_default.loss(preds, outputs)
    mlp_on_default.backward(inputs, outputs)
  print('Epoch {} - Average MSE: {:.4f}'.format(epoch + 1, loss_total.numpy() / X_train.shape[0]))
time_taken = time.time() - time_start

print('\nTotal time taken(seconds): {:.2f}'.format(time_taken))

Epoch 1 - Average MSE: 41.2676
Epoch 2 - Average MSE: 19.7190
Epoch 3 - Average MSE: 14.6194
Epoch 4 - Average MSE: 12.3656
Epoch 5 - Average MSE: 10.9466
Epoch 6 - Average MSE: 9.9415
Epoch 7 - Average MSE: 9.1411
Epoch 8 - Average MSE: 8.4033
Epoch 9 - Average MSE: 7.7717
Epoch 10 - Average MSE: 7.2243

Total time taken for training (seconds): 5.46


## One Step Inference

In [46]:
test_loss_total = tfe.Variable(0, dtype=tf.float32)
for inputs, outputs in test_ds:
  preds = mlp_on_default.forward(inputs)
  test_loss_total = test_loss_total + mlp_on_default.loss(preds, outputs)
print('Test MSE: {:.4f}'.format(test_loss_total.numpy() / X_train.shape[0]))

Test MSE: 16.3869
