<a href="https://colab.research.google.com/github/Sameer-Arora/Deep_Learning_Tensorflow2.0/blob/master/ANN_lab_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Install TensorFlow
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
import numpy as np

In [0]:
mnist = tf.keras.datasets.mnist
print(mnist)

np.random.seed(1)

In [0]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
print( x_train.shape , y_train.shape ,x_train[0].shape, y_train[0], x_test[0].shape, y_test[0] )

In [0]:
val_size = int(0.2*x_train.shape[0])
train_size = x_train.shape[0]-val_size
(x_train,x_validate) = tf.split(x_train, [ train_size, val_size ] ,axis=0)
(y_train,y_validate) = tf.split(y_train, [ train_size, val_size ] ,axis=0)


In [0]:

print( x_train.shape , y_train.shape ,x_train[0].shape, y_train[0], x_validate.shape, y_validate.shape )

In [0]:
def preprocess_data( images , labels ):
  """
  Function to convet the images into a flattened vector and 
  to one-hot encode the labels,
  
  Returns:
  input: input to the network.
  labels: output for the loss. 
  
  """
  
  
  return tf.reshape(images,[ images.shape[0] ,-1]) , tf.one_hot(indices= tf.cast(labels,dtype=tf.int32),depth= tf.cast( tf.math.reduce_max(labels)+1,dtype=tf.int32) 
                                                                ,axis=1,dtype=tf.float64)

  

In [0]:
(x_train,y_train) = preprocess_data(x_train,y_train)
# print( x_train.shape , y_train.shape ,x_train[0], y_train[0], x_validate.shape, y_validate.shape )

In [0]:

class MyCustomLayer(tf.keras.layers.Layer):

  def __init__(self, num_outputs):
    super(MyCustomLayer, self).__init__()
    self.num_outputs = num_outputs

  def build(self, input_shape):
    self.kernel = self.add_variable("W",
                                    shape=[int(input_shape[-1]),
                                           self.num_outputs])
    self.bias = self.add_variable("b",
                                    shape=[ 1, self.num_outputs ] )

class LinearLayer(MyCustomLayer):

  def __init__(self, num_outputs):
    super(LinearLayer, self).__init__(num_outputs)
    self.num_outputs = num_outputs

  def call(self, input):
    return tf.matmul(input, self.kernel)+ self.bias

class SoftMaxLayer(MyCustomLayer):

  def __init__(self, num_outputs):
    super(SoftMaxLayer, self).__init__(num_outputs)
    self.num_outputs = num_outputs

  def call(self, input):
    return softmax(tf.matmul(input, self.kernel)+ self.bias)


In [0]:

class CustomModel(tf.keras.Model):
  
  def __init__(self, inp_dim,hid_dim,out_dim ):
    super(CustomModel, self).__init__(name='')

    self.linear_1 = LinearLayer(hid_dim)
    
    self.sftmax_2 = SoftMaxLayer(out_dim)

  def call(self, input_tensor, training=False):
    
    x = self.linear_1(input_tensor)
    x = self.sftmax_2(x)

    return x


In [0]:
def nn_model_initialize_params( inp_dim,hid_dim,out_dim ):

    
#     tf.random.set_random_seed(1)

    W1 = tf.Variable(tf.zeros([inp_dim, hid_dim ], tf.float64),name="W1")
    b1 = tf.Variable(tf.zeros([ 1,hid_dim ], tf.float64),name="b1")
    
    W2 = tf.Variable(tf.zeros([ hid_dim,out_dim ], tf.float64),name="W2")
    b2 = tf.Variable(tf.zeros([ 1,out_dim ], tf.float64),name="b2")
    
    parameters = {
        "W1" : W1 ,
        "b1" : b1 ,
        "W2" : W2 ,
        "b2" : b2 ,
                 }
  
    return parameters
    

In [0]:
def softmax(Z):
  ## rectify for numerical approximations
  
  return tf.math.exp(Z) / tf.math.reduce_sum( tf.math.exp(Z) )
  

In [0]:
softmax(tf.Variable([1.0,2.0], name="i1",dtype=tf.float32))


In [0]:
def feed_forward_model(X,parameters):

  W1 = parameters['W1']
  b1 = parameters['b1']
  W2 = parameters['W2']
  b2 = parameters['b2']
  
  print(X.shape,W1.shape,b1.shape)
  Z1=  tf.matmul(X,W1) + b1  ## first linear layer.
  
  Z2=  tf.matmul(Z1,W2) + b2  ## second softmax layer.
  
  A2= softmax(Z2)
  
  return A2

In [0]:
def loss(model, inputs, targets):
  preds = model(inputs)
  d_loss = - tf.math.reduce_sum( tf.multiply( targets , tf.cast(tf.math.log(preds),tf.float64) ) ) 
  return d_loss / targets.shape[0]

def compute_data_loss(y,y_):
  print(y.shape,y_.shape)
  ## softmax_cross_entropy_loss as data_loss
  d_loss = - tf.math.reduce_sum( tf.multiply( y , tf.cast(tf.math.log(y_),tf.float64) ) ) 
  return d_loss / y.shape[0]

def compute_reg_loss(parameters):
  W1 = parameters['W1']
  W2 = parameters['W2']
  ## L2 regularization_loss
  return tf.math.reduce_sum(tf.math.reduce_sum( tf.math.multiply(W1,W1) )) + tf.math.reduce_sum(tf.math.reduce_sum( tf.math.multiply(W2,W2) )) 


In [0]:
def get_mini_batch(x_train,y_train,batch_size):
  assert x_train.shape[0] == y_train.shape[0]
  
  for i in range(0,x_train.shape[0],batch_size):
    yield x_train[i:i+batch_size-1] ,  y_train[i:i+batch_size-1]
    

In [0]:
def grad(model, inputs, targets):
  with tf.GradientTape() as tape:
    loss_value = loss(model, inputs, targets)
  return loss_value, tape.gradient(loss_value, model.trainable_variables)


In [0]:
@tf.function
def train_model( learning_rate,regularization_param, batch_size ,epochs, x_train,y_train,x_test,y_test ):
  
  model = CustomModel(784,100,10)
  parameters = nn_model_initialize_params( 784,100,10 )
  
  num_tr_exm = x_train.shape[0]
  
  costs=[]
  seed=0
  
  for epoch in range(epochs):
    
    num_minibatches = int(num_tr_exm/ batch_size)
    epoch_cost=0 
    seed =seed+1   
    
    x_train_shuffle = tf.random.shuffle(x_train,seed=seed)  
    y_train_shuffle = tf.random.shuffle(y_train,seed=seed)  
  
    minibatches_gen = get_mini_batch(x_train,y_train,batch_size)    
    
    for _ in range(num_minibatches):
      
      x_train_batch,y_train_batch = next(minibatches_gen)
#       print("===============================================")
#       print(model(x_train_batch))
      
#       y_= model(x_train_batch)
#       y_ = feed_forward_model(x_train_batch,parameters)
      
#       data_loss = compute_data_loss(y_train_batch,y_)
#       print(data_loss)
      
#       reg_loss = compute_reg_loss(parameters)
#       print(reg_loss)
#       loss =  data_loss +  reg_loss 

      # function to pass loss as required by optimizers in tf 2.0
      def return_loss():
        return loss    

      optimizer = tf.optimizers.SGD(learning_rate=learning_rate,momentum=0.0)
      loss_value, grads = grad(model, x_train_batch, y_train_batch)

#       print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
#                                                 loss_value.numpy()))

      optimizer.apply_gradients(zip(grads, model.trainable_variables))

#       print("Step: {},         Loss: {}".format(optimizer.iterations.numpy(),
#                                                 loss(model,x_train_batch, y_train_batch).numpy()))
      
#       optimizer.minimize(return_loss,var_list=list(parameters.values()))
      
      epoch_cost += loss_value.numpy() / num_minibatches

#       print([x.name for x in model.trainable_variables])
#       print("===============================================")
      
    if epoch % 10 ==0:
      print("Cost after epoch",str(epoch) ," is ",str(epoch_cost))
      
      

In [0]:
learning_rate = 0.01
regularization_param= 1
batch_size=64
epochs=100

train_model( learning_rate,regularization_param, batch_size ,epochs, x_train,y_train,x_test,y_test )

Cost after epoch 40  is  4.680719354165296
Cost after epoch 50  is  4.6753166315542956
Cost after epoch 60  is  4.671351144846155
