<a href="https://colab.research.google.com/github/BNarayanaReddy/CS7015/blob/main/BackPropImplementation_Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [2]:
height = np.random.normal(loc = 168, scale = 10, size=50) # average 168, deviation 10, size 50
weight = np.random.normal(loc = 68, scale = 5, size=50)

In [3]:
bmi = weight / (height/100)**2

In [4]:
labels = (bmi >= 25).astype(int) # obese = 1 else 0

### Network - Multi Label Classification
Input (shape: 50, 2) \
      || \
Layer1 (Neurons = 5) \
|| \
Layer 2 (Neurons = 5) \
|| \
Output Layer (Neurons = 1)

In [5]:
height = height.reshape(-1, 1)
weight = weight.reshape(-1, 1)

In [6]:
X = np.concatenate([height/np.max(height), weight/np.max(weight)], axis = 1)

In [7]:
Y = np.copy(labels).reshape(1,-1)

In [8]:
X[0]

array([0.9951731 , 0.77894804])

In [9]:
Y.shape

(1, 50)

In [10]:
# weights = [  [<Weights of layer 1, unit 1>, <Weights of layer 1, unit 2>...], [<Weights of layer 2, unit 1>, <Weights of layer 2, unit 2>...]        ]

In [11]:
def initialize_weights(units, layers = 2, input_dim = 2):
  weights = {}
  biases = {}
  # Hidden layers
  for layer in range(1, layers+1):
    if layer == 1:
      weights[layer] = np.random.randn(units, input_dim)
      biases[layer] = np.random.randn(1, units)
    else:
      weights[layer] = np.random.randn(units, units)
      biases[layer] = np.random.randn(1, units)

  # Output layer
  weights[layers+1] = np.random.randn(1, units)
  biases[layers+1] = np.random.randn(1, 1)

  return weights, biases

In [12]:
weights, biases = initialize_weights(5, 2)

In [13]:
weights[1].shape, weights[2].shape, weights[3].shape, biases[1].shape, biases[2].shape, biases[3].shape

((5, 2), (5, 5), (1, 5), (1, 5), (1, 5), (1, 1))

In [14]:
def activation(z, act_fn):
  if act_fn == 'relu':
    return np.maximum(0,z)
  if act_fn == 'sigmoid':
    return 1/(1+np.exp(-z))

In [15]:
def forward_prop(X, weights, biases, hidden_activation='relu', output_activation = 'sigmoid'):
  # m, n = X.shape
  a_op = {}
  h_op = {}
  h_op[0] = X
  # hidden
  for i in range(1, len(weights)):
    w = weights[i]
    b = biases[i]
    a_op[i] = np.matmul(h_op[i-1], w.T) + b
    h_op[i] = activation(a_op[i], hidden_activation)
  # output
  a_op[len(weights)] = np.dot(weights[len(weights)], h_op[len(weights)-1].T) + biases[len(weights)]
  h_op[len(weights)] = activation(a_op[len(weights)], output_activation)

  return a_op, h_op

In [16]:
a_op, h_op = forward_prop(X, weights, biases)

In [17]:
h_op[2].shape

(50, 5)

In [18]:
def compute_activation_gradient(a, activ_fn):
  grad = np.zeros(a.shape)
  if activ_fn == 'relu':
    grad[a > 0] = 1
  if activ_fn == 'sigmoid':
    return a*(1-a)
  return grad

In [19]:
a_op[2].shape

(50, 5)

In [20]:
def backpropagation(X, Y, weights, biases, y_pred, output_activation='sigmoid', hidden_activation='relu'):
  a_op, h_op = y_pred
  op_layer = 3
  output_gradient = h_op[op_layer] - Y

  grad_w = {}
  grad_b = {}

  for layer in range(op_layer, 0, -1):
    # print("Layer: ", layer)
    grad_w[layer] = np.dot(output_gradient, h_op[layer-1])
    grad_b[layer] = np.sum(output_gradient, axis=1) # 1, 10

    hidden_grad = np.dot(weights[layer].T, output_gradient)

    # print(compute_activation_gradient(a_op[layer], hidden_activation).shape)

    prev_op = a_op[layer]
    aggregate_grad = hidden_grad * compute_activation_gradient(h_op[layer-1].T, hidden_activation)


    output_gradient = aggregate_grad

  return grad_w, grad_b



In [21]:
y_pred = forward_prop(X, weights, biases)
grad_w, grad_b = backpropagation(X, Y, weights, biases, y_pred)

In [22]:
grad_w

{3: array([[ 0.        ,  0.        ,  0.        , 32.82506231,  0.        ]]),
 2: array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        , 45.57962883,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]]),
 1: array([[ 0.        ,  0.        ],
        [33.54825726, 30.52893006],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]])}

In [23]:
grad_b

{3: array([23.47061848]),
 2: array([ 0.        ,  0.        ,  0.        , 21.21796642,  0.        ]),
 1: array([ 0.        , 36.06543979,  0.        ,  0.        ,  0.        ])}

In [24]:
def compute_cost(Y_pred, Y):
  return np.mean(-Y*np.log10(Y_pred + 1e-8)-(1-Y)*np.log10(1-Y_pred + 1e-8))

In [25]:
compute_cost(h_op[3], Y)

np.float64(0.5768520859706695)

In [26]:
def fit(X, Y, weights, biases, epochs = 100, lr = 1e-3, output_activation='sigmoid', hidden_activation='relu'):
  for epoch in range(epochs):
    y_pred = forward_prop(X, weights, biases, hidden_activation, output_activation)

    if epoch%1== 0:
      print("Loss:", compute_cost(y_pred[1][3], Y))

    grad_w, grad_b = backpropagation(X, Y, weights, biases, y_pred, output_activation, hidden_activation)

    for layer in range(1, len(weights)+1):
      weights[layer] -= lr*grad_w[layer]
      # print(grad_b[layer].shape)
      biases[layer] -= lr*grad_b[layer]

  return weights, biases



In [27]:
weights, biases = fit(X, Y, weights, biases)

Loss: 0.5768520859706695
Loss: 0.5163519594532013
Loss: 0.471806917037609
Loss: 0.4382592567990767
Loss: 0.41242398295925964
Loss: 0.3921184679207342
Loss: 0.3758740084109058
Loss: 0.3628304682515296
Loss: 0.353518629861383
Loss: 0.34885266193438896
Loss: 0.3471226179877796
Loss: 0.3456323351143461
Loss: 0.34433185140494416
Loss: 0.3431514000834311
Loss: 0.3419983782117192
Loss: 0.3408722138059535
Loss: 0.3397723421451993
Loss: 0.33869820594590183
Loss: 0.3376458418892557
Loss: 0.3366070742006816
Loss: 0.33559322590814117
Loss: 0.3345973028186555
Loss: 0.33360540658010435
Loss: 0.3326815178818745
Loss: 0.33177452298562476
Loss: 0.3308994412728184
Loss: 0.3300273707311132
Loss: 0.3291759367261609
Loss: 0.3283556144221894
Loss: 0.3275513875415247
Loss: 0.32677363355616934
Loss: 0.32599826716128666
Loss: 0.3252457819978074
Loss: 0.32452635068550606
Loss: 0.32380413174562533
Loss: 0.3231020300255558
Loss: 0.32243516977160197
Loss: 0.3217626454059929
Loss: 0.3211096017385035
Loss: 0.3204898

In [28]:
weights, biases

({1: array([[-1.87477186,  1.34086777],
         [ 0.24821115,  0.68222708],
         [ 0.78757615, -0.46786056],
         [-0.26958889, -0.34658808],
         [-0.72239546, -1.3002343 ]]),
  2: array([[ 0.61835818, -1.09789039, -0.02260026,  0.17276919, -0.85807464],
         [ 0.3094141 , -2.34233634,  0.26246131, -0.36688779,  0.46450373],
         [ 0.0165342 , -0.84138135,  0.88338392,  0.72177741, -1.13714483],
         [ 2.84969247,  1.4391159 ,  1.20722057, -0.39787982, -1.01522527],
         [-0.13450709, -1.02466944,  0.09985751, -0.68842921, -1.0032862 ]]),
  3: array([[-0.05588111, -0.14075806, -0.3356481 ,  0.79467664, -0.67567899]])},
 {1: array([[-2.68154548,  0.77417915, -0.84192262, -0.33328873,  0.3916558 ]]),
  2: array([[ 0.84034847,  1.38223547, -2.60514503, -2.39058654, -0.56303132]]),
  3: array([[-0.00416519]])})

In [30]:
import pickle

def serialize(weights, biases, filename='/content/mlp.pkl'):
    model = {'weights': weights, 'biases': biases}
    with open(filename, 'wb') as file:
        pickle.dump(model, file)

In [32]:
serialize(weights, biases)

In [33]:
!ls

mlp.pkl  sample_data
