<a href="https://colab.research.google.com/github/MethEthPro/colab/blob/main/100DL/backpropagation_scratch_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# regression problem

see this [site](https://developers-dot-devsite-v2-prod.appspot.com/machine-learning/crash-course/backprop-scroll)

## numpy implementation

In [2]:
df = pd.DataFrame([[8,8,4],[6,10,6],[7,9,5],[5,12,7]],columns=['cgpa','resume_score','lpa'])
df

Unnamed: 0,cgpa,resume_score,lpa
0,8,8,4
1,6,10,6
2,7,9,5
3,5,12,7


In [3]:
# this function gives us weights and biases
# according to the input shape
def initialise_parameters(dim):
  np.random.seed(42)

  l = len(dim)
  parameters={}

  for i in range(1,l):

    parameters['W'+str(i)] = np.ones((dim[i-1],dim[i]))*0.1
    parameters['b'+str(i)] = np.zeros((dim[i],1))

  return parameters


In [4]:
initialise_parameters([2,2,1])

{'W1': array([[0.1, 0.1],
        [0.1, 0.1]]),
 'b1': array([[0.],
        [0.]]),
 'W2': array([[0.1],
        [0.1]]),
 'b2': array([[0.]])}

In [5]:
def linear_forward(A_prev, W, b):
  z = np.dot(W.T, A_prev) + b
  return z

In [6]:
# Forward Propagation
def L_layer_forward(X, parameters):
  A=X
  L = len(parameters)//2    # no of layers in Neural Network
  for l in range(1,L+1):
    A_prev = A
    Wl = parameters['W'+str(l)]
    bl = parameters['b'+str(l)]
    # print("A" + str(l-1) + ": ",A_prev)
    # print("W" + str(l) + ": ",Wl)
    # print("b" + str(l) + ": ",bl)
    # print("--"*20)

    A = linear_forward(A_prev,Wl,bl)
    # print("A" + str(l)+": ",A)
    # print("--"*20)


  return A,A_prev



In [7]:
X = df[['cgpa','resume_score']].values[0].reshape(2,1)
X

# shape --> (no.of features, no.of training examples)

array([[8],
       [8]])

In [8]:
y = df[['lpa']].values[0][0]
y

4

In [9]:
parameters = initialise_parameters([2,2,1])

In [10]:
y_hat,A1 = L_layer_forward(X, parameters)

In [11]:
y_hat = y_hat[0][0]

In [12]:
(y-y_hat)**2

13.542399999999997

In [13]:
X

array([[8],
       [8]])

In [82]:
# this is how the parameters will get updated
# by the graidemt descent algorithm , when using MSE loss
def update_parameters(parameters, y, y_hat,A1,X):
  parameters['W2'][0][0] = parameters['W2'][0][0] + (0.001 * 2 * (y-y_hat) * A1[0][0])
  parameters['W2'][1][0] = parameters['W2'][1][0] + (0.001 * 2 * (y-y_hat) * A1[1][0])
  parameters['b2'][0][0] = parameters['b2'][0][0] + (0.01 * 2 * (y-y_hat))

  parameters['W1'][0][0] = parameters['W1'][0][0] + (0.001 * 2 * (y-y_hat) * parameters['W2'][0][0] * X[0][0])
  parameters['W1'][0][1] = parameters['W1'][0][1] + (0.001 * 2 * (y-y_hat) * parameters['W2'][0][0] * X[1][0])
  parameters['b1'][0][0] = parameters['b1'][0][0] + (0.01 * 2 * (y-y_hat) * parameters['W2'][0][0])

  parameters['W1'][1][0] = parameters['W1'][1][0] + (0.01 * 2 * (y-y_hat) * parameters['W2'][1][0] * X[0][0])
  parameters['W1'][1][1] = parameters['W1'][1][1] + (0.01 * 2 * (y-y_hat) * parameters['W2'][1][0] * X[1][0])
  parameters['b1'][1][0] = parameters['b1'][1][0] + (0.01 * 2 * (y-y_hat) * parameters['W2'][1][0])



In [15]:
update_parameters(parameters,y,y_hat,A1,X)

In [16]:
parameters

{'W1': array([[0.10658137, 0.10658137],
        [0.16581371, 0.16581371]]),
 'b1': array([[0.00822671],
        [0.00822671]]),
 'W2': array([[0.111776],
        [0.111776]]),
 'b2': array([[0.0736]])}

In [17]:
(y-y_hat)**2

13.542399999999997

In [18]:
parameters = initialise_parameters([2,2,1])
epochs = 5
for i in range(epochs):
  loss = []
  for j in range(df.shape[0]):
    X = df[['cgpa','resume_score']].values[j].reshape(2,1)
    y = df[['lpa']].values[j][0]


    # forward pass

    y_hat, A1 = L_layer_forward(X, parameters)
    y_hat = y_hat[0][0]

    # calcluate the loss
    loss.append((y-y_hat)**2)

    # update the parameters

    update_parameters(parameters,y,y_hat,A1,X)

  print("Epochs",i+1,"Loss: ",np.array(loss).mean())

parameters



Epochs 1 Loss:  20.73126980566457
Epochs 2 Loss:  2.0545530212110226
Epochs 3 Loss:  0.6360127678472741
Epochs 4 Loss:  0.6737719306590967
Epochs 5 Loss:  0.6761120722558785


{'W1': array([[0.14406399, 0.18832393],
        [0.56395062, 1.05128039]]),
 'b1': array([[0.08031501],
        [0.08574194]]),
 'W2': array([[0.27343621],
        [0.32242219]]),
 'b2': array([[0.45263366]])}

## PyTorch Implementation



In [19]:
import torch
from torch import nn

In [20]:
class myNN(nn.Module):
  def __init__(self):
    super().__init__()

    self.layer_stack = nn.Sequential(
      nn.Linear(in_features = 2, out_features = 2),
      nn.Linear(in_features = 2, out_features = 1),
    )
  def forward(self,x):
    return self.layer_stack(x)

In [21]:
mymodel1 = myNN()

In [22]:
df

Unnamed: 0,cgpa,resume_score,lpa
0,8,8,4
1,6,10,6
2,7,9,5
3,5,12,7


In [23]:
X = df.iloc[:,:-1]
X = np.array(X)
X = torch.tensor(X, dtype=torch.float32)


In [24]:
y = df.iloc[:,-1]
y = np.array(y)
y = torch.tensor(y, dtype= torch.float32)

In [25]:
y

tensor([4., 6., 5., 7.])

In [26]:
y_logits = mymodel1(X)
y_logits.squeeze()

tensor([-1.2582, -1.2561, -1.2571, -1.3035], grad_fn=<SqueezeBackward0>)

In [27]:
loss_fnc = nn.MSELoss()
optim = torch.optim.SGD(params = mymodel1.parameters(),
                        lr=0.001)

In [28]:
epochs = 20

train_loss = []
for i in range(epochs):
  mymodel1.train()
  y_logits = mymodel1(X)

  y_pred = y_logits.squeeze()


  loss = loss_fnc(y_pred,y)

  print(f"epoch {i} loss: {loss.item()}")


  optim.zero_grad()

  loss.backward()

  optim.step()


epoch 0 loss: 47.09989547729492
epoch 1 loss: 41.199180603027344
epoch 2 loss: 36.782569885253906
epoch 3 loss: 33.09516143798828
epoch 4 loss: 29.734636306762695
epoch 5 loss: 26.47848129272461
epoch 6 loss: 23.21678352355957
epoch 7 loss: 19.924358367919922
epoch 8 loss: 16.645498275756836
epoch 9 loss: 13.477514266967773
epoch 10 loss: 10.546205520629883
epoch 11 loss: 7.973599433898926
epoch 12 loss: 5.845794200897217
epoch 13 loss: 4.192354202270508
epoch 14 loss: 2.9849302768707275
epoch 15 loss: 2.1534292697906494
epoch 16 loss: 1.6100026369094849
epoch 17 loss: 1.2700273990631104
epoch 18 loss: 1.0642342567443848
epoch 19 loss: 0.9421327114105225


# Classification Problem

In [50]:
df = pd.DataFrame([[8,8,1],[7,9,1],[6,10,0],[5,5,0]], columns = ['cgpa','resume_score',"placed"])
df

Unnamed: 0,cgpa,resume_score,placed
0,8,8,1
1,7,9,1
2,6,10,0
3,5,5,0


## Numpy Implementation

In [51]:
def initialise_parameters(dim):
  np.random.seed(42)
  parameters = {}
  l = len(dim)

  for i in range(1,l):
    parameters['W'+str(i)] = np.ones((dim[i-1],dim[i]))*0.1
    parameters['b'+str(i)] = np.zeros((dim[i],1))

  return parameters

In [52]:
def sigmoid(Z):
  A = 1/(1+np.exp(Z))

  return A

In [53]:
def linear_forward(A_prev,W,b):
  Z = np.dot(W.T,A_prev) + b

  A = sigmoid(Z)
  return A

In [54]:
# Forward Propagation
def L_layer_forward(X, parameters):
  A=X
  L = len(parameters)//2    # no of layers in Neural Network
  for l in range(1,L+1):
    A_prev = A
    Wl = parameters['W'+str(l)]
    bl = parameters['b'+str(l)]
    # print("A" + str(l-1) + ": ",A_prev)
    # print("W" + str(l) + ": ",Wl)
    # print("b" + str(l) + ": ",bl)
    # print("--"*20)

    A = linear_forward(A_prev,Wl,bl)
    # print("A" + str(l)+": ",A)
    # print("--"*20)


  return A,A_prev



In [55]:
# this is how the parameters will get updated
# by the graidemt descent algorithm , when using BCE LOSS
def update_parameters(parameters, y, y_hat,A1,X):
  parameters['W2'][0][0] = parameters['W2'][0][0] + (0.001  * (y-y_hat) * A1[0][0])
  parameters['W2'][1][0] = parameters['W2'][1][0] + (0.001 * (y-y_hat) * A1[1][0])
  parameters['b2'][0][0] = parameters['b2'][0][0] + (0.01  * (y-y_hat))

  parameters['W1'][0][0] = parameters['W1'][0][0] + (0.001 * (y-y_hat) * parameters['W2'][0][0] * (1-A1[0][0]) * X[0][0])
  parameters['W1'][0][1] = parameters['W1'][0][1] + (0.001  * (y-y_hat) * parameters['W2'][0][0] * (1-A1[0][0]) * X[1][0])
  parameters['b1'][0][0] = parameters['b1'][0][0] + (0.01  * (y-y_hat) * parameters['W2'][0][0]* (1-A1[0][0]) )

  parameters['W1'][1][0] = parameters['W1'][1][0] + (0.01 * (y-y_hat) * parameters['W2'][1][0] * (1-A1[1][0]) * X[0][0])
  parameters['W1'][1][1] = parameters['W1'][1][1] + (0.01  * (y-y_hat) * parameters['W2'][1][0] * (1-A1[1][0]) * X[1][0])
  parameters['b1'][1][0] = parameters['b1'][1][0] + (0.01 * (y-y_hat) * parameters['W2'][1][0]* (1-A1[1][0]) )



In [56]:
parameters = initialise_parameters([2,2,1])
epochs = 5
for i in range(epochs):
  loss = []
  for j in range(df.shape[0]):
    X = df[['cgpa','resume_score']].values[j].reshape(2,1)
    y = df[['placed']].values[j][0]


    # forward pass

    y_hat, A1 = L_layer_forward(X, parameters)
    y_hat = y_hat[0][0]

    # calcluate the loss
    loss.append(-y*np.log(y_hat) - (1-y)*(np.log)(1-y_hat))

    # update the parameters

    update_parameters(parameters,y,y_hat,A1,X)

  print("Epochs",i+1,"Loss: ",np.array(loss).mean())

parameters



Epochs 1 Loss:  0.6897878299995169
Epochs 2 Loss:  0.6897862245084582
Epochs 3 Loss:  0.6897848612118213
Epochs 4 Loss:  0.6897837395079125
Epochs 5 Loss:  0.6897828587514063


{'W1': array([[0.10106404, 0.10066372],
        [0.11061073, 0.10660672]]),
 'b1': array([[0.00040723],
        [0.00040577]]),
 'W2': array([[0.09981084],
        [0.09981231]]),
 'b2': array([[0.00214264]])}

## PyTorch implementation

In [62]:
df

Unnamed: 0,cgpa,resume_score,placed
0,8,8,1
1,7,9,1
2,6,10,0
3,5,5,0


In [63]:
X = df.iloc[:,:-1]
X = np.array(X)
X = torch.tensor(X,dtype = torch.float32)
X

tensor([[ 8.,  8.],
        [ 7.,  9.],
        [ 6., 10.],
        [ 5.,  5.]])

In [64]:
y = df.iloc[:,-1]
y = np.array(y)
y = torch.tensor(y, dtype = torch.float32)
y

tensor([1., 1., 0., 0.])

In [77]:
class classification(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_stack = nn.Sequential(
        nn.Linear(in_features=2,out_features=2),
        nn.Sigmoid(),
        nn.Linear(in_features=2,out_features=1),
        nn.Sigmoid()
    )

  def forward(self,x):
    return self.layer_stack(x)

In [78]:
mymodel2 = classification()

In [79]:
y_logits = mymodel2(X)
y_pred = y_logits.squeeze()
y_pred

tensor([0.5200, 0.5212, 0.5199, 0.5090], grad_fn=<SqueezeBackward0>)

In [80]:
loss_fnc  = nn.BCELoss()
optim = torch.optim.SGD(params = mymodel2.parameters(),
                        lr=0.01)

In [81]:
epochs = 100

for i in range(epochs):
  mymodel2.train()
  y_logits = mymodel2(X)

  y_pred = y_logits.squeeze()

  loss = loss_fnc(y_pred,y)
  if(i%10==0):
    print(f"epoch {i} loss {loss.item()}")

  optim.zero_grad()

  loss.backward()

  optim.step()

epoch 0 loss 0.6877042651176453
epoch 10 loss 0.6875591278076172
epoch 20 loss 0.6874119639396667
epoch 30 loss 0.6872620582580566
epoch 40 loss 0.6871089339256287
epoch 50 loss 0.6869519948959351
epoch 60 loss 0.6867905259132385
epoch 70 loss 0.6866239309310913
epoch 80 loss 0.6864514946937561
epoch 90 loss 0.6862725019454956
