In [126]:
import numpy as np

# **Initialize Inputs**

In [127]:
# Two training samples, each stored as a (3, 1) column vector.
X1 = np.array([2, 4, 2]).reshape(3, 1)
X2 = np.array([4, 2, 4]).reshape(3, 1)
X = np.array([X1, X2], dtype=object)

# One scalar target per sample; Y[i] is the target for X[i].
y1, y2 = 2, 4
Y = np.array([y1, y2], dtype=object)

# **Initialize Weights**

In [128]:
# Each literal lists one inner row per *input* unit; transposing yields the
# (units_out, units_in) matrix that np.dot(w, x) expects in ws().
W1 = np.array([[0.12, 0.24], [0.32, 0.44], [0.52, 0.44]]).T
W2 = np.array([[2.15, 2.25, 2.35], [4.45, 4.55, 4.65]]).T
W3 = np.array([[8.12, 8.22], [8.32, 8.42], [8.52, 8.62]]).T
W4 = np.array([[-1.84], [-1.64]]).T

# The layer matrices have different shapes, so they are collected in an
# object array indexed by layer (W[0] is layer 1, ..., W[3] is layer 4).
W = np.array([W1, W2, W3, W4], dtype=object)

# **Initialize Biases**

In [129]:
# Per-layer biases as column vectors; the single-unit output layer uses a
# plain scalar.
B1 = np.array([0.21, 0.42]).reshape(-1, 1)
B2 = np.array([6.15, 6.25, 6.35]).reshape(-1, 1)
B3 = np.array([0.52, 0.62]).reshape(-1, 1)
B4 = 2.26

# Object array indexed by layer, parallel to W.
B = np.array([B1, B2, B3, B4], dtype=object)

# **Required Functions**

In [130]:
#Weighted Sum
def ws(x,w,b):
  """Return the weighted sum (pre-activation) of one layer: ``w . x + b``.

  ``np.dot(w, x)`` is evaluated first and ``b`` is then added using ordinary
  NumPy broadcasting, so ``b`` may be an array or a scalar.
  """
  return np.dot(w,x)+b

In [131]:
def ReLU(x):
  """Rectified linear unit, applied element-wise.

  Every entry that is strictly greater than zero is passed through unchanged;
  every other entry becomes 0.

  The previous ``if x > 0`` form only worked for scalars and one-element
  arrays (a multi-element array raises "truth value of an array is
  ambiguous"). ``np.where`` keeps the same strict ``> 0`` test but evaluates
  it per element, so inputs of any shape are accepted and the result always
  has the shape of ``x``.

  Parameters
  ----------
  x : scalar or array-like
      Pre-activation value(s).

  Returns
  -------
  numpy.ndarray
      Array with the shape of ``x`` (0-d for scalar input).
  """
  return np.where(np.asarray(x) > 0, x, 0)

In [132]:
def sigmoid(x):
  """Logistic function ``1 / (1 + exp(-x))``, applied element-wise by NumPy."""
  denominator = 1 + np.exp(-x)
  return 1. / denominator

In [133]:
def loss(y_hat,y):
  """Half squared error between the prediction ``y_hat`` and the target ``y``."""
  residual = y_hat - y
  return 0.5 * residual**2

In [161]:
def grad_b(X,y,y_hat,W,B,S):
  """Back-propagate the loss gradient to the pre-activation of every layer.

  Each bias is added directly to its layer's weighted sum (see ``ws``), so the
  gradient with respect to a layer's pre-activation is also the gradient with
  respect to that layer's bias.

  Parameters
  ----------
  X, B : unused here; accepted so the signature matches ``grad_w``.
  y : target for the sample.
  y_hat : network output for the sample.
  W : per-layer weight matrices; only ``W[1]``, ``W[2]`` and ``W[3]`` are read.
  S : per-layer pre-activations ``[S1, S2, S3, S4]`` as returned by ``forward``.

  Returns
  -------
  tuple
      ``(db1, db2, db3, db4)``, ordered from the first layer to the last.
  """
  e=(y_hat-y)

  # Output layer (ReLU): derivative is the step function, taken as 1 at S == 0.
  db4=e*np.heaviside(S[3],1)

  # Layer 3 (tanh). A matrix product is used here, like in the layers below;
  # the earlier element-wise `W4^T * db4` only agreed with it because the
  # output layer has a single unit.
  db3=np.dot(np.transpose(W[3]),db4)*(1-(np.tanh(S[2].astype(float)))**2)

  # Layer 2 (sigmoid): evaluate the activation once and reuse it.
  a2=sigmoid(S[1])
  db2=np.dot(np.transpose(W[2]),db3)*a2*(1-a2)

  # Layer 1 (tanh).
  db1=np.dot(np.transpose(W[1]),db2)*(1-(np.tanh(S[0].astype(float)))**2)
  return db1,db2,db3,db4

In [162]:
def grad_w(X,y,y_hat,W,B,S):
  """Compute the loss gradient with respect to every weight matrix.

  For each layer the gradient is the outer product of that layer's
  pre-activation gradient (from ``grad_b``) with the *input* that was fed to
  the layer during the forward pass:

    layer 1 input: X
    layer 2 input: tanh(S[0])
    layer 3 input: sigmoid(S[1])
    layer 4 input: tanh(S[2])

  Parameters
  ----------
  X : input sample fed to the first layer.
  y, y_hat, W, B : forwarded unchanged to ``grad_b``.
  S : per-layer pre-activations ``[S1, S2, S3, S4]`` as returned by ``forward``.

  Returns
  -------
  tuple
      ``(dw1, dw2, dw3, dw4)``, ordered from the first layer to the last.
  """
  db1,db2,db3,db4=grad_b(X,y,y_hat,W,B,S)

  # Re-create the activations that served as inputs to layers 2-4.
  a1=np.tanh(S[0].astype(float))
  a2=sigmoid(S[1])
  a3=np.tanh(S[2].astype(float))

  dw4=np.dot(db4,np.transpose(a3))
  dw3=np.dot(db3,np.transpose(a2))
  # Layer 2 is fed by a1 = tanh(S[0]). Using tanh(S[2]) here (as before) has
  # the same shape in this network but is the wrong activation.
  dw2=np.dot(db2,np.transpose(a1))
  dw1=np.dot(db1,np.transpose(X))
  return dw1,dw2,dw3,dw4

# **Feed Forward**

In [158]:
def forward(X,W,B):
  """Run one sample through the four-layer network.

  Activations, in order: tanh, sigmoid, tanh, ReLU. Each layer's weighted sum
  is computed with ``ws`` from the previous layer's activation.

  Returns
  -------
  tuple
      ``(y_hat, S)`` where ``y_hat`` is the ReLU output of the last layer and
      ``S`` is an object array holding the four pre-activations
      ``[S1, S2, S3, S4]`` for use in the backward pass.
  """
  # Layer 1 (tanh). The cast to float is needed when the weighted sum comes
  # back with dtype=object (the sample arrays are stored with that dtype).
  z1 = ws(X, W[0], B[0])
  h1 = np.tanh(z1.astype(float))

  # Layer 2 (sigmoid).
  z2 = ws(h1, W[1], B[1])
  h2 = sigmoid(z2)

  # Layer 3 (tanh).
  z3 = ws(h2, W[2], B[2])
  h3 = np.tanh(z3.astype(float))

  # Layer 4 (ReLU) produces the prediction.
  z4 = ws(h3, W[3], B[3])
  y_hat = ReLU(z4)

  S = np.array([z1, z2, z3, z4], dtype=object)
  return y_hat, S

# **Backpropagation**

In [163]:
def Backpropagation(X,Y,W,B,learning_rate=0.1):
  """Run one pass of per-sample gradient descent over the training set.

  For each sample the network is evaluated with ``forward``, the bias and
  weight gradients are obtained from ``grad_b`` and ``grad_w``, and every
  layer is then updated as ``param = param - learning_rate * grad``. All
  gradients for a sample are computed before any parameter is changed.

  The loop covers every sample in ``X`` (it was previously hard-coded to the
  first two), so the function works for any training-set size, including an
  empty one.

  Parameters
  ----------
  X : sequence of input samples; ``X[i]`` is passed to ``forward``.
  Y : sequence of targets; ``Y[i]`` is the target for ``X[i]``.
  W, B : per-layer weights and biases, indexed 0..3. Their elements are
      reassigned in place, so the caller's containers are modified.
  learning_rate : step size applied to every gradient.

  Returns
  -------
  tuple
      ``(W, B)`` - the same container objects that were passed in.
  """
  for i in range(len(X)):
    y_hat,S=forward(X[i],W,B)
    bias_grads=grad_b(X[i],Y[i],y_hat,W,B,S)
    weight_grads=grad_w(X[i],Y[i],y_hat,W,B,S)

    # Both tuples are ordered layer 1..4, matching the indices of W and B.
    for layer,(dw,db) in enumerate(zip(weight_grads,bias_grads)):
      W[layer]=W[layer]-learning_rate*dw
      B[layer]=B[layer]-learning_rate*db

  return W,B

In [174]:
# Train for one pass over the samples; W and B are rebound to the result, so
# re-running this cell continues from the already-updated parameters.
W,B=Backpropagation(X,Y,W,B)

W_names=['W1','W2','W3','W4']
B_names=['B1','B2','B3','B4']

print('Weights:')
for i,label in enumerate(W_names):
  print(f'{label} = {W[i]}\n')

print('Bias:')
for i,label in enumerate(B_names):
  print(f'{label} = {B[i]}\n')

Weights:
W1 = [[0.12 0.32 0.52]
 [0.24 0.44 0.44]]

W2 = [[2.15 4.45]
 [2.25 4.55]
 [2.35 4.65]]

W3 = [[8.12 8.32 8.52]
 [8.22 8.42 8.62]]

W4 = [[-1.84 -1.64]]

Bias:
B1 = [[0.21]
 [0.42]]

B2 = [[6.15]
 [6.25]
 [6.35]]

B3 = [[0.52]
 [0.62]]

B4 = [[2.26]]

