# Part B (Neural Network from Scratch)

You need to implement a neural network from scratch .This is a multiclass classification problem. No. of hidden layers depends on you but should be atleast 2.Remember to use activation function. You can add any other function of your choice.

In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt

In [2]:
from sklearn import datasets
iris = datasets.load_iris()
x = iris.data
y = iris.target

In [3]:

x=x.T
y=np.eye(3)[y]
y=y.T
print(x.shape,y.shape)

(4, 150) (3, 150)


In [4]:
def initialize_parameters(h1,h2,nx,ny):
  w1=np.random.randn(h1,nx)
  b1=np.random.randn(h1,1)

  w2=np.random.randn(h2,h1)
  b2=np.random.randn(h2,1)

  w3=np.random.randn(ny,h2)
  b3=np.random.randn(ny,1)

  parameters={'w1':w1,'w2':w2,'w3':w3,'b1':b1,'b2':b2,'b3':b3}
  return parameters

In [5]:
#activation functions
def sigmoid(x):
    return 1/(1+np.exp(-x))

def relu(x):
    return np.maximum(x,0)

def softmax(x):
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

def delrelu(x):
    return np.where(x > 0, 1, 0)

In [6]:
def forward_propogation(parameters,x):
    w1=parameters['w1']
    w2=parameters['w2']
    w3=parameters['w3']
    b1=parameters['b1']
    b2=parameters['b2']
    b3=parameters['b3']
    z1=np.dot(w1,x)+b1
    a1=relu(z1)

    z2=np.dot(w2,a1)+b2
    a2=relu(z2)

    z3=np.dot(w3,a2)+b3
    a3=softmax(z3)

    forward_cache={'z1':z1,'z2':z2,'z3':z3,'a1':a1,'a2':a2,'a3':a3}
    return forward_cache

In [7]:
def cost_funct(y,a3):
  m=x.shape[1]
  cost=-(1/m)*(np.sum(y*(np.log(a3))))
  return cost

In [8]:
# use Gradient descent as of now as an optimizer

In [9]:
def backward(forward,x,y,parameters):
  w1=parameters['w1']
  w2=parameters['w2']
  w3=parameters['w3']
  b1=parameters['b1']
  b2=parameters['b2']
  b3=parameters['b3']
  z1=forward['z1']
  z2=forward['z2']
  z3=forward['z3']
  a1=forward['a1']
  a2=forward['a2']
  a3=forward['a3']
  m=x.shape[0]
  dz3=a3-y
  dw3=(1/m)*(np.dot(dz3,a2.T))
  db3=(1/m)*np.sum(dz3,axis=1,keepdims=True)
  dz2=np.dot(w3.T,dz3)*delrelu(a2)
  dw2=(1/m)*(np.dot(dz2,a1.T))
  db2=(1/m)*np.sum(dz2,axis=1,keepdims=True)

  dz1=np.dot(w2.T,dz2)*delrelu(a1)
  dw1=(1/m)*(np.dot(dz1,x.T))
  db1=(1/m)*np.sum(dz1,axis=1,keepdims=True)
  gradients={'dw1':dw1,'dw2':dw2,'dw3':dw3,'db1':db1,'db2':db2,'db3':db3}
  return gradients

In [10]:
def update_parameters(parameters,gradients,rate):
  w1=parameters['w1']
  w2=parameters['w2']
  w3=parameters['w3']
  b1=parameters['b1']
  b2=parameters['b2']
  b3=parameters['b3']
  dw1=gradients['dw1']
  dw2=gradients['dw2']
  dw3=gradients['dw3']
  db1=gradients['db1']
  db2=gradients['db2']
  db3=gradients['db3']

  w1=w1-rate*dw1
  w2=w2-rate*dw2
  w3=w3-rate*dw3
  b1=b1-rate*db1
  b2=b2-rate*db2
  b3=b3-rate*db3
  updated_parameters={'w1':w1,'w2':w2,'b1':b1,'b2':b2,'w3':w3,'b3':b3}
  return updated_parameters

In [11]:
def model(x,y,nh1,nh2,rate,epochs):
  nx=x.shape[0]
  ny=3
  cost_list=[]
  parameters=initialize_parameters(nh1,nh2,nx,ny)
  for i in range(epochs):
    forward=forward_propogation(parameters,x)
    cost=cost_funct(y,forward['a3'])
    gradients= backward(forward,x,y,parameters)
    parameters=update_parameters(parameters,gradients,rate)
    cost_list.append(cost)
    if(i%10==0):
      print("cost of",i,"iteration is",cost)
  return parameters

In [12]:
parameters=model(x,y,10,10,0.001,1000)

cost of 0 iteration is 26.134036361941547
cost of 10 iteration is 0.6500514256513615
cost of 20 iteration is 0.6044777572853169
cost of 30 iteration is 0.5701301537748824
cost of 40 iteration is 0.5378828042245007
cost of 50 iteration is 0.5072653743911468
cost of 60 iteration is 0.4826101359565066
cost of 70 iteration is 0.45771723726399477
cost of 80 iteration is 0.4309103580766657
cost of 90 iteration is 0.4226937882628065
cost of 100 iteration is 0.4042757727336497
cost of 110 iteration is 0.37731120087948944
cost of 120 iteration is 0.3903684465767258
cost of 130 iteration is 0.3509818277959368
cost of 140 iteration is 0.4209239010168922
cost of 150 iteration is 0.3038972775214937
cost of 160 iteration is 0.2902766959181574
cost of 170 iteration is 0.2776806908278326
cost of 180 iteration is 0.26606326845770745
cost of 190 iteration is 0.2553170147949226
cost of 200 iteration is 0.24535464411183794
cost of 210 iteration is 0.23610671308103592
cost of 220 iteration is 0.22751721837