# Part B (Neural Network from Scratch)

You need to implement a neural network from scratch .This is a multiclass classification problem. No. of hidden layers depends on you but should be atleast 2.Remember to use activation function. You can add any other function of your choice.

In [87]:
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [88]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target
y=y.reshape(-1,1)

In [89]:
print(X.shape,y.shape)

(150, 4) (150, 1)


In [90]:

X = X / np.max(X, axis=0)
Y = np.zeros((y.size, y.max() + 1))
Y[np.arange(y.size), y.flatten()] = 1



X_train, X_dev, Y_train, Y_dev = train_test_split(X, Y, test_size=0.2, random_state=42)

X_train = X_train.T
X_dev = X_dev.T
Y_train = Y_train.T
Y_dev = Y_dev.T


In [91]:
def initialize(n_x, n_h1,n_h2, n_y):
    w1 = np.random.randn(n_h1, n_x)*0.01
    b1 = np.zeros((n_h1, 1))
    w2 = np.random.randn(n_h2, n_h1)*0.01
    b2 = np.zeros((n_h2, 1))

    w3 = np.random.randn(n_y, n_h2)*0.01
    b3 = np.zeros((n_y, 1))

    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2,
        "w3" : w3,
        "b3" : b3
    }

    return parameters

In [92]:
#activation functions
def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(x, 0)

def softmax(x):
    expX = np.exp(x - np.max(x, axis=0, keepdims=True))  # Stability improvement
    return expX / np.sum(expX, axis=0, keepdims=True)
def derivative_tanh(x):
    return (1 - np.power(np.tanh(x), 2))

def derivative_relu(x):
    return np.array(x > 0, dtype = np.float32)

In [93]:
def forward(x, parameters):

    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']

    z1 = np.dot(w1, x) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = relu(z2)

    z3 = np.dot(w3, a2) + b3
    a3 = softmax(z3)

    forward_cache = {
        "z1" : z1,
        "a1" : a1,
        "z2" : z2,
        "a2" : a2,
        "z3" : z3,
        "a3" : a3
    }

    return forward_cache

In [94]:
def cost_function(a3, y):
    m = y.shape[1]
    cost = -(1/m) * np.sum(y * np.log(a3 + 1e-8))  # Adding epsilon to avoid log(0)
    return cost

In [95]:
# use Gradient descent as of now as an optimizer

In [96]:
def backward(x, y, parameters, forward_cache):

    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']

    a1 = forward_cache['a1']
    a2 = forward_cache['a2']
    a3 = forward_cache['a3']

    m = x.shape[1]

    dz3 = (a3 - y)
    dw3 = (1/m)*np.dot(dz3, a2.T)
    db3 = (1/m)*np.sum(dz3, axis = 1, keepdims = True)

    dz2 = (1/m)*np.dot(w3.T, dz3)*derivative_relu(a2)
    dw2 = (1/m)*np.dot(dz2, a1.T)
    db2 = (1/m)*np.sum(dz2, axis = 1, keepdims = True)

    dz1 = (1/m)*np.dot(w2.T, dz2)*derivative_relu(a1)
    dw1 = (1/m)*np.dot(dz1, x.T)
    db1 = (1/m)*np.sum(dz1, axis = 1, keepdims = True)

    gradients = {
        "dw1" : dw1,
        "db1" : db1,
        "dw2" : dw2,
        "db2" : db2,
        "dw3" : dw3,
        "db3" : db3
    }

    return gradients

In [97]:
def update_parameters(parameters, gradients, learning_rate):

    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']

    dw1 = gradients['dw1']
    db1 = gradients['db1']
    dw2 = gradients['dw2']
    db2 = gradients['db2']
    dw3 = gradients['dw3']
    db3 = gradients['db3']

    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    w3 = w3 - learning_rate*dw3
    b3 = b3 - learning_rate*db3

    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2,
        "w3" : w3,
        "b3" : b3
    }

    return parameters

In [98]:
def model(x, y, n_h1,n_h2, learning_rate, iterations):

    n_x = x.shape[0]
    n_y = y.shape[0]

    cost_list = []

    parameters = initialize(n_x, n_h1,n_h2, n_y)

    for i in range(iterations):

        forward_cache = forward(x, parameters)

        cost = cost_function(forward_cache['a3'], y)

        gradients = backward(x, y, parameters, forward_cache)

        parameters = update_parameters(parameters, gradients, learning_rate)

        cost_list.append(cost)

        if(i%10 == 0):
            print("Cost after", i, "iterations is :", cost)

    return parameters, cost_list

In [99]:
iterations = 100
n_h1 = 1000
n_h2=1000
learning_rate = 0.02
p, Cost_list = model(X_train, Y_train, n_h1 = n_h1,n_h2=n_h2 ,learning_rate = learning_rate, iterations = iterations)

Cost after 0 iterations is : 1.0983014896453809
Cost after 10 iterations is : 1.0982534403759192
Cost after 20 iterations is : 1.0982085519501867
Cost after 30 iterations is : 1.0981664340661972
Cost after 40 iterations is : 1.098126739782505
Cost after 50 iterations is : 1.0980891613390402
Cost after 60 iterations is : 1.0980534251422724
Cost after 70 iterations is : 1.0980193177351605
Cost after 80 iterations is : 1.097986629746606
Cost after 90 iterations is : 1.0979551857236558


In [None]:
#write down the predictions and the f1 score finally

In [101]:
def predict(x, parameters):
    forward_cache = forward(x, parameters)
    predictions = np.argmax(forward_cache['a3'], axis=0)
    return predictions

In [102]:
from sklearn.metrics import f1_score
pred_dev = predict(X_dev, p)
Y_dev_labels = np.argmax(Y_dev, axis=0)
f1 = f1_score(Y_dev_labels, pred_dev, average='macro')
print("F1 score on the development set:", f1)
print("Predictions on the development set:", pred_dev)

F1 score on the development set: 0.15384615384615383
Predictions on the development set: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [103]:
def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size
accuracy_dev = get_accuracy(pred_dev, Y_dev)
print(f"Development set accuracy: {accuracy_dev:.4f}")

Development set accuracy: 0.3333
