# Part B (Neural Network from Scratch)

You need to implement a neural network from scratch. This is a multiclass classification problem. The number of hidden layers is up to you, but there should be at least 2. Remember to use an activation function. You can add any other functions of your choice.

In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt

In [15]:
from sklearn import datasets
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Feature matrix, one row per sample (the next cell prints its shape as (150, 4)).
X = iris.data
# Class label per sample; later used as an index into np.eye(3) for one-hot encoding.
y = iris.target

In [3]:
# Sanity-check the dimensions of the features and labels before building the network.
print(X.shape,y.shape)

(150, 4) (150,)


In [4]:
def parameters(hidden_size, input_size, output_size, seed=42):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero.

    Args:
        hidden_size: Number of units in the hidden layer.
        input_size: Number of input features.
        output_size: Number of output units.
        seed: Seed for the private random generator. The default (42) yields
            the same weights as the earlier ``np.random.seed(42)`` version.

    Returns:
        dict with keys "W1" (hidden_size, input_size), "b1" (hidden_size, 1),
        "W2" (output_size, hidden_size) and "b2" (output_size, 1).
    """
    # A private RandomState produces the same stream as seeding the global
    # generator, but leaves np.random untouched for every other caller.
    rng = np.random.RandomState(seed)

    W1 = rng.randn(hidden_size, input_size) * 0.01
    b1 = np.zeros((hidden_size, 1))
    W2 = rng.randn(output_size, hidden_size) * 0.01
    b2 = np.zeros((output_size, 1))

    # Named `initial` so the local does not shadow this function's own name.
    initial = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }

    return initial

In [5]:
#activation functions
def sigmoid(X):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-X)), computed stably.

    Args:
        X: Scalar or array-like of real values.

    Returns:
        Sigmoid of every element of X, with the same shape as X.
    """
    Z = np.asarray(X)
    # exp(-|z|) is always in (0, 1], so it can never overflow the way
    # exp(-z) does for large negative z.
    e = np.exp(-np.abs(Z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    numerator = np.where(Z >= 0, np.ones_like(e), e)
    A = numerator / (1.0 + e)
    return A

def relu(X):
    """Rectified linear unit: element-wise maximum of 0 and X."""
    return np.maximum(0, X)

def softmax(X):
    """Softmax taken along axis 0, so each column of the result sums to 1.

    The per-column maximum is subtracted before exponentiating, which keeps
    the exponent at or below zero.
    """
    col_max = np.max(X, axis=0, keepdims=True)
    exponentials = np.exp(X - col_max)
    col_total = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / col_total

In [6]:
def forward(X, parameters):
    """Run one forward pass: linear -> sigmoid -> linear -> softmax.

    Args:
        X: Input matrix that W1 is multiplied with (np.dot(W1, X)).
        parameters: dict holding "W1", "b1", "W2" and "b2".

    Returns:
        Tuple (A2, cache): the softmax output and a dict with the
        intermediate values "Z1", "A1", "Z2" and "A2".
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer.
    hidden_linear = np.dot(W1, X) + b1
    hidden_activation = sigmoid(hidden_linear)

    # Output layer.
    output_linear = np.dot(W2, hidden_activation) + b2
    probabilities = softmax(output_linear)

    cache = dict(
        Z1=hidden_linear,
        A1=hidden_activation,
        Z2=output_linear,
        A2=probabilities,
    )
    return probabilities, cache


In [7]:
def cost_funct(A2, Y, eps=1e-12):
    """Average cross-entropy between predictions A2 and one-hot targets Y.

    Args:
        A2: Predicted probabilities, same shape as Y.
        Y: Target matrix; the number of columns (Y.shape[1]) is used as the
            number of samples to average over.
        eps: Lower bound applied to A2 before taking the logarithm.

    Returns:
        The cost, squeezed with np.squeeze.
    """
    m = Y.shape[1]
    # A probability of exactly 0 would give log(0) = -inf, and -inf * 0 = nan
    # for every non-target entry; flooring at eps keeps the cost finite.
    log_probs = np.log(np.maximum(A2, eps))
    cost = - np.sum(np.multiply(log_probs, Y)) / m

    cost = np.squeeze(cost)
    return cost

In [8]:
# Use plain gradient descent as the optimizer for now.
def sigmoid_derivative(Z):
    """Derivative of the sigmoid evaluated at Z: sigmoid(Z) * (1 - sigmoid(Z))."""
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement

In [9]:
def backward(parameters, cache, X, Y):
    """Compute the gradients of the cost for the two-layer network.

    Args:
        parameters: dict of weights; only "W2" is read here.
        cache: dict from the forward pass; "A1" (hidden sigmoid activation)
            and "A2" (softmax output) are read.
        X: Input matrix; X.shape[1] is used as the number of samples.
        Y: Target matrix with the same shape as A2.

    Returns:
        dict with the gradients "dW1", "db1", "dW2" and "db2", each averaged
        over the X.shape[1] samples.
    """
    m = X.shape[1]

    W2 = parameters["W2"]

    A1 = cache["A1"]
    A2 = cache["A2"]

    # Output layer: the error term is simply A2 - Y.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Hidden layer: A1 already holds sigmoid(Z1), so the sigmoid derivative is
    # A1 * (1 - A1); reusing it avoids recomputing the sigmoid on every pass.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * (A1 * (1 - A1))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }

    return grads

In [10]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Each entry of `parameters` is replaced by
    ``value - learning_rate * gradient`` where the gradient is looked up under
    the same key prefixed with "d". The dict is updated in place and returned.
    """
    for key in ("W1", "b1", "W2", "b2"):
        step = learning_rate * grads["d" + key]
        parameters[key] = parameters[key] - step

    return parameters

In [17]:
def model(X, Y, hidden_size, num_iterations=5000, learning_rate=0.01, print_cost=False):
    """Train the network with full-batch gradient descent.

    The network has a single hidden layer (weights W1/b1 and W2/b2).

    Args:
        X: Input matrix; X.shape[0] is taken as the number of input features.
        Y: Target matrix; Y.shape[0] is taken as the number of outputs.
        hidden_size: Number of units in the hidden layer.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size used by update_parameters.
        print_cost: If True, print the cost every 1000 iterations.

    Returns:
        dict of trained parameters ("W1", "b1", "W2", "b2").
    """
    input_size = X.shape[0]
    output_size = Y.shape[0]

    # parameters() expects (hidden_size, input_size, output_size). Passing the
    # sizes by keyword keeps W1 shaped (hidden_size, input_size) even when the
    # hidden layer is not the same width as the input.
    parameter = parameters(
        hidden_size=hidden_size,
        input_size=input_size,
        output_size=output_size,
    )

    for i in range(num_iterations):
        A2, cache = forward(X, parameter)

        grads = backward(parameter, cache, X, Y)

        # The cost is only needed for reporting, so skip it otherwise.
        if print_cost and i % 1000 == 0:
            cost = cost_funct(A2, Y)
            print(f"Cost after iteration {i}: {cost}")

        parameter = update_parameters(parameter, grads, learning_rate)

    return parameter

In [12]:
from sklearn.metrics import f1_score

In [13]:
def predict(X, parameters):
    """Return, for every column of the network output, the row index with the
    highest value (argmax over axis 0 of the forward-pass output)."""
    probabilities = forward(X, parameters)[0]
    return np.argmax(probabilities, axis=0)

In [18]:
# Transpose so that each column is one sample, matching np.dot(W1, X) in forward().
X = iris.data.T
# One-hot encode the labels (3 classes) and transpose to one column per sample.
Y = np.eye(3)[y].T

hidden_size = 4
parameter = model(X, Y, hidden_size, num_iterations=5000, learning_rate=0.01, print_cost=True)

# NOTE(review): predictions are made on the same X that was used for training,
# so the score below is a training-set F1, not a held-out estimate.
predictions = predict(X, parameter)

# Macro-averaged F1 against the original integer labels y.
f1 = f1_score(y, predictions, average = 'macro')
print(f"F1 Score: {f1}")

Cost after iteration 0: 1.0986655627490312
Cost after iteration 1000: 1.0900130241700494
Cost after iteration 2000: 0.8609174442356656
Cost after iteration 3000: 0.5578033333334779
Cost after iteration 4000: 0.4680899565755299
F1 Score: 0.9665831244778613
