Build a neural network model from scratch to classify the Iris data set.

1. Load the Iris data set and pre-process it

In [1]:
import numpy as np 
import pandas as pd

# Read the CSV, shuffle every row (frac=1 keeps all of them) and renumber
# the index so it no longer reflects the original file order.
iris = pd.read_csv('Iris.csv').sample(frac=1).reset_index(drop=True)

# Feature matrix: one row per flower, one column per measurement.
X = np.array(
    iris[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
)

from sklearn.preprocessing import OneHotEncoder
# Build an encoder that returns a dense array.  scikit-learn renamed the
# ``sparse`` keyword to ``sparse_output`` (1.2) and later removed the old
# spelling, so try the current name first and fall back for older releases.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)

# One-hot encode the species label; the encoder expects a 2-D column, hence
# the reshape to (n_samples, 1).
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))
Y[:5]

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.]])

2. Split the data into training, validation, and testing sets

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as the test set (seeded split).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
# Carve 10% of the remaining samples off as a validation set (unseeded).
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=0.1
)

Neural network building: create 3 layers: 1 input layer (4 units), 1 hidden layer (its size is a parameter passed during execution), and 1 output layer (3 units).
Inputs = the sepal and petal lengths and widths of each sample
Output = target classes (species)

w1 is the weight matrix connecting the input layer and the hidden layer. Each node in the input layer connects to each node in the hidden layer.
w2 holds the weights of the connections between the hidden layer and the output layer.

Initial weights: the weights are randomized between -0.05 and 0.05.

Network training: train the network by updating the weights using backpropagation. This is the crux of the network. The inputs are fed forward through the layers using the sigmoid activation function. The weights are then updated based on the error using gradient descent. Set the learning rate to 0.01.

In [20]:
def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=100, nodes=None, lr=0.01):
    """Train a fully connected network and return its weight matrices.

    Args:
        X_train: Training samples, one row per sample.
        Y_train: Targets matching ``X_train`` row for row.
        X_val: Optional validation samples; validation accuracy is only
            reported when both ``X_val`` and ``Y_val`` are given and
            ``X_val`` is non-empty.
        Y_val: Optional validation targets.
        epochs: Number of full passes over the training data.
        nodes: Layer sizes from input to output; ``None`` is treated as an
            empty list.
        lr: Learning rate forwarded to ``Train``.

    Returns:
        The list of weight matrices after the last epoch.
    """
    nodes = [] if nodes is None else nodes
    weights = InitializeWeights(nodes)

    # Decide once whether validation reporting is possible.  The explicit
    # ``None`` check avoids calling ``.any()`` on the default ``None``.
    has_validation = X_val is not None and Y_val is not None and len(X_val) > 0

    # A separate loop variable keeps the ``epochs`` argument intact.
    for epoch in range(1, epochs + 1):
        weights = Train(X_train, Y_train, lr, weights)
        if epoch % 20 == 0:
            # Progress report every 20th epoch.
            print("epochs {}".format(epoch))
            print("Training Accuracy:{}".format(Accuracy(X_train, Y_train, weights)))
            if has_validation:
                print("Validation Accuracy:{}".format(Accuracy(X_val, Y_val, weights)))

    return weights

In [4]:
def InitializeWeights(nodes):
    """Create one random weight matrix per pair of consecutive layers.

    Args:
        nodes: Layer sizes, from the input layer to the output layer.

    Returns:
        A list of ``np.matrix`` objects.  The matrix between layer ``i-1``
        and layer ``i`` has ``nodes[i]`` rows and ``nodes[i-1] + 1`` columns
        (the extra column is the bias weight); every entry is drawn
        uniformly from [-0.05, 0.05).
    """
    weights = []
    for fan_in, fan_out in zip(nodes, nodes[1:]):
        rows = []
        for _ in range(fan_out):
            # One row per receiving unit: fan_in inputs plus one bias weight.
            rows.append([np.random.uniform(-0.05, 0.05) for _ in range(fan_in + 1)])
        weights.append(np.matrix(rows))
    return weights

In [5]:
def ForwardPropagation(x, weights, layers):
    """Feed one sample through the network and collect every activation.

    Args:
        x: Input sample with the bias term already prepended by the caller.
        weights: List of weight matrices, one per layer transition.
        layers: Number of weight matrices to apply.

    Returns:
        A list whose first element is ``x`` followed by the sigmoid output
        of each layer, in order.
    """
    activations = [x]
    current_input = x
    for layer_index in range(layers):
        pre_activation = np.dot(current_input, weights[layer_index].T)
        layer_output = Sigmoid(pre_activation)
        activations.append(layer_output)
        # Prepend the bias term before feeding the next layer.
        current_input = np.append(1, layer_output)

    return activations

def BackPropagation(y, activations, weights, layers, learning_rate=None):
    """Update the weights in place from one sample's prediction error.

    Args:
        y: Target vector for the sample.
        activations: Output of ``ForwardPropagation`` for the sample
            (input first, then one activation per layer).
        weights: List of weight matrices; each is modified in place.
        layers: Number of weight matrices.
        learning_rate: Step size for the update.  When omitted, the
            module-level ``lr`` is used, which is what this function
            relied on before the parameter existed.

    Returns:
        The same ``weights`` list, after the update.
    """
    # Only touch the global ``lr`` when no explicit rate was supplied.
    step = lr if learning_rate is None else learning_rate

    error = np.matrix(y - activations[-1])

    for j in range(layers, 0, -1):
        currActivation = activations[j]

        if j > 1:
            # Hidden activations are stored without the bias term.
            prevActivation = np.append(1, activations[j - 1])
        else:
            # The input sample already carries its bias term.
            prevActivation = activations[0]

        delta = np.multiply(error, SigmoidDerivative(currActivation))

        if j > 1:
            # Propagate the error through the weights that produced this
            # forward pass, i.e. BEFORE they are updated below.  The bias
            # column is dropped because the bias has no upstream unit.
            w = np.delete(weights[j - 1], [0], axis=1)
            error = np.dot(delta, w)

        # delta.T (column) times prevActivation (row) broadcasts to the
        # full gradient matrix for this layer.
        weights[j - 1] += step * np.multiply(delta.T, prevActivation)

    return weights

In [6]:
#Training the model:
def Train(X, Y, lr, weights):
    """Run one epoch of per-sample (online) weight updates.

    Args:
        X: Training samples, one row per sample.
        Y: Targets matching ``X`` row for row.
        lr: Learning rate.  NOTE: this argument is not used in this
            function and is not passed to ``BackPropagation``.
        weights: List of weight matrices to update.

    Returns:
        The weights returned by the last ``BackPropagation`` call, or the
        input ``weights`` unchanged when ``X`` is empty.
    """
    depth = len(weights)
    for row, features in enumerate(X):
        target = Y[row]
        # Prepend the bias term and wrap the sample as a 1-row matrix.
        sample = np.matrix(np.append(1, features))

        outputs = ForwardPropagation(sample, weights, depth)
        weights = BackPropagation(target, outputs, weights, depth)

    return weights

In [7]:
def Sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def SigmoidDerivative(x):
    """Return x * (1 - x) element-wise.

    This is the sigmoid's derivative expressed in terms of the sigmoid's
    output, so callers pass an activation value here.
    """
    complement = 1 - x
    # np.multiply keeps the product element-wise for np.matrix inputs too.
    return np.multiply(x, complement)

Neural network prediction and accuracy:

In [8]:
def Predict(item, weights):
    """Classify one sample and return the prediction as a one-hot list.

    Args:
        item: Feature values of a single sample, without the bias term.
        weights: List of weight matrices of the network.

    Returns:
        A list of 0/1 ints with a single 1 at the position of the largest
        output activation.
    """
    biased_item = np.append(1, item)
    depth = len(weights)

    # The last activation is the output layer; .A1 flattens it to 1-D.
    scores = ForwardPropagation(biased_item, weights, depth)[-1].A1
    winner = FindMaxActivation(scores)
    return [1 if position == winner else 0 for position in range(len(scores))]


def FindMaxActivation(output):
    """Return the index of the largest value in ``output``.

    Ties resolve to the earliest index because only a strictly greater
    value replaces the current best.  An empty ``output`` raises
    ``IndexError``.
    """
    best_index, best_value = 0, output[0]
    for position, value in enumerate(output[1:], start=1):
        if value > best_value:
            best_index, best_value = position, value

    return best_index

In [9]:
def Accuracy(X, Y, weights):
    """Return the fraction of samples whose prediction equals the target.

    Args:
        X: Samples, one row per sample.
        Y: One-hot targets matching ``X`` row for row.
        weights: List of weight matrices of the network.

    Returns:
        ``correct / len(X)`` as a float, or ``0.0`` when ``X`` is empty
        (instead of raising ``ZeroDivisionError``).
    """
    total = len(X)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # A prediction counts only if the whole one-hot vector matches.
    correct = sum(
        1 for row, sample in enumerate(X)
        if list(Y[row]) == Predict(sample, weights)
    )
    return correct / total

In [22]:
# Layer sizes taken from the data: one input unit per feature column and
# one output unit per one-hot class column.
i, o = len(X[0]), len(Y[0])

# Full architecture: input layer, hidden layers of 5, 9 and 10 units,
# output layer.
layers = [i, 5, 9, 10, o]
lr = 0.01  # BackPropagation also reads this module-level name.
epochs = 100

# Train for the configured number of epochs, then score the held-out test set.
weights = NeuralNetwork(
    X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr
)
print("Accuracy on test data: {}".format(Accuracy(X_test, Y_test, weights)))

epochs 20
Training Accuracy:0.3425925925925926
Validation Accuracy:0.25
epochs 40
Training Accuracy:0.3425925925925926
Validation Accuracy:0.25
epochs 60
Training Accuracy:0.3425925925925926
Validation Accuracy:0.25
epochs 80
Training Accuracy:0.3425925925925926
Validation Accuracy:0.25
epochs 100
Training Accuracy:0.3425925925925926
Validation Accuracy:0.25
Accuracy on test data: 0.3333333333333333


1. How does the number of hidden units affect the final accuracy on the test data?
Observation: accuracy is high when the number of hidden layers is increased, which means the model is overfitting the data.

2. How does it affect the number of epochs needed for training to converge?
Observation: as the number of epochs increases, the training curve becomes smoother.

In [None]:
#Reference for the above work
#http://www.wildml.com/2015/09/implementing-a-neural-network-from-scratch/
#http://neuralnetworksanddeeplearning.com/chap2.html
#https://selbydavid.com/2018/01/09/neural-network/
#https://towardsdatascience.com/neural-network-on-iris-data-4e99601a42c8