In [59]:
import numpy as np
from numpy.random import MT19937
from numpy.random import RandomState, SeedSequence
rs = RandomState(MT19937(SeedSequence()))
np.random.seed(12345679)


toy_test = open("data/toy/testNN.txt", "r")
toy_train = open("data/toy/trainNN.txt", "r")

# loop until end of file
test_input = []
train_input = []

for line in toy_test:
    # split line into words
    words = line.split()
    # convert words to float
    for i in range(len(words)):
        words[i] = float(words[i])
    # append words to list
    test_input.append(words)

for line in toy_train:
    # split line into words
    words = line.split()
    # convert words to float
    for i in range(len(words)):
        words[i] = float(words[i])
    # append words to list
    train_input.append(words)


# split X and Y
test_X = []
test_Y = []
train_X = []
train_Y = []

# take last column as Y
for i in range(len(test_input)):
    test_Y.append(int(test_input[i][-1]))
    test_X.append(test_input[i][:-1])

for i in range(len(train_input)):
    train_Y.append(int(train_input[i][-1]))
    train_X.append(train_input[i][:-1])

train_X = np.array(train_X)
train_Y = np.array(train_Y)
test_X = np.array(test_X)
test_Y = np.array(test_Y)


# normalize with mean and std
train_X = (train_X - np.mean(train_X, axis=0)) / np.std(train_X, axis=0)
test_X = (test_X - np.mean(test_X, axis=0)) / np.std(test_X, axis=0)

In [60]:
# backprop algo implementation


def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_der(z):
    return sigmoid(z)*(1-sigmoid(z))

def ReLU(z):
    return np.maximum(0,z)

# DERIVATIVE OF ReLu FUNCTION
def ReLU_der(z):
    return np.array(z>0, dtype=float)

def cost_der(a,y):
    return (a-y)


def backprop(B,W,structure,x,y):
    
    delJdelB = [np.zeros(b.shape) for b in B]
    delJdelW = [np.zeros(w.shape) for w in W]

    Z = []
    A = []
    # ---------- forward pass ----------   
    x = np.array(x).reshape(len(x),1)
    for b, w in zip(B, W):
        if len(Z) == 0:
            Z.append(np.dot(w.T,x) + b)
        else:
            Z.append(np.dot(w.T, A[-1]) + b)

        A.append(ReLU(Z[-1])) 
    

    H = len(structure)-2 # number of hidden layers

    for l in range(H,-1,-1):
    
        if l == H:
            delJdelB[l] = cost_der(A[l], y) * ReLU_der(Z[l])
        else:
            delJdelB[l] = ReLU_der(Z[l]) * np.dot( W[l+1],delJdelB[l+1]) 
    
        if l == 0:
            delJdelW[l] = np.dot(x.T,delJdelB[l-1])
        else:
            delJdelW[l] = np.dot(A[l-1],delJdelB[l].T) #doubt
    

    return delJdelW, delJdelB


def gradient_descent(B,W,structure,mini_batch, eta) :
    delJdelB = [np.zeros(b.shape) for b in B]
    delJdelW = [np.zeros(w.shape) for w in W]

    for x, y in mini_batch:
        dJdw, dJdb = backprop(B,W,structure,x, y)
        delJdelB = [b + dJdb[i] for i, b in enumerate(delJdelB)]
        delJdelW = [w + dJdw[i] for i, w in enumerate(delJdelW)]
    
    B = [b - (eta/len(mini_batch)) * dJdb[i] for i, b in enumerate(B)]
    W = [w - (eta/len(mini_batch)) * dJdw[i] for i, w in enumerate(W)]


def train(structure,train_X, train_Y, epochs, eta):
    
    B = [np.random.randn(l, 1) for l in structure[1:]] 
    W = [np.random.randn(l, next_l) for l, next_l in zip(structure[:-1], structure[1:])]

    for i in range(epochs):
        mini_batch = list(zip(train_X, train_Y))
        np.random.shuffle(mini_batch)
        mini_batch = mini_batch[:len(train_X)]
        gradient_descent(B,W,structure,mini_batch, eta)

    return B, W


num_of_class = len(set(test_Y))
num_of_input_features = len(test_X[0])
        
neu_net = [num_of_input_features,5,num_of_class]
B, W = train(neu_net, train_X, train_Y, 100, 0.1)


# test the model

# get the prediction from given weights and biases and input
def predict(B,W,x):
    Z = []
    A = []


    # make X a column vector
    x = np.array(x).reshape(len(x),1)

    for b, w in zip(B, W):
        if len(Z) == 0:
            Z.append(np.dot(w.T,x) + b)
        else:
            Z.append(np.dot(w.T, A[-1]) + b)

        A.append(ReLU(Z[-1])) 
    
    return A[-1]


y = predict(B,W,test_X[0]) 

y_pred = []
for x in test_X:
    y_pred.append(np.argmax(predict(B,W,x))+1)
    #y_pred.append(predict(B,W,x))

# print accuracy
accuracy = 0
for i in range(len(test_Y)):
    if test_Y[i] == y_pred[i]:
        accuracy += 1

print("Accuracy", accuracy/len(test_Y))

Accuracy 0.508
