## BACKPROPAGATION 

In [4]:
import numpy as np 
import pandas as pd 
#import math
from numpy import genfromtxt
#from random import seed
from random import random as rn

In [5]:
# Loading dataset
data_frame = pd.read_csv("train_data.csv", header=None)
labels_frame = pd.read_csv("train_labels.csv", header=None)

labels=np.array(labels_frame)
data=np.array(data_frame)

count = len(data)

In [6]:
data = np.append(data, labels, axis=1)#Attaching labels at the end of train data
data.shape

(24754, 788)

In [7]:
#Shuffling rows before splitting
np.random.shuffle(data)

In [8]:
# Defining data splitting function
def dataSplit(data, split_at):
    np.random.shuffle(data)
    return data[:split_at,:] , data[split_at:,:]

In [9]:
#Splitting data into train and test sets
train_count = round(count*0.9)
#print(test_count + train_count)
train_data , test_data = dataSplit(data, train_count)
print("test:", len(test_data))
print("train:", len(train_data))

# validating the train data further
valid_count = round(train_count*0.2)
valid_data, train_data = dataSplit(train_data, valid_count)
print("train final:", len(train_data))
print("validation:", len(valid_data))

test: 2475
train: 22279
train final: 17823
validation: 4456


In [10]:
train_data[:,-4:]

array([[0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.]])

In [11]:
X_train=train_data[:, :784]
y_train=train_data[:,-4:]
X_test = test_data[:, :784]
y_test=test_data[:,-4:]

In [12]:
n_inputs = 784
hidden_nodes =  260
n_outputs = 4 

wh = np.random.rand(n_inputs,hidden_nodes)
bh = np.random.randn(hidden_nodes)

wo = np.random.rand(hidden_nodes,n_outputs)
bo = np.random.randn(n_outputs)
lr = 0.01


In [14]:
# function named sigmoid() that implements the sigmoid equation.
def sigmoid(x):
    return np.where(x >= 0, 
                    1 / (1 + np.exp(-x)), 
                    np.exp(x) / (1 + np.exp(x)))
#1/(1+np.exp(-x))

def sigmoid_der(x):
    return sigmoid(x) *(1-sigmoid (x))
# function named softmax() that implements the sigmoid equation.
def softmax(x):
    #expA = np.exp(A)
    #return expA / expA.sum(axis=1, keepdims=True)
    z = x - np.max(x, axis=-1, keepdims=True)
    numerator = np.exp(z)
    denominator = np.sum(numerator, axis=-1, keepdims=True)
    softmax = numerator / denominator
    return softmax


In [16]:
error_cost = []

for epoch in range(700):
# Forward propagate input to a network output

    # Phase 1
    #Calculating input to hidden layer
    zh = np.dot(X_train, wh) + bh
    #Output of hidden layer
    ah = np.tanh(zh)

    # Phase 2
    #Input to output layer
    zo = np.dot(ah, wo) + bo
    #Output of output layer
    ao = softmax(zo)
    
# Back Propogation  
    #Differentiating cost wrt weights so that weights can be updated
    #Differentiating cost wrt to output layer weights in order to update them in the end.
    dcost_dzo = ao - y_train
    dzo_dwo = ah

    dcost_wo = np.dot(dzo_dwo.T, dcost_dzo)

    dcost_bo = dcost_dzo
    
    dzo_dah = wo
    dcost_dah = np.dot(dcost_dzo , dzo_dah.T)
    dah_dzh = 1-np.power(ah,2)
    dzh_dwh = X_train
    dcost_wh = np.dot(dzh_dwh.T, dah_dzh * dcost_dah)
    
    dcost_bh = dcost_dah * dah_dzh

    # # Update network weights
    
    wh -= lr * dcost_wh
    bh -= lr * dcost_bh.sum(axis=0)

    wo -= lr * dcost_wo
    bo -= lr * dcost_bo.sum(axis=0)
    
    #Printing loss function value after every 50 epochs
    if epoch % 50 == 0:
        loss = np.sum(-np.log(np.sum(y_train * ao, axis=1)+ 1e-12) )
        print('Loss function value: ', loss)
        error_cost.append(loss)

  This is separate from the ipykernel package so we can avoid doing imports until


Loss function value:  357020.42383890785
Loss function value:  357020.42383890785
Loss function value:  286178.6345199053
Loss function value:  371416.18584030715
Loss function value:  374952.95654314617
Loss function value:  357020.42383890785
Loss function value:  374013.5018252045
Loss function value:  371416.18584030715
Loss function value:  374952.95654314617
Loss function value:  374952.95654314617
Loss function value:  371416.18584030715
Loss function value:  374013.5018252045
Loss function value:  374013.5018252045
Loss function value:  357020.42383890785


In [20]:
zh = np.dot(X_test, wh) + bh
ah = np.tanh(zh)

# Phase 2
# Make a prediction with a network
zo = np.dot(ah, wo) + bo
ao = softmax(zo)

  This is separate from the ipykernel package so we can avoid doing imports until


In [26]:
#Creating an array of max values per row.
rowmax = np.amax(ao, 1).reshape(len(ao), 1)

#Matching the shape of rowmax with softmax outputs
rowmax = rowmax.reshape(len(rowmax),1)
rowmax = np.append(rowmax, rowmax, axis=1)
rowmax = np.append(rowmax, rowmax, axis=1)

#Saving as one hot labels
predictions = (ao == rowmax).astype(np.uint8)




In [32]:
def accuracy(y_true, y_pred):
    if not (len(y_true) == len(y_pred)):
        print('Size of predicted and true labels not equal.')
        return 0.0

    corr = 0
    for i in range(0,len(y_true)):
        corr += 1 if (y_true[i] == y_pred[i]).all() else 0

    return corr/len(y_true)

In [33]:
Accuracy = accuracy(y_test, y_pred)

In [36]:
Accuracy

96.61712439418416

In [37]:
from numpy import savetxt, loadtxt

savetxt('w1.csv', wh,delimiter=',')
savetxt('b1.csv', bh,delimiter=',')
savetxt('w2.csv', wo,delimiter=',')
savetxt('b2.csv', bo,delimiter=',')

### References
1. https://stackabuse.com/creating-a-neural-network-from-scratch-in-python-multi-class-classification/

In [None]:
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights':[rn() for i in range(n_inputs)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights':[rn() for i in range(n_hidden)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network

In [None]:
network = initialize_network(4, 2, 3)
print(network)

In [None]:
network

In [None]:
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    
    hidden_layer = [{'weights':[rn() for i in range(n_inputs)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights':[rn() for i in range(n_hidden)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network
 
seed(1)
#network = initialize_network(4, 2, 3)
#for layer in network:
#    print(layer)

In [None]:
# Calculate neuron activation for an input
def activate(weights, inputs):
    activation = 0
    for i in range(len(weights)-1):
        activation += weights[i] * inputs[i]
    return activation

In [None]:
# Transfer neuron activation
def transfer(z):
    return np.exp(z) / np.sum(np.exp(z),axis=0, keepdims=True)

In [None]:
# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation) # activation ko o/p mai transfer kiya
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

In [None]:
# Calculate the derivative of an neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

In [None]:
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]: #o/p layer ki neurons ki baat ho ri hai[0,1]
                    error += (neuron['weights'][j] * neuron['delta']) #j is frst neuron
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output']) #o/p layer ke neurons ka delta
            
   

In [None]:
# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += l_rate * neuron['delta'] # for bias

#forward_propagate(network, [1,2,3,4])
#backward_propagate_error(network, [0,1,2])
#update_weights(network, [1,2,3,4],0.1)
#for layer in network:
 #   print(layer)         

In [None]:
# Train a network for a fixed number of epochs
def train_network(network, X_train, l_rate, n_epoch, y_train):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in X_train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(y_train)]
            #expected[row[-1]] = 1
            sum_error += sum([(expected[i]-outputs[i])**2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.1f, error=%.3f' % (epoch, l_rate, sum_error))

In [None]:
# Make a prediction with a network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

In [None]:
# gyy

n_outputs = len(set([row[-1] for row in X_train]))
n_inputs = len(X_train[0]) - 1
network = initialize_network(n_inputs, 3, n_outputs)
train_network(network, X_train, 0.1, 5, n_outputs)
print(predict(network, X_valid[0]))

result_arr=[]
for row in X_valid:
    result=predict(network,row)
    result_arr.append(result)
print(result_arr)
print(len(result_arr))
#accuracy = (result_arr == y_valid).all(axis=(0,2)).mean()
print(accuracy_score(np.array(y_valid), np.array(result_arr)[:, 2, :]))  

In [None]:
def sigmoid(x):
   p(-x))

def sigmoid_der(x):
    return sigmoid(x) *(1-sigmoid (x))

def softmax(A):
    expA = np.exp(A)
    return expA / expA.sum(axis=1, keepdims=True)

In [None]:
hidden_nodes= 50
bh = np.random.randn(1)
wh = np.random.rand(784,hidden_nodes)
wo = np.random.rand(hidden_nodes,4)
bo = np.random.randn(4)
for epoch in range(50000):
## feedforward

    # Phase 1
    zh = np.dot(train_data, wh) + bh
    ah = sigmoid(zh)

    # Phase 2
    zo = np.dot(ah, wo) + bo
    ao = softmax(zo)