In [1]:
import numpy as np
import pandas as pd
import math
import time
import matplotlib.pyplot as plt

In [2]:
# Colab-only: mount Google Drive so the dataset CSVs read later from
# /content/drive/MyDrive/... are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Activation functions used by the neural network. The intended convention is: `act_type = 0` selects the log-sigmoid activation, `act_type = 1` selects the tanh activation, and `act_type = 2` selects the ReLU activation. Only tanh (together with its derivative) is defined in the cell below.

Refer to 'Table of activation functions' from https://en.wikipedia.org/wiki/Activation_function for activation function.

In [3]:
# Will be used as activation function for hidden as well as output layer
def tanh(X):
    """Element-wise hyperbolic tangent activation.

    Delegates to ``np.tanh``, so ``X`` may be a scalar or an array-like;
    the result has the same shape as the input.
    """
    activated = np.tanh(X)
    return activated

def derivative_tanh(X):
    """Derivative of tanh written in terms of the activation value.

    Returns ``1 - X**2`` element-wise. This equals d/dz tanh(z) only when
    ``X`` already holds a = tanh(z); the training loop in this notebook calls
    it with layer outputs (activations), not with pre-activation values.
    """
    squared = np.power(X, 2)
    return 1 - squared

Loss function: the intended convention is that `loss_type = 0` selects cross-entropy and `loss_type = 1` selects mean squared error. Only mean squared error is defined in the cell below. For more information see https://en.wikipedia.org/wiki/Loss_function.

In [4]:
def MSE(Y_train, Y):
    """Halved mean squared error between two 2-D arrays.

    Computes ``sum((Y_train - Y)**2) / (2 * rows)`` where ``rows`` is
    ``Y_train.shape[0]``. The squared differences are summed over every
    element (first down the rows, then across the resulting totals), so the
    result is a single scalar for 2-D inputs.
    """
    row_count = Y_train.shape[0]
    squared_error = np.square(np.subtract(Y_train, Y))
    # Collapse rows first, then the per-column totals, same order as a nested sum
    column_totals = np.sum(squared_error, axis=0)
    grand_total = np.sum(column_totals, axis=0)
    multiplier = 1/(2*row_count)
    return multiplier*grand_total

Reading Training Dataset and applying required modifications on training dataset

In [None]:
# Timestamp taken just before the training CSV is read
start = time.time()
df_train = pd.read_csv('/content/drive/MyDrive/Devnagri Dataset/train_data_shuffled.csv', header=None)

# The final column holds the class label: pull it out and one-hot encode it
# (one indicator column per distinct label value)
train_label_column = df_train.columns[-1]
Y_train = pd.get_dummies(df_train[train_label_column].to_numpy()).to_numpy()

print(pd.DataFrame(Y_train))

# Everything except the label column is pixel data; divide by 255 so the
# features lie in [0, 1] (assumes 8-bit pixel intensities -- TODO confirm)
df_train = df_train.drop(columns=train_label_column)
X_train = df_train.to_numpy()/255

print(pd.DataFrame(X_train))

Reading the test feature vectors and the test labels separately, and applying the required modifications.

In [None]:
df_test = pd.read_csv('/content/drive/MyDrive/Devnagri Dataset/public_test.csv', header = None)

# The final column holds the labels; they are kept as raw values (not one-hot)
test_label_column = df_test.columns[-1]
Y_test = df_test[test_label_column].to_numpy()
print(pd.DataFrame(Y_test))

# Remaining columns are the features: scale to [0, 1] by dividing by 255, then
# prepend a column of ones so the bias is absorbed into the first weight matrix
df_test = df_test.drop(columns=test_label_column)
X_test = df_test.to_numpy()/255
test_bias_column = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((test_bias_column, X_test), axis = 1)

print(pd.DataFrame(X_test))

Now setting the neural network parameters: number of epochs, batch size, neurons in each layer (passed as a list), learning-rate type (normal gradient descent or adaptive gradient descent), learning rate, activation type (sigmoid, tanh, ReLU or softmax activation), loss-function type (cross-entropy or mean squared error loss), and the initial seed value (the default is 87; the cell below sets it to 22).

In [None]:
# TODO: read these from params.txt instead of entering them manually

# --- Optimisation schedule ---
epochs = 100        # full passes over the batched training data
batch_size = 1700   # rows per mini-batch
lr_type = 1         # 1 -> learning rate is divided by sqrt(epoch number) each epoch
lr = 2              # base learning rate

# --- Network shape ---
layers = [512,256,128,46]   # neurons per layer; the last entry is the output layer

# --- Selectors (the training cell in this notebook always uses tanh and MSE) ---
act_type = 1
loss_type = 1

# --- Reproducibility ---
seed_val = 22       # seed passed to np.random.seed before weight initialisation

Randomly initialising the neural network's weight matrices according to Xavier initialisation.

In [None]:
# Number of training rows
N = X_train.shape[0]

# Seed the global NumPy generator so the initial weights are reproducible
np.random.seed(seed_val)

# Inputs feeding each layer: the dataset's feature count for the first layer,
# then the neuron count of the preceding layer for every later one
fan_ins = [X_train.shape[1]] + layers[:-1]

# One matrix per layer with shape (fan_in + 1, fan_out); the extra row is the
# bias weight. Entries are standard-normal draws scaled by
# sqrt(2 / (fan_in + fan_out + 1)).
weights = []
for fan_in, fan_out in zip(fan_ins, layers):
    scale = np.sqrt(2/(fan_in+fan_out+1))
    layer_matrix = np.random.normal(0, 1, (fan_in+1, fan_out))*scale
    weights.append(layer_matrix.astype(np.float64))

# Print the shape of every layer's weight matrix
for layer_matrix in weights:
    print(layer_matrix.shape)

Dividing Training Dataset into batches of batch_size.

In [None]:
# Split the training data into mini-batches of `batch_size` rows.
#
# The bias-augmented matrix is stored under its own name instead of overwriting
# X_train: reassigning X_train here made the cell non-idempotent (every re-run
# prepended another column of ones) and made a later re-run of the
# weight-initialisation cell size the first layer from the wrong feature count.
training_batch = []

print(pd.DataFrame(X_train))
print("======================================================")
# Prepend a column of ones so the bias is absorbed into the first weight matrix
X_train_bias = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
print(pd.DataFrame(X_train_bias))

# Only full batches are produced: the trailing (N % batch_size) rows are left
# out. If batch_size exceeds the number of rows, training_batch stays empty.
number_batches = X_train_bias.shape[0]//batch_size
for row_begin in range(0, number_batches*batch_size, batch_size):
    row_end = row_begin + batch_size
    # Each entry is an (X, Y) tuple covering the same rows
    training_batch.append((X_train_bias[row_begin:row_end, :], Y_train[row_begin:row_end, :]))


Now Training Our Neural Network According to given training Dataset

In [None]:
# Mini-batch gradient-descent training (tanh on every layer, MSE loss).
#   Reads   : weights, training_batch, epochs, lr, lr_type
#   Updates : weights (in place, once per batch) -- re-running this cell
#             therefore continues training from the current weights
#   Creates : loss -- one entry per epoch, the sum over batches of the MSE
#             measured after that batch's weight update


def _forward_pass(inputs, layer_weights):
    """Propagate `inputs` through the network.

    `inputs` must already carry the leading bias column of ones. Returns
    ``(output, layer_inputs)`` where ``layer_inputs[k]`` is the bias-augmented
    matrix that was fed into layer ``k`` (needed for backpropagation).
    """
    layer_inputs = [inputs]
    current = inputs
    for hidden_weights in layer_weights[:-1]:
        hidden = tanh(np.dot(current, hidden_weights))
        # Prepend ones to absorb the next layer's bias; sized from the data so
        # the pass works for any number of rows, not just `batch_size`
        current = np.concatenate((np.ones((hidden.shape[0], 1)), hidden), axis=1)
        layer_inputs.append(current)
    # The output layer is always the last matrix (no reliance on a loop index)
    return tanh(np.dot(current, layer_weights[-1])), layer_inputs


# `lr` is only read here. Previously it was overwritten with the decayed value,
# so re-running the cell silently started from the last epoch's learning rate.
base_lr = lr
loss = []

for epoch in range(epochs):
    # Adaptive schedule: step shrinks as 1/sqrt(epoch number); otherwise constant
    if lr_type == 1:
        step_size = base_lr/math.sqrt(epoch+1)
    else:
        step_size = base_lr

    total_loss = 0

    # Go over every batch and train the network
    for batch, Y in training_batch:
        output, layer_inputs = _forward_pass(batch, weights)

        # Error signal at the output layer: dLoss/d(pre-activation), averaged over rows
        der = (output-Y)*derivative_tanh(output)/Y.shape[0]

        # Backpropagate from the last layer to the first
        for layer_idx in range(len(weights)-1, -1, -1):
            gradient = np.dot(layer_inputs[layer_idx].T, der)
            if layer_idx > 0:
                # Push the error through this layer using its weights *before*
                # they are updated, then drop column 0 (the bias input, which
                # has no upstream layer). Nothing to propagate below layer 0.
                der = np.dot(der, weights[layer_idx].T)*derivative_tanh(layer_inputs[layer_idx])
                der = np.delete(der, 0, axis=1)
            weights[layer_idx] -= step_size*gradient

        # Loss of this batch measured with the freshly updated weights
        output = _forward_pass(batch, weights)[0]
        total_loss += MSE(Y, output)

    # Record this epoch's loss value
    loss.append(total_loss)

    print("Training Loss on " + str(epoch+1) +" iteration --> " + str(total_loss))

print("Graphical representation of Training Loss vs number of epochs -->")
plt.plot(range(1, epochs+1), loss)
plt.xlabel("Epoch")
plt.ylabel("Training loss (summed over batches)")
plt.show()

Now evaluating the model on the testing dataset. As you will see, it is highly inaccurate, and it needs 500 epochs to show good results.

In [None]:
# Evaluate the trained network on the test set.
#
# The forward pass works on a separate variable so X_test is left intact;
# overwriting X_test with activations made this cell fail on a second run
# (the 46-wide output would be fed back into the first layer).
test_activations = X_test
for hidden_weights in weights[:-1]:
    test_activations = tanh(np.dot(test_activations, hidden_weights))
    # Prepend ones to absorb the next layer's bias
    test_activations = np.concatenate((np.ones((test_activations.shape[0],1)), test_activations), axis = 1)

# The output layer is the last weight matrix (no reliance on a leaked loop index)
test_output = tanh(np.dot(test_activations, weights[-1]))
pred = np.argmax(test_output, axis=1)

# NOTE(review): `pred` holds column indices of the one-hot encoding, compared
# directly with the raw labels in Y_test. This presumably relies on the labels
# being the integers 0..C-1 -- confirm against the dataset.
total = pred.shape[0]
correct = int(np.count_nonzero(pred == Y_test))

print("Accuracy On testing dataset ---> " +str((correct/total)*100)) 