In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split


In [2]:
tr_data = pd.read_csv('./../data/mnist_train.csv')
ts_data = pd.read_csv('./../data/mnist_test.csv')

In [3]:
tr_data.describe()


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,...,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0
mean,4.453933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.200433,0.088867,0.045633,0.019283,0.015117,0.002,0.0,0.0,0.0,0.0
std,2.88927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.042472,3.956189,2.839845,1.68677,1.678283,0.3466,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [4]:
train_data = tr_data.drop(columns='label').values
train_labels = tr_data.label.values
test_data = ts_data.drop(columns='label').values
test_labels = ts_data.label.values

In [5]:
print(train_data.shape)
print(train_labels.shape)

(60000, 784)
(60000,)


In [34]:
train_data = train_data/255
test_data = test_data/255

In [59]:
class Activation:
    def softmax(self, z):
        return 1/sum(np.exp(z)) * np.exp(z)

    def stable_softmax(self, x):
        # """ ... numerically stable way."""
        shiftx = x - np.max(x)
        exps = np.exp(shiftx)
        return exps / np.sum(exps)

    def relu(self, z):
        return np.maximum(0, z)

    def tanh(self, z):
        return np.tanh(z)
    
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))
    
    def use(self, string):
       match string:
        case 'relu':
            return self.relu
        case 'sigmoid':
            return self.sigmoid
        case 'softmax':
            return self.softmax
        case 'tanh':
            return self.tanh
        case 'stable_softmax':
            return self.stable_softmax

In [49]:
class LossFunctions:
    def mean_squared_error(self, y_true, y_pred):
        mse = np.mean((y_true - y_pred)**2)
        return mse
    
    def categorical_cross_entropy(self, y_hat, label):
        return -sum(np.multiply(np.log(y_hat), one_hot(label)))
    
    def grad_relu(self, x):
        return (x > 0).astype(int)
    
    def decay_alpha(i):
        return( 0.01 if i <=12 else (0.001 if i <= 24 else (0.0001)) )

In [50]:
class Encoder:
    def one_hot(self, label):
        arr = np.zeros(shape = (10,1))
        arr[label] = 1
        return arr

In [53]:
class Metrics:
    def accuracy(self, y_hat, true):
        return 100*np.mean(np.where(y_hat == np.reshape(true,len(true)), 1,0))

In [67]:
class NeuralNetwork:
    def __init__(self, train_data, train_labels, test_data, test_labels, hidden_layer_u, learning_rate=0.01):

        self.x_train = train_data
        self.x_test = test_data
        self.y_train = train_labels
        self.y_test = test_labels

        self.layer1 = 784
        self.layer2 = hidden_layer_u
        self.layer3 = 10

        self.W1 = np.random.randn(self.layer2,self.layer1)/np.sqrt(self.layer1)
        self.b1 = np.zeros(shape = (self.layer2,1))
        self.W2 = np.random.randn(self.layer3,self.layer2)/np.sqrt(self.layer2)
        self.b2 = np.zeros(shape = (self.layer3,1))

    def summary(self):
        print('Model configurations: ')
        print('W1 has dim', self.W1.shape)
        print('b1 has dim', self.b1.shape)
        print('W2 has dim', self.W2.shape)
        print('b2 has dim', self.b2.shape)
        print('Input has dim', self.layer1)
        print('Hidden layer has dim', self.layer2)
        print('Output has dim', self.layer3)

    def forward(self,x):
        
        self.z1 = np.array([self.W1.dot(x)]).transpose() + self.b1
        vec_rectified_linear_unit = np.vectorize(Activation().use('relu'))
        self.h = vec_rectified_linear_unit(self.z1)
        self.h = self.h.transpose()[0]
        self.z2 = np.array([self.W2.dot(self.h)]).transpose() + self.b2
        self.y_hat = Activation().use('stable_softmax')(self.z2)    
        
    def back_prop(self,Ytr1,Xtr1):
        
        y_hat = self.y_hat
        h = self.h
        z1 = self.z1
        
        true = Encoder().one_hot(Ytr1)

        diff_outer = -(true - y_hat)

        del_b2 = diff_outer
        del_W2 = np.matmul(diff_outer,np.reshape(h,(1,self.l2)))
        DEL = self.W2.transpose().dot(diff_outer)

        NAB = np.multiply(DEL, grad_rectified_linear_unit(z1))
        del_b1 = NAB
        del_W1 = np.matmul(np.reshape(NAB, (self.l2,1)), np.reshape(Xtr1, (1,self.l1)))

        self.W2 = self.W2 - alpha * del_W2
        self.b2 = self.b2 - alpha * del_b2
        self.b1 = self.b1 - alpha * del_b1
        self.W1 = self.W1 - alpha * del_W1
                
    def predict(self,x):
        y_hat_lab = np.zeros(shape = (len(x)))
        for i in range(len(x)):
            self.forward(x[i])
            y_hat_lab[i] = np.argmax(self.y_hat)
        return y_hat_lab

    def train(self, epochs):
        x_learn, x_val, y_learning, y_val = train_test_split(self.x_train, self.y_train)
        L = [i for i in range(0,len(x_learn))]
        print('------------------------')
        print("training the network")

        for j in range(epochs):
            alpha = decay_alpha(j)
            loss = 0
            np.random.shuffle(L)
            
            for i in L:
                self.forward(x_learn[i])
                self.back_prop(y_learning[i], x_learn[i])
                
                loss = loss + LossFunctions().categorical_cross_entropy(self.y_hat, y_learning[i])
            
            loss = loss/len(L)
            predicted_labels_validation = self.predict(x_val)
            print('Epoch :', j)
            print('Loss ->', loss[0])
            print('accuracy ->', Metrics().accuracy(predicted_labels_validation, y_val))
            print('------------------------')
        print('---------training complete----------')

    def run_test(self):
        temp = self.x_test@self.W1.T + self.b1.T
        temp = np.clip(temp,a_min=0, a_max=temp.max())
        temp = temp@self.W2.T + self.b2.T
        temp = np.exp(temp)
        temp2 = temp.sum(axis=1)
        temp = temp/temp2.reshape(-1,1)
        preds = np.argmax(temp,axis=1)
        print('the testing accuracy is :', Metrics().accuracy(preds, self.y_test))

In [73]:
model = NeuralNetwork(train_data, train_labels, test_data, test_labels, 100, 0.01)

In [74]:
model.summary()

Model configurations: 
W1 has dim (100, 784)
b1 has dim (100, 1)
W2 has dim (10, 100)
b2 has dim (10, 1)
Input has dim 784
Hidden layer has dim 100
Output has dim 10


In [75]:
model.train(epochs=2)

------------------------
training the network
Epoch : 0
Loss -> 0.06488945574244043
accuracy -> 12.693333333333335
------------------------
---------training complete----------


In [76]:
model.run_test()

the testing accuracy is : 12.790000000000001


In [46]:
# def rectified_linear_unit(x):
#     if x<0:
#         return(0)
#     else :
#         return(x)

# def grad_rectified_linear_unit(x):
#     x[x>0] = 1
#     x[x<=0] = 0
#     return x
    
# def one_hot(label):
#     arr = np.zeros(shape = (10,1))
#     arr[label] = 1
#     return arr

# def categorical_cross_entropy(y_hat, label):
#     return -sum(np.multiply(np.log(y_hat), one_hot(label)))

# def stable_softmax(X):
#     temp2 = np.exp(X - np.max(X))
#     return temp2 / np.sum(temp2)

# def decay_alpha(i):
    # return( 0.01 if i <=12 else (0.001 if i <= 24 else (0.0001)) )
    
# def accuracy(y_hat, true):
#     return 100*np.mean(np.where(y_hat == np.reshape(true,len(true)), 1,0))

In [47]:
from sklearn.model_selection import train_test_split


class neural_network:
        
    def __init__(self,lr=0.001,loss_func='categorical_cross_entropy',MNIST=True,xtr=None,xts=None,ytr=None,yts=None):
        
        self.lr = lr
        self.loss_func = 'categorical_cross_entropy'
        
        if MNIST != True :
            assert(None not in [xtr,xts,ytr,yts])
            self.x_train = xtr
            self.x_test = xts
            self.y_train = ytr
            self.y_test = yts

        else :
            self.prepare_data()
            
    def prepare_data(self):
        
        self.x_train = train_data
        self.x_test = test_data
        self.y_train = train_labels
        self.y_test = test_labels
        
            
        
            
    def print_model(self):
        print('')
        print('Model configurations are as follows :')
        print('-------------------------------------')
        
        print('layer 1')
        print('W1 has dim', self.W1.shape)
        print('b1 has dim', self.b1.shape)
        print('W2 has dim', self.W2.shape)
        print('b2 has dim', self.b2.shape)
        print('Input has dim', self.l1)
        print('Hidden layer has dim', self.l2)
        print('Output has dim', self.l3)
            
    def create_model(self, L2):
        
        self.l1 = 784
        self.l2 = L2
        self.l3 = 10

        self.W1 = np.random.randn(self.l2,self.l1)/np.sqrt(self.l1)
        self.b1 = np.zeros(shape = (self.l2,1))
        self.W2 = np.random.randn(self.l3,self.l2)/np.sqrt(self.l2)
        self.b2 = np.zeros(shape = (self.l3,1))
    
    def forward(self,x):
        
        self.z1 = np.array([self.W1.dot(x)]).transpose() + self.b1
        vec_rectified_linear_unit = np.vectorize(rectified_linear_unit)
        self.h = vec_rectified_linear_unit(self.z1)
        self.h = self.h.transpose()[0]
        self.z2 = np.array([self.W2.dot(self.h)]).transpose() + self.b2
        self.y_hat = stable_softmax(self.z2)
    
    def predict(self,x):
        y_hat_lab = np.zeros(shape = (len(x)))
        for i in range(len(x)):
            self.forward(x[i])
            y_hat_lab[i] = np.argmax(self.y_hat)
        return y_hat_lab
    
    def back_prop(self,Ytr1,Xtr1):
        
        y_hat = self.y_hat
        h = self.h
        z1 = self.z1
        
        true = one_hot(Ytr1)

        diff_outer = -(true - y_hat)

        del_b2 = diff_outer
        del_W2 = np.matmul(diff_outer,np.reshape(h,(1,self.l2)))
        DEL = self.W2.transpose().dot(diff_outer)

        NAB = np.multiply(DEL, grad_rectified_linear_unit(z1))
        del_b1 = NAB
        del_W1 = np.matmul(np.reshape(NAB, (self.l2,1)), np.reshape(Xtr1, (1,self.l1)))

        self.W2 = self.W2 - alpha * del_W2
        self.b2 = self.b2 - alpha * del_b2
        self.b1 = self.b1 - alpha * del_b1
        self.W1 = self.W1 - alpha * del_W1
        
        
    def test_and_summarize(self):
        temp = self.x_test@self.W1.T+self.b1.T
        temp = np.clip(temp,a_min=0,a_max=temp.max())
        temp = temp@self.W2.T+self.b2.T
        temp = np.exp(temp)
        temp2 = temp.sum(axis=1)
        temp = temp/temp2.reshape(-1,1)
        preds = np.argmax(temp,axis=1)
        print('the testing accuracy of the classifier is :',sum(preds.reshape(-1,1) == self.y_test.reshape(-1,1))/len(self.y_test))
        
        
        
NN = neural_network()
NN.create_model(100)
NN.print_model()



x_learn, x_val, y_learning, y_val = train_test_split(NN.x_train, NN.y_train)
L = [i for i in range(0,len(x_learn))]
epochs = 1
print('')
print('------------------------')
print("start training the net")

for j in range(epochs):
    
    #IMPLEMETING SGD ALGORITHM
    
    alpha = decay_alpha(j)
    loss = 0
    np.random.shuffle(L)
    
    for i in L:
        NN.forward(x_learn[i])
        NN.back_prop(y_learning[i],x_learn[i])
        
        loss = loss + categorical_cross_entropy(NN.y_hat,y_learning[i])
    
    loss = loss/len(L)
    predicted_labels_validation = NN.predict(x_val)
    print('Epoch Summary for epoch:',j)
    print('Loss ->',loss[0])
    print('accuracy ->',accuracy(predicted_labels_validation,y_val))
    print('')
    
    

    
NN.test_and_summarize()


Model configurations are as follows :
-------------------------------------
layer 1
W1 has dim (100, 784)
b1 has dim (100, 1)
W2 has dim (10, 100)
b2 has dim (10, 1)
Input has dim 784
Hidden layer has dim 100
Output has dim 10

------------------------
start training the net
Epoch Summary for epoch: 0
Loss -> 0.2780669935288418
accuracy -> 94.72

the testing accuracy of the classifier is : [0.9556]
