In [44]:
import time
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split

In [21]:
# Load the MNIST dataset from CSV files in the current working directory.
image_size = 28                  # images are image_size x image_size pixels
no_of_different_labels = 10      # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size
train_data = np.loadtxt("mnist_train.csv", delimiter=",")
test_data = np.loadtxt("mnist_test.csv", delimiter=",")

# Data preprocessing.
# Column 0 of every row is taken as the label, the remaining columns as pixels.

# Map pixel values from [0, 255] to [0.01, 1.0].
fac = 0.99 / 255
# np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
# dtype is the documented replacement and behaves the same here.
train_imgs = np.asarray(train_data[:, 1:], dtype=np.float64) * fac + 0.01
test_imgs = np.asarray(test_data[:, 1:], dtype=np.float64) * fac + 0.01

# Labels keep their trailing axis (shape (N, 1)) so the comparison against
# `lr` below broadcasts to an (N, no_of_different_labels) matrix.
train_labels = np.asarray(train_data[:, :1], dtype=np.float64)
test_labels = np.asarray(test_data[:, :1], dtype=np.float64)

lr = np.arange(no_of_different_labels)

# Transform labels into their one-hot representation.
train_labels_one_hot = (lr == train_labels).astype(np.float64)
test_labels_one_hot = (lr == test_labels).astype(np.float64)

In [31]:
# Hold out 20% of the training samples for validation; the fixed seed keeps
# the split identical across runs.
x_train, x_val, y_train, y_val = train_test_split(
    train_imgs,
    train_labels_one_hot,
    test_size=0.2,
    random_state=9,
)

In [None]:
class NeuralNetwork():
    """Fully connected network: input -> hidden 1 -> hidden 2 -> output.

    Both hidden layers use a sigmoid activation and the output layer uses a
    softmax. Training is full-batch gradient descent on the mean
    cross-entropy between the softmax output and one-hot targets.

    Parameters
    ----------
    sizes : sequence of 4 ints
        Number of units in the input, first hidden, second hidden and output
        layers, in that order.
    epochs : int
        Number of full passes over the training data made by ``train``.
    learning_rate : float
        Step size of the gradient-descent update.
    """

    def __init__(self, sizes,epochs=100,learning_rate=0.0001):
        self.sizes=sizes
        self.epochs=epochs
        self.learning_rate=learning_rate
        self.params=self.initializeWeight()
        # Intermediate values of the most recent forward pass.
        self.status={}
        # Expects raw logits (it applies log-softmax itself), see ``train``.
        self.loss=nn.CrossEntropyLoss()

    def initializeWeight(self):
        """Return a dict of randomly initialised weights and biases.

        Weights are standard-normal samples scaled by ``sqrt(1 / n)`` where
        ``n`` is the number of units in the layer the weights feed into;
        biases are unscaled standard-normal samples.
        """
        input_layer=self.sizes[0]
        hidden_1=self.sizes[1]
        hidden_2=self.sizes[2]
        output_layer=self.sizes[3]
        return{
            'W1':np.random.randn(input_layer, hidden_1) * np.sqrt(1. / hidden_1),
            'W2':np.random.randn(hidden_1, hidden_2) * np.sqrt(1. / hidden_2),
            'W3':np.random.randn(hidden_2, output_layer) * np.sqrt(1. / output_layer),
            'b1':np.random.randn(hidden_1),
            'b2':np.random.randn(hidden_2),
            'b3':np.random.randn(output_layer)
        }

    def InnerProduct_ForProp(self,x,W,b):
        """Affine layer forward pass: ``x @ W + b``."""
        return np.dot(x,W)+b

    def InnerProduct_BackProp(self,dEdy,x,W,b):
        """Affine layer backward pass.

        Returns ``(dEdx, dEdW, dEdb)``. ``dEdW`` is already summed over the
        batch axis (it has the shape of ``W``), while ``dEdb`` is returned per
        sample (same shape as ``dEdy``); the caller reduces both.
        """
        dEdx=np.dot(dEdy,W.T)
        dEdW=np.dot(x.T,dEdy)
        dEdb=dEdy
        return dEdx,dEdW,dEdb

    def Softmax_ForProp(self,x):
        """Softmax over the last axis, i.e. independently for every sample."""
        # Subtracting the per-sample maximum leaves the result unchanged but
        # prevents overflow in exp for large logits.
        shifted=x-np.max(x,axis=-1,keepdims=True)
        exp=np.exp(shifted)
        return exp/np.sum(exp,axis=-1,keepdims=True)

    def Softmax_BackProp(self,y,t):
        """Gradient of softmax + cross-entropy w.r.t. the logits: ``y - t``."""
        return y-t

    def Sigmoid_ForProp(self,x):
        """Logistic sigmoid, computed via tanh so that exp never overflows."""
        return 0.5*(1.0+np.tanh(0.5*x))

    def Sigmoid_BackProp(self,dEdy,x):
        """Backpropagate ``dEdy`` through a sigmoid evaluated at ``x``."""
        s=self.Sigmoid_ForProp(x)
        # Chain rule: upstream gradient times the local derivative s * (1 - s).
        return dEdy*s*(1.0-s)

    def sigmoid(self, x, derivative=False):
        """Sigmoid of ``x``, or its derivative when ``derivative`` is true."""
        s=self.Sigmoid_ForProp(x)
        if derivative:
            return s*(1.0-s)
        return s

    def softmax(self, x, derivative=False):
        """Softmax normalised along axis 0 (or ``p * (1 - p)`` if ``derivative``).

        Unlike ``Softmax_ForProp`` this normalises along axis 0 and shifts by
        the global maximum of ``x``; it is not used by the training loop.
        """
        # Numerically stable with large exponentials
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            return probs * (1 - probs)
        return probs

    def backward_pass(self,output,target):
        """Run backpropagation and apply one gradient-descent step.

        Uses the activations cached in ``self.status`` by the latest forward
        pass, so ``forward_pass`` must have been called on the matching input.
        ``output`` and ``target`` are expected to be 2-D with one row per
        sample. All gradients are averaged over the batch.
        """
        status=self.status
        params=self.params
        learning_rate=self.learning_rate
        batch_size=status["A0"].shape[0]

        # Compute every gradient with the current weights before updating any.
        # output layer back to hidden 2
        dZ3=self.Softmax_BackProp(output,target)
        dA2,dW3,db3=self.InnerProduct_BackProp(dZ3,status["A2"],params["W3"],params["b3"])
        # hidden 2 back to hidden 1
        dZ2=self.Sigmoid_BackProp(dA2,status["Z2"])
        dA1,dW2,db2=self.InnerProduct_BackProp(dZ2,status["A1"],params["W2"],params["b2"])
        # hidden 1 back to input
        dZ1=self.Sigmoid_BackProp(dA1,status["Z1"])
        _,dW1,db1=self.InnerProduct_BackProp(dZ1,status["A0"],params["W1"],params["b1"])

        # dEdW is a sum over the batch, dEdb is per sample: reduce both to a
        # batch mean so each gradient has exactly the shape of its parameter.
        grads={
            "W1":dW1/batch_size,
            "b1":np.mean(db1,axis=0),
            "W2":dW2/batch_size,
            "b2":np.mean(db2,axis=0),
            "W3":dW3/batch_size,
            "b3":np.mean(db3,axis=0),
        }
        for name,grad in grads.items():
            params[name]=params[name]-learning_rate*grad
        return

    # Original method name, kept so existing callers continue to work.
    backward_passward=backward_pass

    def forward_pass(self,x):
        """Propagate ``x`` through the network and return the softmax output.

        All intermediate pre-activations (``Z*``) and activations (``A*``) are
        stored in ``self.status`` for use by ``backward_pass``.
        """
        # input layer to hidden 1 layer
        A0=x
        Z1=self.InnerProduct_ForProp(A0,self.params['W1'],self.params['b1'])
        A1=self.Sigmoid_ForProp(Z1)

        # hidden 1 to hidden 2 layer
        Z2=self.InnerProduct_ForProp(A1,self.params['W2'],self.params['b2'])
        A2=self.Sigmoid_ForProp(Z2)
        # hidden 2 to output layer
        Z3=self.InnerProduct_ForProp(A2,self.params['W3'],self.params['b3'])
        A3=self.Softmax_ForProp(Z3)
        self.status={"A0":A0,
                     "Z1":Z1,
                     "A1":A1,
                     "Z2":Z2,
                     "A2":A2,
                     "Z3":Z3,
                     "A3":A3}
        return A3

    # Original method name, kept so existing callers continue to work.
    forward_passward=forward_pass

    def accuracy(self,x,targets):
        """Fraction of rows of ``x`` whose arg-max prediction matches ``targets``.

        Runs a forward pass, which overwrites ``self.status``.
        """
        outputs=self.forward_pass(x)
        return np.mean(np.argmax(outputs,axis=1)==np.argmax(targets,axis=1))

    def train(self,x_train,y_train,x_val,y_val):
        """Train for ``self.epochs`` full-batch gradient-descent steps.

        After every epoch the elapsed time, training accuracy and training
        loss are printed, followed by the validation accuracy when both
        ``x_val`` and ``y_val`` are given (pass ``None`` to skip it).
        """
        start=time.time()
        for i in range(self.epochs):
            output=self.forward_pass(x_train)
            # CrossEntropyLoss applies log-softmax internally, so it must be
            # given the logits (Z3), not the softmax probabilities.
            logits=torch.as_tensor(self.status["Z3"])
            loss_val=self.loss(logits,torch.as_tensor(y_train,dtype=logits.dtype)).item()

            self.backward_pass(output,y_train)
            print("epochs="+str(i+1)+"  Time Spent:"+str(time.time()-start))
            accuracy=self.accuracy(x_train,y_train)
            print("accuracy=",accuracy,"loss=",loss_val)
            if x_val is not None and y_val is not None:
                print("val_accuracy=",self.accuracy(x_val,y_val))




# Build a network with 784 inputs, hidden layers of 128 and 64 units and
# 10 outputs, then fit it on the training split.
nn_model = NeuralNetwork(sizes=[784, 128, 64, 10])
nn_model.train(
    x_train=x_train,
    y_train=y_train,
    x_val=x_val,
    y_val=y_val,
)

: 