In [None]:
import copy
import numpy as np
from sklearn import preprocessing
import h5py
import sklearn.svm
import random
from sklearn.metrics import f1_score, accuracy_score

In [None]:
class Layer:
    """Plain state container for one layer of the feed-forward network.

    The class holds no logic of its own; the training code reads and writes
    the fields below through the getter/setter pairs.

    Attributes:
        neurons: number of units in the layer.
        theta: weight matrix (W) connecting this layer to the next one.
        b: bias term.
        z: pre-activation values, z = theta.T * X + b.
        a: activation values, a = sigmoid(z), softmax(z), ...
        delta: error term of the layer (e.g. from delta_cross_entropy()).
        DELTA_THETA: raw weight gradient (dw) before regularisation.
        DELTA_BIAS: raw bias gradient (db).
        dTheta: complete weight-gradient term used in the update step.
        dBias: complete bias-gradient term.
    """

    # constructor
    def __init__(self, neurons=0, theta=None, b=None, z=None, a=None, delta=None,
                 DELTA_THETA=None, DELTA_BIAS=None, dTheta=None, dBias=None):
        """Store the given values; anything omitted starts as 0 / a fresh empty list.

        ``None`` is used as the "not supplied" marker instead of ``[]`` so that
        two layers never end up sharing one default list object.
        """
        self.neurons = neurons  # neurons count in layer
        self.theta = [] if theta is None else theta  # Weight vector(W)
        self.b = [] if b is None else b  # bias
        self.z = [] if z is None else z  # hypothesis z = W.T * X + b
        self.a = [] if a is None else a  # activation a = sigmoid(z), relu(z), ...
        self.delta = [] if delta is None else delta  # error term of this layer
        self.DELTA_THETA = [] if DELTA_THETA is None else DELTA_THETA  # only derivative weight vector = dw
        self.DELTA_BIAS = [] if DELTA_BIAS is None else DELTA_BIAS  # only derivative bias vector = db
        self.dTheta = [] if dTheta is None else dTheta  # complete derivative term for the weights
        self.dBias = [] if dBias is None else dBias  # complete derivative term for the bias

    def setNeurons(self, neurons):
        """Set the number of units in the layer."""
        self.neurons = neurons

    def getNeurons(self):
        """Return the number of units in the layer."""
        return self.neurons

    def setTheta(self, theta):
        """Replace the weight matrix."""
        self.theta = theta

    def getTheta(self):
        """Return the weight matrix."""
        return self.theta

    def setB(self, b):
        """Replace the bias term."""
        self.b = b

    def getB(self):
        """Return the bias term."""
        return self.b

    def setZ(self, z):
        """Store the pre-activation values."""
        self.z = z

    def getZ(self):
        """Return the pre-activation values."""
        return self.z

    def setA(self, a):
        """Store the activation values."""
        self.a = a

    def getA(self):
        """Return the activation values."""
        return self.a

    def setDelta(self, delta):
        """Store the error term of the layer."""
        self.delta = delta

    def getDelta(self):
        """Return the error term of the layer."""
        return self.delta

    def setDELTA_THETA(self, DELTA_THETA):
        """Store the raw weight gradient."""
        self.DELTA_THETA = DELTA_THETA

    def getDELTA_THETA(self):
        """Return the raw weight gradient."""
        return self.DELTA_THETA

    def setDELTA_BIAS(self, DELTA_BIAS):
        """Store the raw bias gradient."""
        self.DELTA_BIAS = DELTA_BIAS

    def getDELTA_BIAS(self):
        """Return the raw bias gradient."""
        return self.DELTA_BIAS

    def setDTheta(self, dTheta):
        """Store the complete weight-gradient term."""
        self.dTheta = dTheta

    def getDTheta(self):
        """Return the complete weight-gradient term."""
        return self.dTheta

    def setDBias(self, dBias):
        """Store the complete bias-gradient term."""
        self.dBias = dBias

    def getDBias(self):
        """Return the complete bias-gradient term."""
        return self.dBias
    
    

In [None]:
# Open the MNIST subset read-only: the notebook only reads from the file, and
# 'r+' would request write access (and fail on a read-only file) for no benefit.
dataset = h5py.File('../MNIST_Subset-1.h5', 'r')

print(list(dataset.keys()))



In [None]:
data_X = dataset['X']
data_Y = dataset['Y']

# Read both datasets fully into memory. Indexing with an empty tuple replaces
# the `Dataset.value` property, which was deprecated in h5py 2.x and removed
# in h5py 3.0.
X = np.array(data_X[()])
Y = np.array(data_Y[()])

print(X.shape,Y.shape)

In [None]:
print(X.shape)
# Flatten every sample into one row. The sample count is taken from the data
# itself instead of being hard-coded, so a different subset size still works.
X = X.reshape(X.shape[0], -1)
print(X.shape)

# Standardise each pixel column with scikit-learn's preprocessing.scale.
X = preprocessing.scale(X)

# m is the sample count; the loss and gradient helpers read it as a global.
m = X.shape[0]
print('no.of samples:',m)

In [None]:
# Switch to a features-by-samples layout (784 x 14251 for this dataset), so
# that every column of X is one data point.
X = np.transpose(X)
print(X.shape)

In [None]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    The naive formula evaluates exp(-z), which overflows for large negative z.
    Here the exponent is always non-positive, so nothing overflows:
        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))
    Both branches are algebraically the same function.

    Args:
        z: scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar for
        scalar input).
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], can underflow but never overflow
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwraps a 0-d array to a scalar; no-op for n-d arrays

In [None]:
def softmax(z):
    """Column-wise softmax: exp(z) normalised so every column sums to 1.

    The maximum of each column is subtracted before exponentiating. This does
    not change the result mathematically, but keeps exp() from overflowing to
    inf (which would turn the output into inf / inf = nan).

    Args:
        z: array of scores; normalisation runs along axis 0, so for a 2-D
           input each column is treated as one sample.

    Returns:
        Array of the same shape as ``z`` holding the normalised exponentials.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; keep the (empty) shape and a float dtype.
        return np.exp(z)
    shifted = z - np.max(z, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return np.divide(exps, np.sum(exps, axis=0))

In [None]:
def stableSoftmax(z):
    """Numerically stable column-wise softmax.

    Each column is shifted by its OWN maximum before exponentiating. Shifting
    by the global maximum of the whole matrix avoids overflow, but a column
    whose scores all lie far below that global maximum underflows to all
    zeros and is then normalised as 0 / 0 = nan.

    Args:
        z: array of scores; normalisation runs along axis 0, so for a 2-D
           input each column is treated as one sample.

    Returns:
        Array of the same shape as ``z`` whose columns sum to 1.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; keep the (empty) shape and a float dtype.
        return np.exp(z)
    exps = np.exp(z - np.max(z, axis=0, keepdims=True))
    return np.divide(exps, np.sum(exps, axis=0))

In [None]:
def delta_cross_entropy(z, y, n_samples=None):
    """Error term of the output layer: (z - y) divided by the sample count.

    Args:
        z: predicted values of the output layer.
        y: target values with the same shape as ``z``.
        n_samples: number of samples to average over. When omitted, the
            module-level sample count ``m`` is used, which is the original
            behaviour of this helper.

    Returns:
        Array ``(z - y) / n_samples``.
    """
    if n_samples is None:
        n_samples = m  # global sample count set when the data is loaded
    grad = (z - y) / n_samples
    return grad

In [None]:
def crossEntropy(a, y, n_samples=None):
    """Element-wise cross-entropy contributions: -y * log(a) / n_samples.

    The result keeps the shape of the inputs (it is not summed), so the caller
    decides how to reduce it.

    Args:
        a: predicted probabilities.
        y: target values with the same shape as ``a``.
        n_samples: number of samples to average over. When omitted, the
            module-level sample count ``m`` is used, which is the original
            behaviour of this helper.

    Returns:
        Array of per-element loss contributions.
    """
    if n_samples is None:
        n_samples = m  # global sample count set when the data is loaded
    # Floor the probabilities before the log: log(0) is -inf, and 0 * -inf
    # would poison the result with nan wherever y == 0 and a == 0.
    safe_a = np.maximum(a, 1e-12)
    return (-y * np.log(safe_a)) / n_samples

In [None]:
def accuracy(y_predicted, y_true=None):
    """Print and return the fraction of samples whose top-scoring class is correct.

    Args:
        y_predicted: 2-D array-like with one row per sample and one column per
            class; the predicted class of a row is the index of its largest
            value (the first one on ties).
        y_true: class labels, one per sample. When omitted, the module-level
            label array ``Y`` is used, which is the original behaviour.

    Returns:
        The accuracy as a float in [0, 1]. It is also printed, as before.

    Raises:
        ValueError: if there are no samples or the number of rows in
            ``y_predicted`` differs from the number of labels.
    """
    if y_true is None:
        y_true = Y  # global labels loaded from the dataset
    scores = np.asarray(y_predicted)
    labels = np.asarray(y_true).reshape(-1)  # accepts (n,) as well as (n, 1)
    if scores.ndim != 2 or scores.shape[0] == 0 or scores.shape[0] != labels.shape[0]:
        raise ValueError(
            'expected a non-empty (samples, classes) score matrix with one label '
            'per row, got scores %s and labels %s' % (scores.shape, labels.shape))

    predicted = np.argmax(scores, axis=1)
    score = float(np.mean(predicted == labels))
    print(score)
    return score

In [None]:
# One-hot encode the labels: row k of a 10x10 identity matrix is the encoding
# of class k, so indexing it with the integer labels yields one encoded row
# per sample. The result is transposed so that every column is one sample.
label_indices = np.asarray(Y).astype(int).reshape(-1)
y_actual = np.eye(10, dtype=int)[label_indices].T


In [None]:
# Units per layer, from the input layer to the output layer.
neurons=[784,100,50,50,10]
# Layer count is derived from the architecture so the two cannot drift apart.
l=len(neurons)

# Dedicated, seeded generator so the initial weights are reproducible.
weight_rng=np.random.RandomState(42)

layers=[]
for i, width in enumerate(neurons):
    lay=Layer()
    lay.setNeurons(width)

    if(i!=l-1):
        fan_in, fan_out = width, neurons[i+1]
        # Glorot/Xavier uniform initialisation: small weights centred on zero.
        # Drawing every weight from [0.1, 1) makes all pre-activations large
        # and positive, which saturates the sigmoids and stalls training.
        limit=np.sqrt(6.0/(fan_in+fan_out))
        theta=weight_rng.uniform(low=-limit,high=limit,size=(fan_in,fan_out))
        # Gradient buffer, stored transposed relative to theta.
        DELTA_THETA=np.zeros((fan_out,fan_in))
        #DELTA_BIAS=
        #bias=

        lay.setDELTA_THETA(DELTA_THETA)
        lay.setTheta(theta)

    layers.append(lay)

print(len(layers))
for i in range(l-1):# n-1 layers because the last layer has no DELTA_THETA and theta
    print('layer',i,'-->theta:',layers[i].getTheta().shape,' DELTA:',layers[i].getDELTA_THETA().shape)

In [None]:

# Full-batch gradient descent over the layers built above.
layers[0].setA(X)      # activations of the input layer are the data itself
regParam=0.0001        # L2 regularisation strength (lambda)
alpha=1                # learning rate
maxIterations=20000
logEvery=100           # report accuracy every `logEvery` iterations (1 = every iteration)

n_layers=len(layers)

for iteration in range(maxIterations):

    # Forward propagation: sigmoid on hidden layers, softmax on the output layer.
    for i in range(1,n_layers):
        z=np.dot(layers[i-1].getTheta().T, layers[i-1].getA())
        if(i==n_layers-1):
            a=softmax(z)
        else:
            a=sigmoid(z)
        layers[i].setZ(z)
        layers[i].setA(a)

    output=layers[-1].getA()

    # Stop as soon as the output turns into NaN. Comparing with `== np.nan`
    # is always False (NaN never compares equal), so test with np.isnan.
    if(np.isnan(output).any()):
        print('Iteration:',iteration,'--> output contains NaN, stopping')
        break

    # Backward propagation. The loop stops at layer 1: the input layer has no
    # error term, and none is read by the gradient computation below.
    for i in range(n_layers-1,0,-1):
        if(i==n_layers-1):
            loss=delta_cross_entropy(layers[i].getA(),y_actual)
        else:
            act=layers[i].getA()
            loss=np.dot(layers[i].getTheta(),layers[i+1].getDelta()) * (act*(1-act))
        layers[i].setDelta(loss)

    for i in range(0,n_layers-1):
        # Gradient of THIS iteration only; it must not be added to the one
        # left over from earlier iterations. The dot product already sums
        # over all samples.
        D=np.dot(layers[i+1].getDelta(),layers[i].getA().T)
        layers[i].setDELTA_THETA(D)

        # delta_cross_entropy already divides by m, so only the
        # regularisation term still needs the 1/m factor here.
        dT=D.T+((regParam/m)*layers[i].getTheta())
        layers[i].setDTheta(dT)

    if(iteration%logEvery==0 or iteration==maxIterations-1):
        print('Iteration:',iteration,'--> ',end='')
        accuracy(output.T)

    # Gradient-descent step.
    for i in range(0,n_layers-1):
        newTh=layers[i].getTheta()-(alpha*layers[i].getDTheta())
        layers[i].setTheta(newTh)


In [None]:
# DELTA1=np.zeros((100,784))
# DELTA2=np.zeros((10,100))

# theta1=np.random.uniform(low=0.1,high=1,size=(784,100)) #randomly generated weight vector for Hidden layer 1 and Size is 78400= 784featues and 100 neurons in hidden layer1
# theta2=np.random.uniform(low=0.1,high=1,size=(100,10)) #randomly generated weight vector for output layer and Size is 1000= 100Hidden units and 10 output layer units in hidden layer1

# DELTA1=np.zeros((784,100))
# DELTA2=np.zeros((100,10))

# # Layer 1 (Input Layer)
# a1=X
# regParam=0.0001
# alpha=1
# maxIterations=1
# for iter in range(maxIterations):
    
#     # ForwardProp
#     # Layer 2 (Hidden layer 1)
#     z1=np.dot(theta1.T,a1)
#     a2=sigmoid(z1)
    
#     # Layer 3 (Output layer)
#     z2=np.dot(theta2.T,a2)
# #     a3=softmax(z2)
#     a3=stableSoftmax(z2)

#     yPredicted=a3
    
#     #BackProp
#     delta3=delta_cross_entropy(a3,y_actual)
#     delta2=np.dot(theta2,delta3) * (a2*(1-a2))
    
    
#     DELTA2=DELTA2+np.dot(delta3,a2.T) #only derivative
#     DELTA1=DELTA1+np.dot(delta2,a1.T) #only derivative
    
#     print(DELTA1.shape,theta1.shape)
#     print(DELTA2.shape,theta2.shape)
#     dTheta2=(1/m)*(DELTA2.T+(regParam*theta2)) #whole derivative part
#     dTheta1=(1/m)*(DELTA1.T+(regParam*theta1)) #whole derivative part
    
#     print('Iteration:',iter,'--> ',end='')
#     accuracy(yPredicted.T)
#     if(accuracy(yPredicted.T) == np.nan):
#         break
        
#     theta2=theta2-(alpha*dTheta2)
#     theta1=theta1-(alpha*dTheta1)

In [None]:
def test(testX, testY=None):
    """Run a forward pass of the trained network on ``testX`` and report accuracy.

    The pass uses the weights currently stored in the global ``layers`` list
    but keeps its activations in local variables, so the state saved on the
    layers by training is left untouched.

    Args:
        testX: input matrix laid out like the training data (one column per
            sample, one row per input feature).
        testY: optional class labels, one per column of ``testX``. When
            omitted, the predictions are scored with ``accuracy()``, i.e.
            against the global label array it uses.

    Returns:
        The output-layer activations (one column per sample). If they contain
        NaN, a message is printed and they are returned without scoring.
    """
    n_layers = len(layers)

    # Forward propagation: sigmoid on hidden layers, softmax on the output layer.
    a = testX
    for i in range(1, n_layers):
        z = np.dot(layers[i-1].getTheta().T, a)
        if(i == n_layers-1):
            a = softmax(z)
        else:
            a = sigmoid(z)

    # `== np.nan` is always False, so NaN has to be detected with np.isnan.
    if(np.isnan(a).any()):
        print('output contains NaN, accuracy not computed')
        return a

    if(testY is None):
        accuracy(a.T)
    else:
        predicted = np.argmax(a, axis=0)
        print(float(np.mean(predicted == np.asarray(testY).reshape(-1))))
    return a