In [77]:
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
import pickle

#BASE CLASS
class Layer:
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size



    def forward(self,input):
        pass



    def backward(self,output_error,learning_size):
        pass
    




class Linear(Layer):
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size
        self.weights=np.random.randn(input_size,output_size)
        self.bias=np.random.randn(1,output_size)


    def forward(self,input):
        self.input=input
        return np.dot(input,self.weights)+self.bias

    def backward(self,output_error,learning_rate):
        input_error=np.dot(output_error,self.weights.T)
        weights_error=np.dot(self.input.T,output_error)
        self.weights-=learning_rate*weights_error

        np.save('XOR_s.w.py',self.weights)

        self.bias-=learning_rate*output_error
        return input_error




class sigmoidActivation(Layer):
    def __init__(self,activation,activation_back):
        self.activation=activation
        self.activation_backward=activation_back




    def forward(self,input):
        self.input=input
        return self.activation(input)



    def backward(self,output_error,learning_rate):
        return output_error*self.activation_backward(self.input)



def sigmoid(x):
    return 1/(1+np.exp(-x))



def sigmoid_backward(x):
    return np.exp(-x) / (1+np.exp(-x))**2





class HyperbolicTangent(Layer):
    def __init__(self,h_tangent,h_tangent_back):
        self.h_tangent=h_tangent
        self.h_tangent_backward=h_tangent_back



    def forward(self,input):
        self.input=input
        return self.h_tangent(input)


    def backward(self,output_error,learning_rate):
        return output_error*self.h_tangent_backward(self.input)



def hyperbolictangent(x):
    return np.tanh(x)


def hyperbolic_backward(x):
    return 1-np.tanh(x)**2




class softmax(Layer):
    def __init__(self,input):
        self.input_sz=input


    def forward(self,input):
        self.input=input
        tm=np.exp(input)
        self.output=tm/np.sum(tm)
        return self.output


    def backward(self,output_error,learning_rate):
        inp_error=np.zeros(output_error.shape)
        op=np.tile(self.output.T,self.input_sz)

        return self.output*np.dot(output_error,np.identity(self.input_sz)-op)






def negativeloglikelihood(y_true,y_pred):
    return np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred))



def negativelog_backward(y_true,y_pred):
    return ((1-y_true)/(1-y_pred)-y_true/y_pred)/np.size(y_true)





x_train=np.array([[[0,0]],[[0,1]],[[1,0]],[[1,1]]])
y_train=np.array([[[0]],[[1]],[[1]],[[0]]])



network=[
    Linear(2,2),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),

    Linear(2,1),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward)

]



def negativeloglike_XOR(y_true,y_pred):
    if np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred)) < 0:
        return 0


    else:
        return np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred))



def negativeloglike_backward_XOR(y_true,y_pred):
    return ((1-y_true)/(1-y_pred)-y_true/y_pred)/np.size(y_true)






epochs=100
learning_rate=0.1


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglike_XOR(y_true,op)
        output_error=negativeloglike_backward_XOR(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)





def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output


for test in range(len(x_train)):
    pred=predict(network,x_train)[test]
    if pred<0:
        pred=[[0]]

 
    print(pred)




[[0]]
[[0.60207994]]
[[0.48350913]]
[[0.52116931]]


In [58]:
class Reshape:
    def __init__(self,inp_shape):
        self.inp_shape=inp_shape
        
        
    def forward(self,input):
        return np.reshape(input,(1,-1))
    
    
    def backward(self,op_error,learning_rate):
        return np.reshape(op_error,self.inp_shape)

In [59]:
(x_train,y_train),(x_test,y_test)=mnist.load_data()

x_train=x_train.astype('float32')
x_train/=255
y_train=np_utils.to_categorical(y_train)
x_train=x_train[0:2000]
y_train=y_train[0:2000]

x_test=x_test.astype('float32')
x_test/=255

y_test=np_utils.to_categorical(y_test)

In [60]:
def MSE(y_true,y_pred):
    return np.mean(np.power(y_true-y_pred,2))


In [61]:
network=[
    Reshape(inp_shape=(28,28)),
    Linear(784,128),
    sigmoidActivation(sigmoid,sigmoid_backward),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),
    Linear(128,10),
    softmax(10)
]

epochs=5
learning_rate=0.1


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglikelihood(y_true,op)
        output_error=negativelog_backward(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)
            
    error/=len(x_train)
    print('%d/%d,error=%f'%(epoch+1,epochs,error))


1/5,error=0.420793
2/5,error=0.164667
3/5,error=0.113116
4/5,error=0.087286
5/5,error=0.070857


In [62]:
def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output

acc=sum([np.argmax(y)==np.argmax(predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)
error=sum([MSE(y,predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)

print('Accuracy',acc)
print('MSE',error)

Accuracy 0.7743
MSE 0.03269948719528011


In [63]:
samp=1000
for te,tru in zip(x_test[:samp],y_test[:samp]):
    pred=predict(network,te)[0]
    id=np.argmax(pred)
    id_tru=np.argmax(tru)
    print('Predicted Value:%s, True Value:%d'%(id,id_tru))

Predicted Value:7, True Value:7
Predicted Value:6, True Value:2
Predicted Value:1, True Value:1
Predicted Value:0, True Value:0
Predicted Value:9, True Value:4
Predicted Value:1, True Value:1
Predicted Value:4, True Value:4
Predicted Value:5, True Value:9
Predicted Value:6, True Value:5
Predicted Value:9, True Value:9
Predicted Value:0, True Value:0
Predicted Value:0, True Value:6
Predicted Value:9, True Value:9
Predicted Value:0, True Value:0
Predicted Value:1, True Value:1
Predicted Value:4, True Value:5
Predicted Value:9, True Value:9
Predicted Value:7, True Value:7
Predicted Value:3, True Value:3
Predicted Value:4, True Value:4
Predicted Value:9, True Value:9
Predicted Value:6, True Value:6
Predicted Value:2, True Value:6
Predicted Value:5, True Value:5
Predicted Value:4, True Value:4
Predicted Value:0, True Value:0
Predicted Value:7, True Value:7
Predicted Value:4, True Value:4
Predicted Value:0, True Value:0
Predicted Value:1, True Value:1
Predicted Value:3, True Value:3
Predicte

In [None]:
network=[
    Reshape(inp_shape=(28,28)),
    Linear(784,128),
    sigmoidActivation(sigmoid,sigmoid_backward),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),
    Linear(128,10),
    softmax(10)
]

epochs=100
learning_rate=0.01


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglikelihood(y_true,op)
        output_error=negativelog_backward(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)
            
    error/=len(x_train)
    print('%d/%d,error=%f'%(epoch+1,epochs,error))
    
    
    
def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output

acc=sum([np.argmax(y)==np.argmax(predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)
error=sum([MSE(y,predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)

print('Accuracy',acc)
print('MSE',error)

samp=10
for te,tru in zip(x_test[:samp],y_test[:samp]):
    pred=predict(network,te)[0]
    id=np.argmax(pred)
    id_tru=np.argmax(tru)
    print('Predicted Value:%s, True Value:%d'%(id,id_tru))


In [71]:
#initializing model with all zeros
class Layer:
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size



    def forward(self,input):
        pass



    def backward(self,output_error,learning_size):
        pass
    




class Linear(Layer):
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size
        self.weights=np.random.randn(input_size,output_size)/np.sqrt(input_size+output_size)
        self.weights=np.zeros(self.weights.shape)
        self.bias=np.random.randn(1,output_size)/np.sqrt(input_size+output_size)


    def forward(self,input):
        self.input=input
        return np.dot(input,self.weights)+self.bias

    def backward(self,output_error,learning_rate):
        input_error=np.dot(output_error,self.weights.T)
        weights_error=np.dot(self.input.T,output_error)
        self.weights-=learning_rate*weights_error

        #np.save('XOR_s.w.npy',self.weights)

        self.bias-=learning_rate*output_error
        return input_error




class sigmoidActivation(Layer):
    def __init__(self,activation,activation_back):
        self.activation=activation
        self.activation_backward=activation_back




    def forward(self,input):
        self.input=input
        return self.activation(input)



    def backward(self,output_error,learning_rate):
        return output_error*self.activation_backward(self.input)



def sigmoid(x):
    return 1/(1+np.exp(-x))



def sigmoid_backward(x):
    return np.exp(-x) / (1+np.exp(-x))**2





class HyperbolicTangent(Layer):
    def __init__(self,h_tangent,h_tangent_back):
        self.h_tangent=h_tangent
        self.h_tangent_backward=h_tangent_back



    def forward(self,input):
        self.input=input
        return self.h_tangent(input)


    def backward(self,output_error,learning_rate):
        return output_error*self.h_tangent_backward(self.input)



def hyperbolictangent(x):
    return np.tanh(x)


def hyperbolic_backward(x):
    return 1-np.tanh(x)**2




class softmax(Layer):
    def __init__(self,input):
        self.input_sz=input


    def forward(self,input):
        self.input=input
        tm=np.exp(input)
        self.output=tm/np.sum(tm)
        return self.output


    def backward(self,output_error,learning_rate):
        inp_error=np.zeros(output_error.shape)
        op=np.tile(self.output.T,self.input_sz)

        return self.output*np.dot(output_error,np.identity(self.input_sz)-op)






def negativeloglikelihood(y_true,y_pred):
    return np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred))



def negativelog_backward(y_true,y_pred):
    return ((1-y_true)/(1-y_pred)-y_true/y_pred)/np.size(y_true)



network=[
    Reshape(inp_shape=(28,28)),
    Linear(784,128),
    sigmoidActivation(sigmoid,sigmoid_backward),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),
    Linear(128,10),
    softmax(10)
]

epochs=20
learning_rate=0.1


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglikelihood(y_true,op)
        output_error=negativelog_backward(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)
            
    error/=len(x_train)
    print('%d/%d,error=%f'%(epoch+1,epochs,error))
    
    
    
def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output

acc=sum([np.argmax(y)==np.argmax(predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)
error=sum([MSE(y,predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)

print('Accuracy',acc)
print('MSE',error)

samp=10
for te,tru in zip(x_test[:samp],y_test[:samp]):
    pred=predict(network,te)[0]
    id=np.argmax(pred)
    id_tru=np.argmax(tru)
    print('Predicted Value:%s, True Value:%d'%(id,id_tru))



1/20,error=0.332946
2/20,error=0.329397
3/20,error=0.322662
4/20,error=0.304503
5/20,error=0.261571
6/20,error=0.219050
7/20,error=0.176217
8/20,error=0.145646
9/20,error=0.123571
10/20,error=0.105211
11/20,error=0.089465
12/20,error=0.076724
13/20,error=0.066960
14/20,error=0.059534
15/20,error=0.053815
16/20,error=0.049281
17/20,error=0.045560
18/20,error=0.042414
19/20,error=0.039687
20/20,error=0.037269
Accuracy 0.8879
MSE 0.016729691502601365
Predicted Value:7, True Value:7
Predicted Value:2, True Value:2
Predicted Value:1, True Value:1
Predicted Value:0, True Value:0
Predicted Value:4, True Value:4
Predicted Value:1, True Value:1
Predicted Value:4, True Value:4
Predicted Value:9, True Value:9
Predicted Value:6, True Value:5
Predicted Value:9, True Value:9


In [73]:
#Initializing values between -10 and 10 with learning rate 0.1
class Layer:
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size



    def forward(self,input):
        pass



    def backward(self,output_error,learning_size):
        pass
    




class Linear(Layer):
    def __init__(self,input_size,output_size):
        self.input_size=input_size
        self.output_size=output_size
        self.weights=np.random.randn(input_size,output_size)/np.sqrt(input_size+output_size)
        self.weights=np.zeros(self.weights.shape)
        self.bias=np.random.randn(1,output_size)/np.sqrt(input_size+output_size)


    def forward(self,input):
        self.input=input
        return np.dot(input,self.weights)+self.bias

    def backward(self,output_error,learning_rate):
        input_error=np.dot(output_error,self.weights.T)
        weights_error=np.dot(self.input.T,output_error)
        self.weights-=learning_rate*weights_error

        self.bias-=learning_rate*output_error
        return input_error




class sigmoidActivation(Layer):
    def __init__(self,activation,activation_back):
        self.activation=activation
        self.activation_backward=activation_back




    def forward(self,input):
        self.input=input
        return self.activation(input)



    def backward(self,output_error,learning_rate):
        return output_error*self.activation_backward(self.input)



def sigmoid(x):
    return 1/(1+np.exp(-x))



def sigmoid_backward(x):
    return np.exp(-x) / (1+np.exp(-x))**2





class HyperbolicTangent(Layer):
    def __init__(self,h_tangent,h_tangent_back):
        self.h_tangent=h_tangent
        self.h_tangent_backward=h_tangent_back



    def forward(self,input):
        self.input=input
        return self.h_tangent(input)


    def backward(self,output_error,learning_rate):
        return output_error*self.h_tangent_backward(self.input)



def hyperbolictangent(x):
    return np.tanh(x)


def hyperbolic_backward(x):
    return 1-np.tanh(x)**2




class softmax(Layer):
    def __init__(self,input):
        self.input_sz=input


    def forward(self,input):
        self.input=input
        tm=np.exp(input)
        self.output=tm/np.sum(tm)
        return self.output


    def backward(self,output_error,learning_rate):
        inp_error=np.zeros(output_error.shape)
        op=np.tile(self.output.T,self.input_sz)

        return self.output*np.dot(output_error,np.identity(self.input_sz)-op)






def negativeloglikelihood(y_true,y_pred):
    return np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred))



def negativelog_backward(y_true,y_pred):
    return ((1-y_true)/(1-y_pred)-y_true/y_pred)/np.size(y_true)





(x_train,y_train),(x_test,y_test)=mnist.load_data()

x_train=x_train.astype('float32')
x_train/=255
y_train=np_utils.to_categorical(y_train)
x_train=x_train[0:2000]
y_train=y_train[0:2000]

x_test=x_test.astype('float32')
x_test/=255

y_test=np_utils.to_categorical(y_test)


network=[
    Reshape(inp_shape=(28,28)),
    Linear(784,128),
    sigmoidActivation(sigmoid,sigmoid_backward),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),
    Linear(128,10),
    softmax(10)
]

epochs=10
learning_rate=0.1


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglikelihood(y_true,op)
        output_error=negativelog_backward(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)
            
    error/=len(x_train)
    print('%d/%d,error=%f'%(epoch+1,epochs,error))
    
def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output

acc=sum([np.argmax(y)==np.argmax(predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)
error=sum([MSE(y,predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)

print('Accuracy',acc)
print('MSE',error)

samp=10
for te,tru in zip(x_test[:samp],y_test[:samp]):
    pred=predict(network,te)[0]
    id=np.argmax(pred)
    id_tru=np.argmax(tru)
    print('Predicted Value:%s, True Value:%d'%(id,id_tru))
    
    


1/10,error=0.332972
2/10,error=0.329427
3/10,error=0.322204
4/10,error=0.300393
5/10,error=0.260507
6/10,error=0.222607
7/10,error=0.185947
8/10,error=0.152595
9/10,error=0.124835
10/10,error=0.102718
Accuracy 0.8314
MSE 0.027641660830204143
Predicted Value:7, True Value:7
Predicted Value:2, True Value:2
Predicted Value:1, True Value:1
Predicted Value:0, True Value:0
Predicted Value:4, True Value:4
Predicted Value:1, True Value:1
Predicted Value:4, True Value:4
Predicted Value:9, True Value:9
Predicted Value:6, True Value:5
Predicted Value:9, True Value:9


In [75]:
network=[
    Reshape(inp_shape=(28,28)),
    Linear(784,128),
    sigmoidActivation(sigmoid,sigmoid_backward),
    HyperbolicTangent(hyperbolictangent,hyperbolic_backward),
    Linear(128,10),
    softmax(10)
]

epochs=20
learning_rate=1


for epoch in range(epochs):
    error=0
    for x,y_true in zip(x_train,y_train):
        op=x
        for layer in network:
            op=layer.forward(op)

        error+=negativeloglikelihood(y_true,op)
        output_error=negativelog_backward(y_true,op)

        for layer in reversed(network):
            output_error=layer.backward(output_error,learning_rate)
            
    error/=len(x_train)
    print('%d/%d,error=%f'%(epoch+1,epochs,error))
    
    
    
def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)

    return output

acc=sum([np.argmax(y)==np.argmax(predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)
error=sum([MSE(y,predict(network,x)) for x,y in zip(x_test,y_test)])/len(x_test)

print('Accuracy',acc)
print('MSE',error)

samp=10
for te,tru in zip(x_test[:samp],y_test[:samp]):
    pred=predict(network,te)[0]
    id=np.argmax(pred)
    id_tru=np.argmax(tru)
    print('Predicted Value:%s, True Value:%d'%(id,id_tru))

1/20,error=0.277804
2/20,error=0.121712
3/20,error=0.067675
4/20,error=0.049538
5/20,error=0.035140
6/20,error=0.023774
7/20,error=0.017743
8/20,error=0.011884
9/20,error=0.008835
10/20,error=0.006263
11/20,error=0.004692
12/20,error=0.003899
13/20,error=0.003215
14/20,error=0.002719
15/20,error=0.002395
16/20,error=0.002022
17/20,error=0.001701
18/20,error=0.001411
19/20,error=0.001219
20/20,error=0.001049
Accuracy 0.901
MSE 0.015639931861205502
Predicted Value:7, True Value:7
Predicted Value:2, True Value:2
Predicted Value:1, True Value:1
Predicted Value:0, True Value:0
Predicted Value:4, True Value:4
Predicted Value:1, True Value:1
Predicted Value:4, True Value:4
Predicted Value:9, True Value:9
Predicted Value:2, True Value:5
Predicted Value:9, True Value:9
