In [1]:
import tensorflow as tf
import numpy as np
mnist = tf.keras.datasets.mnist

In [16]:
class layer():
    def __init__(self,inputs,units,activation='relu',use_bias=True,kernel_initializer=None,bias_regularizer=False,dropout=False):
        self.inputs = inputs
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
#         self.kernel_regularizer=kernel_regularizer
        self.bias_regularizer=bias_regularizer
        self.dropout = dropout
        self.w = np.random.rand(self.units,self.inputs) * 0.001
        self.b = np.random.rand(self.units,1)
        self.inward = 0
        self.z = 0
        self.a = 0
        self.dw = 0
        self.db = 0
        self.da = 0
        
    def sigmoid(self, x):
        #applying the sigmoid function
        return 1 / (1 + np.exp(-x))

    def sigmoid_d(self, x):
        return x * (1 - x)
    
    def softmax(self,x):
        x = x - np.mean(x)
#         print x
        expx = np.exp(x)
        return expx / expx.sum()
    
    def tanh(self, x):
        #applying the tanh function
        return 1 / (1 + np.exp(-x))

    def tanh_d(self, x):
        return (1 + x) * (1 - x)
    
    def relu(self,x):
        return np.maximum(0,x) 
    
    def relu_d(self,x):
        x[x<=0] = 0
        x[x>0] = 1
        return x

    def update(self,alpha):
#         print "W = ",self.w
#         print "b = ",self.b
#         print "dW = ",self.dw
#         print "db = ",self.db
        self.w += alpha * self.dw
        self.b += alpha * self.db
        
    
    def forward(self,inward):
        inward = inward.reshape((inward.shape[0],1))
        self.inward = inward
        self.z = np.dot(self.w,inward) 
        self.a = self.z
        if(self.activation == 'sigmoid'):
            self.a = self.sigmoid(self.a)
        elif(self.activation == 'tanh'):
            self.a = self.tanh(self.a)
        elif(self.activation == 'relu'):
            self.a = self.relu(self.a)
        elif(self.activation == 'softmax'):
            self.a = self.softmax(self.a)
        else:
            pass
        return self.a
            
    def backward(self,a):
#         print a.shape
        if(self.activation == 'sigmoid'):
            self.dz = np.multiply(a,self.sigmoid(self.z))
        elif(self.activation == 'tanh'):
            self.dz = np.multiply(a,self.tanh(self.z))
        elif(self.activation == 'relu'):
            self.dz = np.multiply(a,self.relu(self.z))
        elif(self.activation == 'softmax'):
            self.dz = a
        else:
            self.dz = 0 * a
            
        self.dw = np.dot(self.dz, self.inward.T)
        self.db = np.sum(self.dz, axis=1, keepdims=True)
        next_a = np.dot(self.w.T, self.dz)
        return next_a

In [54]:
class model():
    def __init__(self,data,data_label,accuracy=0.9,alpha=0.005,):
        self.layers = []
        self.alpha = alpha
        self.accuracy = accuracy
        self.data = data
        self.data_label = data_label 
        self.cm = np.zeros((self.data_label.shape[-1],self.data_label.shape[-1]),dtype=int)
        self.number_of_class = self.data_label.shape[-1]
        
    def add(self,layer):
        self.layers.append(layer)
    
    def train(self):
        i = 0
        while(i < 100):
            print i
            self.cm = np.zeros((self.data_label.shape[-1],self.data_label.shape[-1]),dtype=int)
            self.do_epoch()
            self.get_accuracy()
            i+=1
#             print self.layers[0].dw

    def get_accuracy(self):
        print self.cm
        print np.trace(self.cm)/(1.0*np.sum(self.cm))

    def update(self):
        for layer in self.layers:
            layer.update(self.alpha)
    
    def get_cost(self,y,y_label):
        y_label = y_label.reshape((y_label.shape[0],1))
#         print y_label,y
        if(self.number_of_class > 2):
            return (y_label - y)
        else:
            cost = (np.multiply(y, np.log(y_label)) + np.multiply(1 - y, np.log(1 - y_label)))
            return cost
    
    def predict(self,sample):
        for layer in self.layers:
            sample = layer.forward(sample)
        return sample.argmax()
    
    def do_epoch(self):
        total_cost = 0
        for j in range(len(self.data)):
            y_label = self.data_label[j]
            x = self.data[j]
            y = self.forprop(x)
            self.cm[y_label.argmax()][y.argmax()] += 1
            loss = self.get_cost(y,y_label)
#             print "loss = ",loss
            self.backprop(loss)
            self.update()
#             total_cost += loss
            
    def forprop(self,sample):
        for layer in self.layers:
#             print sample
            sample = layer.forward(sample)
                  
        return sample       
    
    def backprop(self,loss):
        sample = loss
        for i in range(len(self.layers)-1,-1,-1):
            sample = self.layers[i].backward(sample)
        

In [55]:
(x_train, y_train_label),(x_test, y_test_label) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [56]:
x_train = x_train.reshape([60000,784])
x_test = x_test.reshape([10000,784])
y_train = np.zeros((y_train_label.shape[0], 10))
y_train[np.arange(y_train_label.shape[0]), y_train_label] = 1
y_test = np.zeros((y_test_label.shape[0], 10))
y_test[np.arange(y_test_label.shape[0]), y_test_label] = 1

In [58]:
nn = model(x_train,y_train,0.6)
layer1 = layer(784,196)
layer2 = layer(196,58)
layer3 = layer(58,10,activation='softmax')
nn.add(layer1)
nn.add(layer2)
nn.add(layer3)
nn.train()

0
[[5903    0   16    3    0    0    0    0    1    0]
 [6719    0   21    1    0    0    0    0    1    0]
 [5942    0   15    0    0    0    0    0    1    0]
 [6115    0   15    1    0    0    0    0    0    0]
 [5823    0   14    4    0    0    0    0    1    0]
 [5409    0   11    0    0    0    0    0    1    0]
 [5901    0   16    1    0    0    0    0    0    0]
 [6251    0   13    1    0    0    0    0    0    0]
 [5834    0   16    1    0    0    0    0    0    0]
 [5925    0   21    2    0    0    0    0    1    0]]
0.09865
1
[[5549    0   72   86    0  122   60    2   32    0]
 [6485    0   68   58    0   47   61    0   23    0]
 [5619    0   70   99    0   87   60    2   21    0]
 [5809    0   62   78    0  100   51    0   31    0]
 [5606    0   58   54    0   52   49    0   23    0]
 [5141    0   56   69    0   84   43    1   27    0]
 [5616    0   64   69    0  102   48    1   18    0]
 [5943    0   66   67    0  103   62    0   24    0]
 [5481    0   76   91    0  113  

KeyboardInterrupt: 

[-1.5 -0.5  0.5  1.5]
[0.0320586  0.08714432 0.23688282 0.64391426]
