In [1]:
import numpy as np
import keras
# Load the MNIST train/test splits through Keras and unpack images and labels.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Show the shape of each array, in the order: train images, train labels,
# test images, test labels.
for split_array in (x_train, y_train, x_test, y_test):
    print(split_array.shape)



(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [4]:
# Object-oriented implementation of the neural network
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    ``self.network`` is a dict keyed by 1-based layer number. Every layer holds
    an ``"activations"`` column vector; layers 2..N additionally hold
    ``"weights"`` of shape (units, previous units) and ``"biases"`` of shape
    (units, 1). Gradient dicts (see ``create_gradient``) use the same layout
    for layers 2..N.
    """

    def __init__(self, activation_dims):
        """Create a network with ``activation_dims[i]`` units in layer ``i + 1``."""
        self.activation_dims = activation_dims
        self.layers = len(activation_dims)
        self.network = {}
        self.gradient = {}

        self.create_network()

    def create_network(self):
        """Allocate activations for every layer and random parameters for layers 2..N."""
        for layer in range(self.layers):
            units = self.activation_dims[layer]
            self.network[layer + 1] = {}

            if layer == 0:
                # Placeholder only: forward() overwrites it with the scaled input.
                # Zeros replace the previous read of uninitialised memory.
                self.network[1]["activations"] = np.zeros((units, 1))
            else:
                prev_units = self.activation_dims[layer - 1]
                # Random placeholder, overwritten by forward(); the draw is kept so
                # the sequence of RNG calls (and thus seeded weights) is unchanged.
                self.network[layer + 1]["activations"] = np.random.rand(units).reshape(-1, 1)
                # Weights and biases are drawn uniformly from [-0.5, 0.5).
                self.network[layer + 1]["weights"] = np.random.uniform(-0.5, 0.5, size=(units, prev_units))
                self.network[layer + 1]["biases"] = np.random.uniform(-0.5, 0.5, size=(units, 1))

    def create_gradient(self):
        """Return a zero-filled gradient dict shaped like the parameters of layers 2..N."""
        g = {}
        for layer in range(2, self.layers + 1):
            g[layer] = {
                "weights": np.zeros(self.network[layer]["weights"].shape),
                "biases": np.zeros(self.network[layer]["biases"].shape),
            }
        return g

    def calculate_z(self, a, w, b):
        """Return the pre-activation ``z = w . a + b``."""
        return np.dot(w, a) + b

    def sigmoid(self, x):
        """Return the element-wise sigmoid ``1 / (1 + exp(-x))``."""
        # exp(-x) overflows to inf for very negative x; 1 / (1 + inf) is still the
        # correct limit 0, so only the overflow warning is suppressed.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def cost_function_derivative(self, a, y):
        """Return ``2 * (a - y)``, the derivative of the squared error with respect to ``a``."""
        return 2 * (a - y)

    def sigmoid_derivative(self, x):
        """Return the element-wise derivative of the sigmoid at ``x``.

        Computed as ``s * (1 - s)`` with ``s = sigmoid(x)``. The previous form
        ``exp(-x) / (1 + exp(-x))**2`` evaluated to ``0 * inf = nan`` once
        ``exp(-x)`` overflowed.
        """
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, data, index):
        """Run a forward pass on ``data[index]`` and store every layer's activations.

        The sample is flattened to a column vector and divided by 255 before it
        is used as the first layer's activations.
        """
        self.network[1]["activations"] = data[index].reshape(-1, 1) / 255

        for layer in range(2, self.layers + 1):
            z = self.calculate_z(
                self.network[layer - 1]["activations"],
                self.network[layer]["weights"],
                self.network[layer]["biases"],
            )
            self.network[layer]["activations"] = self.sigmoid(z)

    def back(self, l, g, e=None):
        """Backpropagate from layer ``l`` down to layer 2, filling and returning ``g``.

        Uses the activations currently stored in the network, so ``forward``
        must have been called for the sample first.

        Args:
            l: layer to start from (``self.layers`` for a full backward pass).
            g: gradient dict as produced by ``create_gradient``; updated in place.
               When ``l`` is below the output layer, ``g[l + 1]["biases"]`` must
               already hold that layer's bias gradient.
            e: expected output; required when ``l`` is the output layer. A flat
               vector is accepted and treated as a column vector.

        Returns:
            The same dict ``g`` with ``"weights"`` and ``"biases"`` gradients set
            for layers ``l`` down to 2.

        Raises:
            TypeError: if ``l`` is the output layer and ``e`` is not given.
        """
        n = self.network

        for layer in range(l, 1, -1):
            if layer == self.layers:
                if e is None:
                    raise TypeError("back() requires the expected output 'e' for the output layer")
                # Force a column vector so a flat target cannot broadcast (k, 1) - (k,)
                # into a (k, k) matrix.
                expected = np.asarray(e).reshape(-1, 1)
                dc_by_da = self.cost_function_derivative(n[layer]["activations"], expected)
            else:
                # The bias gradient of the next layer is its error term (dC/dz);
                # push it back through that layer's weights.
                dc_by_da = np.dot(n[layer + 1]["weights"].T, g[layer + 1]["biases"])

            z = self.calculate_z(n[layer - 1]["activations"], n[layer]["weights"], n[layer]["biases"])
            da_by_dz = self.sigmoid_derivative(z)

            dc_by_dz = dc_by_da * da_by_dz
            # Outer product: entry (i, j) is dC/dz_i times the previous activation a_j.
            g[layer]["weights"] = np.dot(dc_by_dz, n[layer - 1]["activations"].T)
            g[layer]["biases"] = dc_by_dz

        return g

    def add_gradient(self, g1, g2):
        """Add ``g2`` into ``g1`` layer by layer (in place) and return ``g1``."""
        for layer in g1:
            g1[layer]["weights"] += g2[layer]["weights"]
            g1[layer]["biases"] += g2[layer]["biases"]

        return g1

    def average_gradient(self, g, batch_size):
        """Divide every gradient in ``g`` by ``batch_size`` (in place) and return ``g``.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")

        for layer in range(2, self.layers + 1):
            g[layer]["weights"] /= batch_size
            g[layer]["biases"] /= batch_size

        return g

    def gradient_descent(self, g, lr):
        """Update weights and biases in place: ``param -= lr * gradient``."""
        for layer in range(2, self.layers + 1):
            self.network[layer]["weights"] -= lr * g[layer]["weights"]
            self.network[layer]["biases"] -= lr * g[layer]["biases"]

    # Original (misspelled) name kept so existing callers keep working.
    graident_descent = gradient_descent

    def train(self):
        """Placeholder; not implemented yet."""
        pass

    def test(self):
        """Placeholder; not implemented yet."""
        pass
    
        

In [5]:
n = NeuralNetwork([784, 32, 16, 10])

learning_rate = 0.02
epochs = 3
batch_size = 10

# Derive sizes from the data and the network instead of hard-coding
# 60000 / 10000 / 10, so the cell keeps working on other subsets or layouts.
num_train = len(x_train)
num_test = len(x_test)
num_classes = n.activation_dims[-1]

print("activation dimensions :", n.activation_dims)
print("learning rate :", learning_rate)
print("epochs :", epochs)
print("batch_size :", batch_size)

for i in range(epochs):

    # Training: mini-batch gradient descent over the training set, in order.
    for batch_start in range(0, num_train, batch_size):
        # The final batch may be shorter when batch_size does not divide num_train.
        batch_end = min(batch_start + batch_size, num_train)

        n.gradient = n.create_gradient()

        for index in range(batch_start, batch_end):
            n.forward(x_train, index)

            # One-hot column vector for the expected digit.
            expect_num = y_train[index]
            expect_output = np.zeros((num_classes, 1))
            expect_output[expect_num] = 1

            n.gradient = n.add_gradient(n.gradient, n.back(l = n.layers, g = n.create_gradient(), e = expect_output))

        # Average over the samples actually seen in this batch, then update.
        n.gradient = n.average_gradient(n.gradient, batch_end - batch_start)
        n.graident_descent(n.gradient, learning_rate)

    # Evaluation: fraction of test samples whose strongest output matches the label.
    correct = 0
    for index in range(num_test):
        n.forward(x_test, index)
        expect_num = y_test[index]

        output = n.network[n.layers]["activations"]
        if np.argmax(output) == expect_num:
            correct += 1

    print("accuracy", i + 1, ":", (correct/num_test)*100, "%")

activation dimensions : [784, 32, 16, 10]
learning rate : 0.02
epochs : 3
batch_size : 10
accuracy 1 : 30.930000000000003 %
accuracy 2 : 55.25 %
accuracy 3 : 72.63 %
