In [99]:
import numpy as np

def sigmoid(z):
    return 1 / (1+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))

class NeuralNetwork():
        
    def __init__(self, nodes):
        
        length = len(nodes)
        self.length = length
        self.W = [np.random.randn(nodes[i], nodes[i-1]) for i in range(1, len(nodes))]
        self.B = [np.random.randn(nodes[i]) for i in range(1, len(nodes))]
    
    def forward(self, x):
        
        a = x
        for i in range(0, self.length-1):
            w = self.W[i]
            b = self.B[i]
            a = sigmoid(np.dot(w, a) + b)
        return a
        
    def compute_nablas(self, x, y):
        nabla_W =  [None] * len(self.W)
        nabla_B = [None] * len(self.B)
        
        a = x
        A = [x]
        Z = []
        
        for i in range(0, self.length-1):
            w = self.W[i]
            b = self.B[i]
            z = np.dot(w, a) + b
            a = sigmoid(z)
            
            Z.append(z)
            A.append(a)
    
        grad_loss = self.derivative_cost(A[-1], y)
        delta = grad_loss * sigmoid_prime(Z[-1])
        for i in range(self.length-1, 0, -1):
            
            nabla_B[i-1] = delta
            nabla_W[i-1] = np.dot(delta[:, None], (A[i-1][:, None]).T)
            
            if i > 1:
                w = self.W[i-1]
                sig_prime = sigmoid_prime(Z[i-2])
                
                #print('delta shape : {} ; w shape : {} ; sigmoid shape : {}'.format(delta.shape, w.shape, sig_prime.shape))
                delta = np.dot(np.transpose(w), delta) * sig_prime
        
        return nabla_W, nabla_B
        
    def train(self, training_set, epoch=10, eta=0.1, test_set=None):
        
        x_train, y_train = training_set
        for i in range(epoch):
            
            nabla_b = [np.zeros(b.shape) for b in self.B]
            nabla_w = [np.zeros(w.shape) for w in self.W]
            
            for (x, y) in zip(x_train, y_train):
                nW, nB = self.compute_nablas(x, y)
                self.W = [w-eta*nw for w, nw in zip(self.W, nW)]
                self.B = [b-eta*nb for b, nb in zip(self.B, nB)]
            
            if test_set:
                x_test, y_test = test_set
                predictions = [self.forward(x) for x in x_test]
                
                count = 0
                for j in range(len(predictions)):
                    if np.argmax(predictions[j]) == np.argmax(y_test[j]):
                        count += 1
                
                print('Epoch {} : {} / {}'.format(i, count, len(y_test) ))

    """
    Mean squared error
    """
    def derivative_cost(self, pred, true):
        return pred - true

In [None]:
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

b = np.zeros((y_train.size, y_train.max()+1))
b[np.arange(y_train.size),y_train] = 1

y_train = b

b = np.zeros((y_test.size, y_test.max()+1))
b[np.arange(y_test.size),y_test] = 1

y_test = b

x_train = x_train.reshape((60000, 784))
x_test = x_test.reshape((10000, 784))

In [104]:
nn = NeuralNetwork([784, 100, 100, 10])
nn.train((x_train, y_train), 50, 0.01, (x_test, y_test))

  after removing the cwd from sys.path.


Epoch 0 : 4357 / 10000
Epoch 1 : 6034 / 10000
Epoch 2 : 7058 / 10000
Epoch 3 : 7388 / 10000
Epoch 4 : 7645 / 10000
Epoch 5 : 7711 / 10000
Epoch 6 : 7933 / 10000
Epoch 7 : 8153 / 10000
Epoch 8 : 8084 / 10000
Epoch 9 : 8246 / 10000
Epoch 10 : 8203 / 10000
Epoch 11 : 8318 / 10000
Epoch 12 : 8306 / 10000
Epoch 13 : 8350 / 10000
Epoch 14 : 7995 / 10000
Epoch 15 : 8200 / 10000
Epoch 16 : 8301 / 10000
Epoch 17 : 8446 / 10000
Epoch 18 : 8503 / 10000
Epoch 19 : 8512 / 10000
Epoch 20 : 8458 / 10000
Epoch 21 : 8443 / 10000
Epoch 22 : 8506 / 10000
Epoch 23 : 8677 / 10000
Epoch 24 : 8593 / 10000
Epoch 25 : 8511 / 10000
Epoch 26 : 8694 / 10000
Epoch 27 : 8722 / 10000
Epoch 28 : 8758 / 10000
Epoch 29 : 8725 / 10000
Epoch 30 : 8606 / 10000
Epoch 31 : 8728 / 10000
Epoch 32 : 8715 / 10000
Epoch 33 : 8785 / 10000
Epoch 34 : 8781 / 10000
Epoch 35 : 8810 / 10000
Epoch 36 : 8758 / 10000
Epoch 37 : 8709 / 10000
Epoch 38 : 8780 / 10000
Epoch 39 : 8863 / 10000
Epoch 40 : 8723 / 10000
Epoch 41 : 8848 / 10000
Ep

In [105]:
nn_2 = NeuralNetwork([784, 32, 10])
nn_2.train((x_train, y_train), 15, 0.01, (x_test, y_test))

  after removing the cwd from sys.path.


Epoch 0 : 4745 / 10000
Epoch 1 : 5451 / 10000
Epoch 2 : 5986 / 10000
Epoch 3 : 6368 / 10000
Epoch 4 : 6756 / 10000
Epoch 5 : 6902 / 10000
Epoch 6 : 6838 / 10000
Epoch 7 : 7049 / 10000
Epoch 8 : 7127 / 10000
Epoch 9 : 7129 / 10000
Epoch 10 : 7059 / 10000
Epoch 11 : 7137 / 10000
Epoch 12 : 7254 / 10000
Epoch 13 : 7310 / 10000
Epoch 14 : 7433 / 10000


6 6


  after removing the cwd from sys.path.
