In [1]:
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is computed with ``np.exp``, so an array argument is
    transformed element-wise.
    """
    exp_neg = np.exp(-x)
    return 1.0 / (1.0 + exp_neg)

def gradSigmoid(x):
    """Return the derivative of ``sigmoid`` at ``x``, i.e. ``s * (1 - s)``
    where ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)
# def relu(x):
#     return (abs(x) + x) / 2

# def input(x):
#     return x

import random

class NN:
    """Fully connected feed-forward network with sigmoid activations.

    Every layer computes ``sigmoid(W @ a + b)``. Training is mini-batch
    gradient descent; gradients are obtained by backpropagation with the
    output error ``(a - y) * sigmoid'(z)`` (the quadratic-cost delta).

    Biases are stored as ``(n, 1)`` columns, so activations (and therefore
    inputs/targets passed to ``forward``/``backprop``) are expected to be
    column vectors of shape ``(n, 1)``.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of units per layer, input layer first, e.g.
                ``[784, 30, 10]``.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias column and one weight matrix per non-input layer; the
        # weight matrix of a layer has shape (units_out, units_in).
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def forward(self, a):
        """Return the network output for input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def train(self, x_train, y_train, epochs, mini_batch_size, lr):
        """Train the network with mini-batch gradient descent.

        Args:
            x_train: Training inputs, indexable along the first axis (one
                sample per entry). Converted with ``np.asarray`` so plain
                lists are accepted as well as arrays.
            y_train: Training targets aligned with ``x_train``.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per parameter update; the
                last batch of an epoch may be smaller.
            lr: Learning rate.
        """
        # Fancy indexing with a permutation below requires ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)
        n_samples = len(x_train)

        for j in range(epochs):
            # Reshuffle inputs and targets with the same permutation so the
            # pairs stay aligned.
            p = np.random.permutation(n_samples)
            x_train = x_train[p]
            y_train = y_train[p]

            batches = [
                (x_train[k:k + mini_batch_size],
                 y_train[k:k + mini_batch_size])
                for k in range(0, n_samples, mini_batch_size)]

            for x, y in tqdm(batches):
                self.handle_batch(x, y, lr)

            print("Epoch {} complete".format(j+1))

    def handle_batch(self, x, y, lr):
        """Apply one gradient-descent step using a single mini-batch.

        The per-sample gradients are summed and the parameters are moved
        by ``lr / batch_size`` times that sum, i.e. by ``lr`` times the
        mean gradient of the batch. An empty batch leaves the parameters
        unchanged.

        Args:
            x: Batch of input column vectors.
            y: Batch of target column vectors aligned with ``x``.
            lr: Learning rate.
        """
        batch_size = len(x)
        if batch_size == 0:
            return

        dCdbfull = [np.zeros(b.shape) for b in self.biases]
        dCdWfull = [np.zeros(w.shape) for w in self.weights]

        for x_sample, y_sample in zip(x, y):
            dCdb, dCdW = self.backprop(x_sample, y_sample)
            dCdbfull = [nb + dnb for nb, dnb in zip(dCdbfull, dCdb)]
            dCdWfull = [nw + dnw for nw, dnw in zip(dCdWfull, dCdW)]

        # Normalise by the number of samples in the batch (not by the
        # dimensionality of a sample).
        step = lr / batch_size
        self.weights = [w - step * nw
                        for w, nw in zip(self.weights, dCdWfull)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, dCdbfull)]

    def backprop(self, x, y):
        """Return the cost gradients for a single training sample.

        Args:
            x: Input column vector.
            y: Target column vector with the shape of the network output.

        Returns:
            Tuple ``(dCdb, dCdW)`` of lists with one array per layer,
            shaped like ``self.biases`` and ``self.weights`` respectively.
        """
        # Forward pass, remembering pre-activations (z) and activations (a).
        a = x
        a_history = [x]
        z_history = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            z_history.append(z)
            a = sigmoid(z)
            a_history.append(a)

        dCdW = [np.zeros(w.shape) for w in self.weights]
        dCdb = [np.zeros(b.shape) for b in self.biases]

        # Error of the output layer.
        errorL = (a_history[-1] - y) * gradSigmoid(z_history[-1])
        dCdW[-1] = np.dot(errorL, a_history[-2].transpose())
        dCdb[-1] = errorL

        # Walk back through the remaining layers. ``i`` indexes
        # self.weights / self.biases; a_history[i] is the input of layer i
        # and z_history[i] its pre-activation, so all lists share the same
        # forward index.
        for i in range(len(self.weights) - 2, -1, -1):
            sp = gradSigmoid(z_history[i])
            errorL = np.dot(self.weights[i + 1].transpose(), errorL) * sp
            dCdW[i] = np.dot(errorL, a_history[i].transpose())
            dCdb[i] = errorL
        return (dCdb, dCdW)

    def evaluate(self, test_data):
        """Return the classification accuracy on ``test_data``.

        Args:
            test_data: Iterable of ``(x, y)`` pairs. The predicted class is
                the argmax of the network output and the true class is the
                argmax of ``y``.

        Returns:
            Fraction of correctly classified samples, or ``0.0`` when
            ``test_data`` is empty.
        """
        correct = 0
        total = 0
        for x, y in test_data:
            correct += int(np.argmax(self.forward(x)) == np.argmax(y))
            total += 1
        if total == 0:
            return 0.0
        return correct / total
