# Write simple gradient descent in a neural network

You may use NumPy.


In [52]:
from copy import deepcopy
import numpy as np
import random
import json

# logistic sigmoid activation
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^(-z)) of *z*.

    Works on scalars and, because it is built on ``np.exp``, element-wise
    on NumPy arrays.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

# the derivative of the sigmoid function
def sigmoidDerivative(z):
    """Return the derivative of the sigmoid at *z*: sigmoid(z) * (1 - sigmoid(z)).

    The sigmoid is evaluated once and reused instead of being computed twice.
    """
    s = sigmoid(z)
    return s * (1 - s)

def relu(z, leaky=0.0):
    """Return the (leaky) rectified linear unit of the scalar *z*.

    Positive inputs pass through unchanged; other inputs are scaled by the
    slope *leaky*. With the default slope of 0.0 this is the plain ReLU,
    which also lets the function be called with a single argument.
    """
    # the negative side is a line through the origin with slope `leaky`,
    # not the constant `leaky` itself
    return z if z > 0 else leaky * z
    
def reluDerivative(z):
    """Return the slope of the plain ReLU at the scalar *z*: 1 if z > 0, else 0."""
    return 1 if z > 0 else 0


class NN:
    """Fully connected feed-forward network trained with mini-batch SGD.

    ``sizes`` lists the number of units per layer, input layer first.
    Weights and biases are drawn from a standard normal distribution.
    """

    def __init__(self, sizes, activation_func='sig'):
        """Create a network.

        sizes           -- units per layer, e.g. [4, 16, 8, 3]
        activation_func -- 'sig' selects the sigmoid; any other value
                           selects the ReLU
        """
        self.numLayers = len(sizes)
        self.sizes = sizes

        if activation_func == 'sig':
            # sigmoid and its derivative are plain NumPy arithmetic, so they
            # already apply element-wise to whole arrays; wrapping them in
            # np.vectorize would only add a Python-level loop
            self.activationVector = sigmoid
            self.activationPrimeVector = sigmoidDerivative
        else:
            # relu is a scalar function; bind its second argument to 0.0 so
            # non-positive inputs map to 0 and the vectorized call needs only
            # the pre-activation. otypes pins the output dtype, which
            # np.vectorize would otherwise infer from the first element only.
            self.activationVector = np.vectorize(
                lambda z: relu(z, 0.0), otypes=[float])
            self.activationPrimeVector = np.vectorize(
                reluDerivative, otypes=[float])

        # randomly initialize weights and biases
        # biases: one column vector per non-input layer
        self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
        # weight matrices: shape (units in next layer, units in previous layer)
        self.weights = [np.random.randn(m, n)
                        for n, m in zip(sizes[:-1], sizes[1:])]

    # feedForward function - return the output of the network
    def feedForward(self, inputs):
        """Propagate *inputs* through every layer and return the final activations.

        *inputs* is used as the right operand of ``np.dot(w, inputs)`` and the
        (n, 1) bias is added to the result, so it should be laid out as
        (sizes[0], number_of_samples). The argument itself is not modified.
        """
        outputs = np.array(inputs, dtype=float)  # private copy of the input
        for b, w in zip(self.biases, self.weights):
            outputs = self.activationVector(np.dot(w, outputs) + b)
        return outputs

    # train function - train the neural network using mini-batch stochastic gradient descent
    def train(self, trainingData, epochs, miniBatchSize, alpha, testData=None):
        """Train with mini-batch stochastic gradient descent.

        trainingData  -- sequence of (x, y) tuples: input and desired output
        epochs        -- number of passes over the training data
        miniBatchSize -- samples per gradient step; must be positive
        alpha         -- learning rate
        testData      -- optional (x, y) tuples; when given, the number of
                         correct predictions is printed after each epoch

        Raises ValueError if miniBatchSize is not positive.
        """
        if miniBatchSize <= 0:
            raise ValueError("miniBatchSize must be a positive integer")

        nTest = len(testData) if testData else 0

        # shuffle a private copy so the caller's list keeps its order
        samples = list(trainingData)
        n = len(samples)
        for j in range(epochs):
            random.shuffle(samples)

            for k in range(0, n, miniBatchSize):
                self.updateMiniBatch(samples[k:k + miniBatchSize], alpha)

            if testData:
                print("Epoch {}: {} / {}".format(j, self.evaluate(testData), nTest))
            else:
                print("Epoch {} complete".format(j))

    def updateMiniBatch(self, miniBatch, alpha):
        """Apply one gradient-descent step using the mean gradient of *miniBatch*."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        for x, y in miniBatch:
            delta_nabla_b, delta_nabla_w = self.backPropagate(x, y)
            # accumulate deltas across the mini-batch
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # update for the whole mini-batch: step along the averaged gradient
        step = alpha / len(miniBatch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def backPropagate(self, x, y):
        """Return the cost gradient for a single sample.

        x -- input features as a flat sequence (turned into a column vector)
        y -- desired output as a flat sequence

        Returns (nabla_b, nabla_w): lists of arrays shaped like
        ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        x = np.array([x]).T  # column vector, shape (len(x), 1)
        y = np.array([y])    # row vector, shape (1, len(y))

        # forward pass, remembering every layer's z and activation
        activation = x
        activations = [x]  # all activations, layer by layer
        zs = []            # all pre-activation z vectors, layer by layer

        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)

            activation = self.activationVector(z)
            activations.append(activation)

        # backward pass
        # output layer: costDerivative works on rows, so transpose back to a column
        cd = self.costDerivative(activations[-1], y).T
        delta = cd * self.activationPrimeVector(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # inner layers, walking backwards (l counts layers from the end)
        for l in range(2, self.numLayers):
            spv = self.activationPrimeVector(zs[-l])
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * spv

            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return (nabla_b, nabla_w)

    def evaluate(self, testData):
        """Return how many samples in *testData* are classified correctly.

        A prediction counts as correct when the index of the largest network
        output equals the index of the largest entry of the desired output.
        Empty *testData* yields 0.
        """
        if not testData:
            return 0
        x, y = zip(*testData)
        # feed all samples at once: one column per sample
        yp = self.feedForward(np.array(x).T).T.tolist()
        return sum(1 for ypi, yi in zip(yp, y)
                   if np.argmax(ypi) == np.argmax(yi))

    def costDerivative(self, outputActivations, y):
        """Return ``outputActivations.T - y``, the output error laid out like *y*."""
        return (outputActivations.T - y)

    # save function - save the neural network to filename
    def save(self, filename):
        """Write sizes, weights and biases to *filename* as JSON."""
        data = {
            "sizes": self.sizes,
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases]
        }

        with open(filename, "w") as f:
            json.dump(data, f, indent=4, sort_keys=True)

# loadData function - load a dataset from a JSON file
# returns the tuple (y, x) read from the file's 'y' and 'x' entries
def loadData(filename="iris_data.json"):
    """Read a JSON dataset and return ``(data['y'], data['x'])``.

    filename -- path of the JSON file; defaults to "iris_data.json"

    Note the order of the result: the 'y' entry comes first, then 'x'.
    """
    with open(filename, "r") as f:
        data = json.load(f)
    return (data['y'], data['x'])


In [54]:
net_output_path = "trained_network.json"

# loadData returns (y, x); pair them up as (x, y) tuples for training
y, x = loadData()
data = list(zip(x, y))
sizes = [4,16,8,3]
epochs = 100
# roughly ten mini-batches per epoch; never let the batch size drop to 0,
# which would happen for fewer than 10 samples
miniBatchSize = max(1, len(data) // 10)
alpha = 0.05

network = NN(sizes)
# NOTE: the same samples are passed as training and test data, so the score
# printed after each epoch is measured on the training set
network.train(data, epochs, miniBatchSize, alpha, data)

network.save(net_output_path)

Epoch 0: 50 / 150
Epoch 1: 50 / 150
Epoch 2: 50 / 150
Epoch 3: 50 / 150
Epoch 4: 50 / 150
Epoch 5: 50 / 150
Epoch 6: 50 / 150
Epoch 7: 61 / 150
Epoch 8: 76 / 150
Epoch 9: 57 / 150
Epoch 10: 76 / 150
Epoch 11: 93 / 150
Epoch 12: 96 / 150
Epoch 13: 100 / 150
Epoch 14: 100 / 150
Epoch 15: 100 / 150
Epoch 16: 100 / 150
Epoch 17: 100 / 150
Epoch 18: 100 / 150
Epoch 19: 100 / 150
Epoch 20: 100 / 150
Epoch 21: 100 / 150
Epoch 22: 100 / 150
Epoch 23: 101 / 150
Epoch 24: 102 / 150
Epoch 25: 104 / 150
Epoch 26: 104 / 150
Epoch 27: 104 / 150
Epoch 28: 104 / 150
Epoch 29: 104 / 150
Epoch 30: 104 / 150
Epoch 31: 104 / 150
Epoch 32: 105 / 150
Epoch 33: 105 / 150
Epoch 34: 106 / 150
Epoch 35: 106 / 150
Epoch 36: 107 / 150
Epoch 37: 108 / 150
Epoch 38: 109 / 150
Epoch 39: 109 / 150
Epoch 40: 109 / 150
Epoch 41: 110 / 150
Epoch 42: 113 / 150
Epoch 43: 113 / 150
Epoch 44: 114 / 150
Epoch 45: 116 / 150
Epoch 46: 116 / 150
Epoch 47: 116 / 150
Epoch 48: 116 / 150
Epoch 49: 117 / 150
Epoch 50: 117 / 150
Epo