# Write simple gradient desent in a neural network

you may use numpy


In [13]:
import numpy
import random
import json

# the sigmoid function
def sigmoid(z):
    return 1.0 / (1.0 + numpy.exp(-z))

# the derivative of the sigmoid function
def sigmoidDerivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def reluActivation(z, leaky = 0):
    if z > 0:
        return z
    else:
        return leaky
    
def reluPrime(z):
    if z > 0:
        return 1
    else:
        return 0

# relu vectors
reluVector = numpy.vectorize(reluActivation)
reluPrimeVector = numpy.vectorize(reluPrime)


class NN:
    
    def __init__(self, sizes):
        self.numLayers = len(sizes)
        self.sizes = sizes

        # randomly initialize weights and biases
        # biases
        self.biases = [numpy.random.randn(y, 1) for y in sizes[1:]]
        # weight matrices
        self.weights = [numpy.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    # feedForward function - return the output of the network
    def feedForward(self, inputs):
        for b, w in zip(self.biases, self.weights):
            inputs = reluVector(numpy.dot(w, inputs) + b)
        return inputs

    # train function - train the neural network using mini-batch stochastic gradient descent
    # the trainingData is a list of tuples "(x, y)" representing the training inputs and the desired outputs
    # if testData is provided then the network will be evaluated against the test data after each epoch
    def train(self, trainingData, epochs, miniBatchSize, alpha, testData = None):
        if testData:
            nTest = len(testData)

        n = len(trainingData)
        for j in range(epochs):
            random.shuffle(trainingData)

            miniBatches = [trainingData[k:k + miniBatchSize] for k in range(0, n, miniBatchSize)]
            for miniBatch in miniBatches:
                self.updateMiniBatch(miniBatch, alpha)

            if testData:
                print("Epoch {}: {} / {}".format(j, self.evaluate(testData), nTest))
            else:
                print("Epoch {} complete".format(j))


    def updateMiniBatch(self, miniBatch, alpha):
        nabla_b = [numpy.zeros(b.shape) for b in self.biases]
        nabla_w = [numpy.zeros(w.shape) for w in self.weights]

        for x, y in miniBatch:
            delta_nabla_b, delta_nabla_w = self.backPropagate(x, y)

            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.weights = [w - (alpha / len(miniBatch)) * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (alpha / len(miniBatch)) * nb for b, nb in zip(self.biases, nabla_b)]

        
    def backPropagate(self, x, y):
        nabla_b = [numpy.zeros(b.shape) for b in self.biases]
        nabla_w = [numpy.zeros(w.shape) for w in self.weights]

        x = numpy.array(x)
        y = numpy.array(y)

        # feedForward
        activation = x
        activations = [x] # list to store all of the activations, layer by layer
        zs = [] # list to store all of the z vectors, layer by layer

        for b, w in zip(self.biases, self.weights):
            z = numpy.dot(w, activation) + b
            zs.append(z)

            activation = reluVector(z)
            activations.append(activation)

        # backward pass
        delta = self.costDerivative(activations[-1], y) * reluPrimeVector(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = numpy.dot(delta, activations[-2].transpose())

        for l in range(2, self.numLayers):
            spv = reluPrimeVector(zs[-l])

            delta = numpy.dot(self.weights[-l + 1].transpose(), delta) * spv

            nabla_b[-l] = delta
            nabla_w[-l] = numpy.dot(delta, activations[-l - 1].transpose())

        return (nabla_b, nabla_w)

    def evaluate(self, testData):
        testResults = [(numpy.argmax(self.feedForward(x)), y) for (x, y) in testData]
        return sum(int(x == y) for (x, y) in testResults)
    
    def costDerivative(self, outputActivations, y):
        return (outputActivations - y)

    # save function - save the neural network to filename
    def save(self, filename):
        data = {
            "sizes": self.sizes,
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases]
        }

        with open(filename, "w") as handle:
            json.dump(data, handle)

# load function - load a neural network from the file filename
# returns a network instance
def loadData():
    with open("iris_data.json", "r") as handle:
        data = json.load(handle)
    return (data['y'], data['x'])


In [14]:
y, x = loadData()
data = list(zip(x, y))
sizes = [4,8,8,3]
epochs = 5
miniBatchSize = len(data) // 10
alpha = 0.1

network = NN(sizes)
network.train(data, epochs, miniBatchSize, alpha)

ValueError: operands could not be broadcast together with shapes (3,8) (3,) 