In [19]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.html.widgets import interact

from sklearn.datasets import load_digits
digits = load_digits()

In [20]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)

In [21]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

#convert the integer answers into a 10-dimension array
traininganswervectors = np.zeros((1796,10))
for n in range(1796):
    traininganswervectors[n][digits.target[n]] = 1

In [22]:
def feedforward(weights, biases, a):
    b = []
    #first element is inputs "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][0:,n2], b[n-1]) + biases[n-1][n2])
      
    return b

In [115]:
feedforward(weights, biases, trainingdata[0])

[array([  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.,   0.,   0.,  13.,
         15.,  10.,  15.,   5.,   0.,   0.,   3.,  15.,   2.,   0.,  11.,
          8.,   0.,   0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.,   0.,
          5.,   8.,   0.,   0.,   9.,   8.,   0.,   0.,   4.,  11.,   0.,
          1.,  12.,   7.,   0.,   0.,   2.,  14.,   5.,  10.,  12.,   0.,
          0.,   0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]),
 array([  9.90442338e-01,   2.44256528e-09,   1.00000000e+00,
          1.00000000e+00,   5.51988812e-07,   3.78325361e-12,
          1.00000000e+00,   9.99999971e-01,   8.37342711e-01,
          9.99607802e-01,   9.82783336e-01,   9.99997482e-01,
          9.57307764e-01,   1.00000000e+00,   1.00000000e+00,
          1.00000000e+00,   9.84974834e-01,   4.64684702e-03,
          1.00000000e+00,   8.02410452e-03]),
 array([ 0.44779612,  0.87163994,  0.92769695,  0.44619154,  0.06217504,
         0.5942893 ,  0.60903872,  0.9745915 ,  0.09787074,  0.12295571])]

In [116]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (inputs, result) based on random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        
        
        results = []
        for n4 in range(len(trainingdata)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
            
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
        
    return weights, biases

In [117]:
def train(weights, biases, minibatch, lc):
    #set the nabla functions to be the functions themselves initially, same size
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases

In [118]:
def backprop(weights, biases, inputs, answers):
    #set the nabla functions to be the same size as functions
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    #from feedforward
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(1, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][0:,n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(alist[n-1][n2])
    
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-1].transpose())
    
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1], delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n].transpose())
    
    return nb, nw

In [119]:
def costderivative(output, answers):
    return (output - answers)

In [120]:
def accuracy(inputs, results, answers):
    correct = 0
    binresults = results
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0
        
        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)

In [124]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1000]
traininganswers = digits.target[0:1000]

traininganswervectors = np.zeros((1000,10))
for n in range(1000):
    traininganswervectors[n][digits.target[n]] = 1

In [125]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
                                              traininganswervectors, 5, 1, 30)

print(final_weights)

Epoch  0  :  0.048
Epoch  1  :  0.093
Epoch  2  :  0.095
Epoch  3  :  0.101
Epoch  4  :  0.097
Epoch  5  :  0.095
Epoch  6  :  0.101
Epoch  7  :  0.101
Epoch  8  :  0.105
Epoch  9  :  0.091
Epoch  10  :  0.09
Epoch  11  :  0.091
Epoch  12  :  0.091
Epoch  13  :  0.09
Epoch  14  :  0.092
Epoch  15  :  0.091
Epoch  16  :  0.091
Epoch  17  :  0.091
Epoch  18  :  0.09
Epoch  19  :  0.09
Epoch  20  :  0.089
Epoch  21  :  0.089
Epoch  22  :  0.081
Epoch  23  :  0.14
Epoch  24  :  0.139
Epoch  25  :  0.125
Epoch  26  :  0.117
Epoch  27  :  0.113
Epoch  28  :  0.114
Epoch  29  :  0.114
[array([[ 0.76450383,  0.59251208, -0.33663917, ...,  0.03105159,
        -0.50664191,  0.55243318],
       [-0.32381734, -0.19473504,  0.47964496, ..., -0.74232456,
         0.63391058,  0.11945287],
       [ 0.6231575 ,  0.86058574,  0.88342131, ..., -0.22242994,
         0.25655237, -0.23629923],
       ..., 
       [-0.36932122, -0.3959529 ,  0.81687002, ..., -0.97764035,
        -0.32230678,  0.12894721],
 