# Neural Network
Neural Networks are one of the most used tools in machine learning. Basically, what we are doing is putting together many perceptrons into layers for a more complex system. Here, I will be using an input layer (64 nodes) -> a hidden layer (5 nodes) -> output layer (10 nodes)

In [39]:
import pandas as pd
import numpy as np
import numpy.linalg as lin
import statistics as stat
import matplotlib.pyplot as plt
from sklearn import datasets

In [92]:
digits = datasets.load_digits()
x = digits.data

# Adding a bias term
temp = []
for i in range(len(x)):
    temp.append(x[i])
    temp[i] = np.append(temp[i], 1)

x = np.array(temp)
x.shape

(1797, 65)

In [93]:
x[0]

array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.,  1.])

In [94]:
# We can see that since each row is an 8x8 picture, it has 64 columns in it
# Each row is an image
pd.DataFrame(x).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,1.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,1.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,1.0


In [54]:
y = digits.target
y.shape

(1797,)

In [7]:
y[range(20)]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [331]:
def σ(z):
    """
    Sigmoid Function that takes in a number, z, and outputs
    the sigmoid function evaluated at z, which is in the
    interval [0, 1]
    """
    return (1 + np.exp(-z))**(-1)

def loss(y, y_hat):
    """
    Loss function that takes in two arrays of equal length,
    y and y_hat, and calculates the sum of squares loss between
    the two
    """
    return sum((y - y_hat)**2)

    
def gradient_w2(node_output, outputs, targets, w2, α):
    """
    Function to calculate the gradient for any one of the set of
    weights between the hidden layer and one output node
    INPUTS:
        - node_output, the output of the node in the hidden layer
                    we are working with
        - outputs, an array of length 10 that contains all the 
                calculated outputs of the output layer
        - targets, an array of length 10 that contains the target
                    values of the output layer
        - w2, the array of weights that go from the hidden layer
            to one of the output nodes
        - α, the learning rate
    OUTPUTS:
        - w2_new, an array of new weights 
    """
    coeff = 0
    for p in range(10):
        coeff += (outputs[p] - targets[p]) * outputs[p] * (1 - outputs[p])
    coeff *= 2
    coeff *= node_output
    
    w2_new = w2 - (α * coeff) * w2
    return w2_new
    
def gradient_w1(w1, x, node_output, outputs, targets, w2j, α):
    """
    Function to calculate the gradient for any one of the set of
    weights between the input layer and one hidden layer node
    INPUTS:
        - w1, the given array of length 64 + 1 (data + bias) that was 
            fed through the input layer
        - x, the array of input data we are using
        - node_output, the output of the node in the hidden layer
                    we are working with, a float value
        - outputs, an array of length 10 that contains all the 
                calculated outputs of the output layer
        - targets, an array of length 10 that contains the target
                    values of the output layer
        - w2j, the weight that goes from the second layer
                (node j, same as node_output), to output i
        - α, the learning rate with which to scale the change in weights
        
    OUTPUTS:
        - w1_new, an array of new weights 
    """
    total = 0
    for i in range(10):
        total += (outputs[i] - targets[i]) * (outputs[i]) * (1 - outputs[i]) * w2j[i]
        
    coeff = []
    for i in range(65):
        coeff.append(2 * node_output * (1 - node_output) * x[i])
        
     
    w1_new = w1 - (α * np.array(coeff))
    return w1_new

def feed_forward(x, w1, w2):
    """
    Function to feed forward the array, x, and give outputs
    INPUTS:
        - x, an array of length 64, the data to be passed through
        - w1, the matrix holding all the weights from the input
            layer to the hidden layer
        - w2, the matrix holding all the weights from the hidden layer
            to the output layer
    OUTPUTS:
        - outputs, an array of length 10
        - hidden_layer, an array of length 5
    """
    hidden_layer = []
    outputs = []
    
    for i in range(5):
        hidden_layer.append(σ(np.dot(w1[:,i], x)))
    hidden_layer.append(1)
        
    for i in range(10):
        z = np.dot(w2[:,i], np.array(hidden_layer))
        outputs.append(σ(z))
        
    return {'o':outputs, 'hl':hidden_layer}

def NeuralNet(x, y, weights1, weights2, num_iterations=100, α=0.25):
    """
    Function for optimizing the weights for the 3-layer Neural Network
    INPUTS:
        - x, the input data matrix with size m x n, where each row 
            is an observations
        - y, an array of length m with all the correct classifications
            of the data in x
        - weights1, a matrix of size 65 x 6, where each column is a set of weights
            corresponding to one of the hidden layer nodes
        - weights2, a matrix of size 6 x 10, where each column is a set of weights
            corresponding from the hidden layer to one of the output nodes
        - num_iterations, an integer, the maximum number of iterations
                        the algorithm will perform before stopping
        - α, the learning rate for the algorithm
    OUTPUTS:
        - new_w1, the optimized weights for the first layer
        - new_w2, the optimized weights for the second layer
    """
    m = len(x)
    n = len(x[0])
    error = 1
    j = 0
    
    w1 = weights1
    w2 = weights2
    while (j <= num_iterations):
        j+=1
        
        # Feed Forward
        for i in range(n):
            obs = x[i]
            target = y[i]
            
            ff = feed_forward(x[i], w1, w2)
            outputs = ff['o']
            hidden_layer = ff['hl']
            guess = np.argmax(np.array(outputs))
            
            # Back Propagation
            expected_y = np.zeros(10)
            expected_y[target] = 1
            
            if (guess != y[i]):
                for k in range(6):
                    w2[k, :] = gradient_w2(hidden_layer[k], outputs, expected_y, w2[k, :], α)
        
                for k in range(6):
                    w1[:,k] = gradient_w1(w1[:,k], obs, hidden_layer[k], outputs, expected_y, w2[k,:], α)
            
    
    return w2[0], w1[0]

In [332]:
# Weights between the input layer and layer 2
weights1 = np.random.random((64+1)*6).reshape(65,6)

# Weights between layer 2 and the output layer
weights2 = np.random.random((5+1)*10).reshape(6, 10)
weights2[0,:]
# This would represent all of the weights from the first node in the 
# hidden layer to the output layer

array([0.55202673, 0.74199731, 0.10711091, 0.22232707, 0.74926398,
       0.24726266, 0.22070115, 0.54455869, 0.77839006, 0.37996151])

In [333]:
NeuralNet(x, y, weights1, weights2, num_iterations=100, α=.5)

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0.30088233, 0.87383575, 0.6725451 , 0.12934434, 0.74214659,
        0.51093607]))

In [193]:
j = np.random.randint(0, 1798)

print(y[j])
np.argmax(feed_forward(x[j], w1, w2)['o'])

5


0

In [198]:
np.zeros(1)

array([0.])

In [71]:
x[0]

array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.])