In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

import sklearn

%matplotlib inline

In [None]:
# Features X and labels y, read from the MATLAB data file.
data = loadmat('data/ex4data1.mat')
X = data['X']
y = data['y']

# Network weights live in a second .mat file. The raw dict gets its own name
# so that `thetas` is only ever bound to the list of weight matrices.
weights = loadmat('data/ex4weights.mat')
theta1 = weights['Theta1']
theta2 = weights['Theta2']

thetas = [theta1, theta2]

# Quick look at the shape and the first rows of everything that was loaded.
print("X:", X.shape, '\n', X[:5])
print("y:", y.shape, '\n', y[:5])
print("theta1:", theta1.shape, '\n', theta1[:5])
print("theta2:", theta2.shape, '\n', theta2[:5])

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated element-wise.

    ``x`` may be a scalar or any NumPy array/matrix; the result has the
    same shape as ``x``.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [None]:
def feedForwardOnce(X, theta):
    """Push a batch of inputs through a single layer.

    A column of ones (the bias unit) is inserted in front of ``X``; the
    result is multiplied by the transpose of ``theta`` and the sigmoid is
    applied to that product.

    Returns
    -------
    (a, z)
        ``z`` is the weighted input of the layer and ``a`` is
        ``sigmoid(z)``. ``z`` is an ``np.matrix`` with one row per row of
        ``X`` and one column per row of ``theta``.
    """
    bias_column = np.ones(len(X))
    with_bias = np.matrix(np.insert(X, 0, values=bias_column, axis=1))
    layer_weights = np.matrix(theta)

    # Both operands are np.matrix, so `*` is a matrix product here.
    z = with_bias * layer_weights.T

    return sigmoid(z), z

In [None]:
def feedForward(X, thetas):
    """Run ``X`` through every layer in ``thetas``, in order.

    Returns the activations of the last layer. Works on a copy of ``X``;
    if ``thetas`` is empty that copy is returned unchanged.
    """
    activation = X.copy()
    for layer_weights in thetas:
        # Only the activation is carried forward; the weighted input is dropped.
        activation = feedForwardOnce(activation, layer_weights)[0]
    return activation

In [None]:
# Forward pass that records every layer's activations and weighted inputs
def feedForwardTrack(X, thetas):
    """Feed ``X`` forward and keep what each layer produced.

    Returns
    -------
    (a_s, z_s)
        Two lists with one entry per element of ``thetas``: ``a_s[k]`` is
        the activation and ``z_s[k]`` the weighted input that
        ``feedForwardOnce`` returned for ``thetas[k]``.
    """
    activations, weighted_inputs = [], []

    current = X.copy()
    for layer_weights in thetas:
        current, z = feedForwardOnce(current, layer_weights)
        activations.append(current)
        weighted_inputs.append(z)

    return activations, weighted_inputs

In [None]:
def feedForwardPredict(X, thetas):
    """Predict a 1-based class label for every row of ``X``.

    The network output for each row is taken from ``feedForward`` and the
    index of the largest output unit, plus one, is returned.

    Returns
    -------
    The result of ``np.argmax(..., axis=1) + 1`` on the output activations
    (one label per row of ``X``).
    """
    output_activations = feedForward(X, thetas)
    # feedForward already applies the sigmoid in every layer. Squashing the
    # outputs a second time cannot change which unit is largest, and it can
    # round nearly equal scores to the same float, so argmax runs directly
    # on the activations.
    return np.argmax(output_activations, axis=1) + 1  # plus one because the labels in y are 1-based

In [None]:
def nnCostFunction(X, y, thetas, lam=0):
    """Regularized cross-entropy cost of the network on ``(X, y)``.

    Parameters
    ----------
    X : array, one example per row (no bias column).
    y : labels, one per row of ``X``. They are one-hot encoded over their
        sorted distinct values, so the number of distinct labels must match
        the number of output units.
    thetas : sequence of 2-D weight matrices, one per layer, each with the
        bias weights in column 0.
    lam : regularization strength; ``0`` disables regularization.

    Returns
    -------
    The scalar cost: mean cross-entropy over the examples plus
    ``lam / (2 * m)`` times the sum of squared non-bias weights.
    """
    m = len(X)

    # One-hot encode with NumPy: one column per distinct label, in sorted
    # order. This is the layout OneHotEncoder produced, without relying on
    # its `sparse=` keyword, which newer scikit-learn releases no longer accept.
    classes, label_idx = np.unique(np.ravel(y), return_inverse=True)
    y_onehot = np.eye(len(classes))[label_idx]

    y_predicted = feedForward(X, thetas)

    first_term = np.multiply(-y_onehot, np.log(y_predicted))
    second_term = np.multiply((1 - y_onehot), np.log(1 - y_predicted))

    # Column 0 of every theta holds the bias weights, which are not penalized.
    reg = 0
    for theta in thetas:
        reg += lam / m / 2 * np.sum(np.power(np.asarray(theta)[:, 1:], 2))

    cost = 1 / m * np.sum(first_term - second_term) + reg

    return cost

In [None]:
# Cost of the loaded weights, first with lam=0 and then with lam=1.
for lam in (0, 1):
    print(nnCostFunction(X, y, thetas, lam))

**2. Backpropagation**

In [None]:
def sigmoidGradient(z):
    """Element-wise derivative of the sigmoid at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)
    return np.multiply(s, 1 - s)

# Sanity check: sigmoid(0) is 0.5, so the gradient at 0 should come out as 0.25.
sigmoidGradient(0)

In [None]:
def randInitializeWeights(shape, epsilon_init=0.12):
    """Draw random initial weights uniformly from ``[-epsilon_init, epsilon_init)``.

    Parameters
    ----------
    shape : int or tuple of ints, shape of the array to create.
    epsilon_init : half-width of the sampling interval. Defaults to 0.12,
        the value that used to be hard-coded here.

    Returns
    -------
    ndarray of the requested shape.
    """
    return np.random.uniform(-epsilon_init, epsilon_init, size=shape)

In [None]:
def randInitializeLikeThetas(thetas):
    """Create one randomly initialized weight matrix per entry of ``thetas``.

    Each new matrix has the same shape as the corresponding theta and is
    filled by ``randInitializeWeights``.

    Returns
    -------
    1-D ``ndarray`` of dtype ``object`` whose k-th element is the random
    matrix for ``thetas[k]``.
    """
    # The thetas generally have different shapes. Passing such a list to
    # np.array() raises ValueError on recent NumPy (and gave an object array
    # with a warning before that), so the object array is built explicitly.
    thetas_rand = np.empty(len(thetas), dtype=object)
    for k, theta in enumerate(thetas):
        thetas_rand[k] = randInitializeWeights(np.shape(theta))
    return thetas_rand

In [None]:
def backpropagation(X, y, thetas):
    """Unregularized gradient of the cross-entropy cost for every weight matrix.

    Parameters
    ----------
    X : array, one example per row (no bias column).
    y : labels, one per row of ``X``. They are one-hot encoded over their
        sorted distinct values, so the number of distinct labels must match
        the number of output units.
    thetas : sequence of 2-D weight matrices, one per layer, each with the
        bias weights in column 0. Any number of layers (>= 1) is supported.

    Returns
    -------
    1-D ndarray holding the per-layer gradients, each averaged over the
    examples and flattened, concatenated from the LAST layer to the first
    (for two layers: gradient of ``thetas[1]`` followed by gradient of
    ``thetas[0]``). Its length equals the total number of weights.
    NOTE(review): confirm this ordering matches how the optimizer unrolls
    the thetas.
    """
    m = len(X)

    # One-hot encode with NumPy: one column per distinct label, in sorted
    # order. This is the layout OneHotEncoder produced, without relying on
    # its `sparse=` keyword, which newer scikit-learn releases no longer accept.
    classes, label_idx = np.unique(np.ravel(y), return_inverse=True)
    y_onehot = np.eye(len(classes))[label_idx]

    # Feed forward first, keeping every layer's activation and weighted input.
    a_s, z_s = feedForwardTrack(X, thetas)

    # layer_inputs[k] is what thetas[k] consumes, before the bias column is added.
    layer_inputs = [X] + list(a_s[:-1])

    # Error of the output layer for all examples at once.
    delta = np.asarray(a_s[-1]) - y_onehot

    # Propagate the errors back. The gradients start from zero (they are
    # plain sums over the examples), every example uses its own activations,
    # and the weights come from the `thetas` argument rather than a global.
    gradients = []
    for layer in reversed(range(len(thetas))):
        a_in = np.asarray(layer_inputs[layer])
        a_in = np.insert(a_in, 0, values=np.ones(len(a_in)), axis=1)
        gradients.append(np.dot(delta.T, a_in) / m)

        if layer > 0:
            # The bias unit has no incoming error, so column 0 of the
            # weights is skipped when pushing the error one layer down.
            weights_no_bias = np.asarray(thetas[layer])[:, 1:]
            delta = np.multiply(
                np.dot(delta, weights_no_bias),
                sigmoidGradient(np.asarray(z_s[layer - 1])),
            )

    return np.concatenate([np.ravel(g) for g in gradients])

In [None]:
# Sanity check: the flattened gradient should have one entry per weight across all thetas.
backpropagation(X, y, thetas).shape