In [4]:
# %load ../../../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# load MATLAB files
from scipy.io import loadmat

# pandas display configuration for this notebook:
# - plain-text (non-HTML) DataFrame repr
# - no limit on the number of displayed columns
# - at most 150 displayed rows
# - no truncation when pretty-printing long sequences
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)
 
# Optional alternative figure format (disabled); figures are rendered inline.
#%config InlineBackend.figure_formats = {'pdf',}
%matplotlib inline

# seaborn styling applied to all matplotlib figures in the notebook
import seaborn as sns
sns.set_context('notebook')
sns.set_style('darkgrid')

#### Load MATLAB datafiles

In [5]:
# Read the training set from the MATLAB file; loadmat returns a dict keyed by
# MATLAB variable name (plus metadata entries such as '__header__').
data = loadmat('data/ex4data1.mat')
data.keys()

dict_keys(['y', '__header__', '__globals__', '__version__', 'X'])

In [6]:
# Class labels exactly as stored in the .mat file (one row per example)
y = data['y']

# Design matrix: a leading column of ones (intercept / bias unit) followed by
# the raw feature columns.
X = np.hstack([np.ones((data['X'].shape[0], 1)), data['X']])

print('X:', X.shape, '(with intercept)')
print('y:', y.shape)

X: (5000, 401) (with intercept)
y: (5000, 1)


In [7]:
# Read the network weight matrices (stored under 'Theta1' and 'Theta2') from the MATLAB file.
weights = loadmat('data/ex3weights.mat')
weights.keys()

dict_keys(['Theta2', 'Theta1', '__header__', '__globals__', '__version__'])

In [31]:
# Weight matrix for each layer transition, taken from the loaded file
theta1 = weights['Theta1']
theta2 = weights['Theta2']
print('theta1 :', theta1.shape)
print('theta2 :', theta2.shape)

# Unroll both matrices (row-major) into one flat parameter vector:
# all of theta1 first, then all of theta2.
nn_params = np.concatenate((theta1.ravel(), theta2.ravel()))
print('nn_params :', nn_params.shape)

theta1 : (25, 401)
theta2 : (10, 26)
nn_params : (10285,)


#### Neural Network
Input layer size = 400 (20x20 pixels) <br>
Hidden layer size = 25 <br>
Number of labels = 10

### Neural Networks - Feed Forward and Cost Function

In [8]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise.

    `z` may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

#### Cost Function 
#### $$ J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\big[-y^{(i)}_{k}\,\log\big((h_\theta(x^{(i)}))_k\big)-(1-y^{(i)}_k)\,\log\big(1-(h_\theta(x^{(i)}))_k\big)\big]$$

#### Regularized Cost Function
#### $$ J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\bigg[-y^{(i)}_{k}\,\log\big((h_\theta(x^{(i)}))_k\big)-(1-y^{(i)}_k)\,\log\big(1-(h_\theta(x^{(i)}))_k\big)\bigg] + \frac{\lambda}{2m}\bigg[\sum_{j=1}^{25}\sum_{k=1}^{400}(\Theta_{j,k}^{(1)})^2+\sum_{j=1}^{10}\sum_{k=1}^{25}(\Theta_{j,k}^{(2)})^2\bigg]$$

In [186]:
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, L=0):
    """Cross-entropy cost of a three-layer (one hidden layer) neural network.

    Parameters
    ----------
    nn_params : 1-D array
        Theta1 followed by Theta2, each flattened row-major.
    input_layer_size : int
        Number of input units, not counting the bias unit.
    hidden_layer_size : int
        Number of hidden units, not counting the bias unit.
    num_labels : int
        Number of output units (classes).
    features : 2-D array, shape (m, input_layer_size + 1)
        Design matrix. It is used as-is for the first activation, so the
        intercept column must already be present.
    classes : array with m entries
        Class label of every example. Labels are one-hot encoded in ascending
        order of their distinct values, i.e. the smallest label maps to the
        first output unit.
    L : float, optional
        Regularization strength (lambda). 0 disables regularization.

    Returns
    -------
    float
        The cost J; bias columns of both weight matrices are excluded from
        the regularization term.

    Raises
    ------
    ValueError
        If the number of distinct labels in `classes` differs from
        `num_labels` (the one-hot matrix would not line up with the output
        layer).
    """
    # When comparing to Octave code note that Python uses zero-indexed arrays.
    # Because NumPy slices exclude the right end point, the slice bounds are
    # numerically the same as in the Octave version.
    split = hidden_layer_size * (input_layer_size + 1)
    theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    # Encode the labels that were passed in (not a notebook-level global).
    labels = np.asarray(classes).ravel()
    m = labels.size

    # One-hot encoding with one column per distinct label, columns in sorted
    # label order (the same layout pd.get_dummies produces).
    distinct, column = np.unique(labels, return_inverse=True)
    if distinct.size != num_labels:
        raise ValueError(
            'classes contains %d distinct labels but num_labels is %d'
            % (distinct.size, num_labels))
    y_matrix = np.eye(num_labels)[column]  # m x num_labels

    # Forward propagation
    a1 = features  # m x (input_layer_size + 1)

    z2 = theta1.dot(a1.T)  # hidden_layer_size x m
    a2 = np.c_[np.ones((features.shape[0], 1)), sigmoid(z2.T)]  # m x (hidden_layer_size + 1)

    z3 = theta2.dot(a2.T)  # num_labels x m
    a3 = sigmoid(z3)  # num_labels x m

    # Cross-entropy summed over all examples and output units
    data_term = -(1 / m) * np.sum(np.log(a3.T) * y_matrix + np.log(1 - a3).T * (1 - y_matrix))
    # L2 penalty on all weights except the bias columns
    reg_term = L / (2 * m) * (np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:])))

    return data_term + reg_term

In [187]:
# Cost of the loaded weights on the full data set without regularization (L = 0)
nnCostFunction(nn_params, 400, 25, 10, X, y, 0)

0.28762916516131892

In [188]:
# Same evaluation with the regularization term enabled (L = 1)
nnCostFunction(nn_params, 400, 25, 10, X, y, 1)

0.38376985909092365