In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

# will be used to load MATLAB mat datafile format
from scipy.io import loadmat

# library written for this exercise providing additional functions for assignment submission, and others
import utils

# define the submission/grader object for this exercise
grader = utils.Grader()

# tells matplotlib to embed plots within the notebook
%matplotlib inline

In [2]:
#  training data stored in arrays X, y
data = loadmat(os.path.join('Data', 'ex4data1.mat'))
X, y = data['X'], data['y'].ravel()

# set the zero digit to 0, rather than its mapped 10 in this dataset
# This is an artifact due to the fact that this dataset was used in 
# MATLAB where there is no index 0
y[y == 10] = 0

# Number of training examples
m = y.size


In [5]:
# Setup the parameters you will use for this exercise
input_layer_size  = 400  # 20x20 Input Images of Digits
hidden_layer_size = 25   # 25 hidden units
num_labels = 10          # 10 labels, from 0 to 9

# Load the weights into variables Theta1 and Theta2
weights = loadmat(os.path.join('Data', 'ex4weights.mat'))

# Theta1 has size 25 x 401
# Theta2 has size 10 x 26
Theta1, Theta2 = weights['Theta1'], weights['Theta2']

# swap first and last columns of Theta2, due to legacy from MATLAB indexing, 
# since the weight file ex3weights.mat was saved based on MATLAB indexing
Theta2 = np.roll(Theta2, 1, axis=0)

# Unroll parameters 
nn_params = np.concatenate([Theta1.ravel(), Theta2.ravel()])


# DATA SUMMARY 

In [30]:
print('Number of samples =', m)
print('Size of X =', X.shape)
print('Hidden layer size =', hidden_layer_size)
print('The size of Theta1 =', Theta1.shape)
print('The size of Theta2 =', Theta2.shape)

temp_y = np.zeros((m, 10))
for i, num in enumerate(y):
    temp_y[i][num] = 1
print('Size of encoded y =', temp_y.shape)
    
rand_indices = np.random.choice(m, 100, replace=False)
for i in range (10):
    print (temp_y[rand_indices[i]], '~~ y =', y[rand_indices[i]])

Number of samples = 5000
Size of X = (5000, 400)
Hidden layer size = 25
The size of Theta1 = (25, 401)
The size of Theta2 = (10, 26)
Size of encoded y = (5000, 10)
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] ~~ y = 1
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.] ~~ y = 6
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] ~~ y = 1
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.] ~~ y = 6
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.] ~~ y = 8
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] ~~ y = 0
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] ~~ y = 5
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.] ~~ y = 3
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] ~~ y = 4
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] ~~ y = 4


In [48]:
def nnCostFunction(nn_params,
                   input_layer_size,
                   hidden_layer_size,
                   num_labels,
                   X, y, lambda_=0.0):
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, (input_layer_size + 1)))

    Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                        (num_labels, (hidden_layer_size + 1)))

    # Setup some useful variables
    m = y.size
         
    # You need to return the following variables correctly 
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)
    
    X = np.concatenate([np.ones((m,1)), X], axis = 1)
    z_2 = X.dot(Theta1.T)
    a_2 = utils.sigmoid(z_2)
    a_2 = np.concatenate([np.ones((m ,1)), a_2], axis = 1)
    h_X = (utils.sigmoid(a_2.dot(Theta2.T)))
    
#     for i in range (m):
#         for k in range (10):
#             J = J + (- temp_y[i][k] * np.log(h_X[i][k]) - (1 - temp_y[i][k]) * np.log(1 - h_X[i][k]))
#     J = J/m
    for i in range (m):
        J = J + (-temp_y[i].dot(np.log(h_X[i].T)) - (1 - temp_y[i]).dot(np.log(1- h_X[i].T)))
    J  = J/m + lambda_ * (np.sum(Theta1[:,1:]**2) + np.sum(Theta2[:, 1:]**2)) / (2 * m)
    
    print(h_X.shape)
    return J

In [49]:
lambda_ = 0
J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_ = 1)

print('Cost at parameters (loaded from ex4weights): %.6f ' % J)
print('The cost should be about                   : 0.287629.')

(5000, 10)
Cost at parameters (loaded from ex4weights): 0.383770 
The cost should be about                   : 0.287629.


In [51]:
a = np.array([[1,3,4], [3,5,6], [2,9,6]])
print(1-a)
a * (1 - a)

[[ 0 -2 -3]
 [-2 -4 -5]
 [-1 -8 -5]]


array([[  0,  -6, -12],
       [ -6, -20, -30],
       [ -2, -72, -30]])