In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.optimize import minimize
from scipy.io import loadmat

In [2]:
# Read the training set from the MATLAB file and pull out the two arrays we need.
data = loadmat('ex4data1.mat')
X, y = data['X'], data['y'].ravel()  # ravel() flattens the label array to 1-D

# Number of training examples (one label per example).
m = y.shape[0]

print(f'Training images : {X.shape}')
print(f'Training labels : {y.shape}')

Training images : (5000, 400)
Training labels : (5000,)


In [12]:
# Network architecture: sizes of the input, hidden and output layers.
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10

# Load the pre-trained weight matrices from the MATLAB file.
weights = loadmat('ex4weights.mat')
Theta1, Theta2 = weights['Theta1'], weights['Theta2']

print(f'Theta1 : {Theta1.shape}')
print(f'Theta2 : {Theta2.shape}')

# Unroll both weight matrices into one flat parameter vector (Theta1 first,
# then Theta2) -- this is the layout nnCostFunction reshapes from.
nn_params = np.concatenate([Theta1.ravel(), Theta2.ravel()])
# Was `print(a.shape)`: `a` is never defined in this notebook, so the cell
# raised NameError on a fresh kernel.
print(nn_params.shape)

Theta1 : (25, 401)
Theta2 : (10, 26)
(10285,)


### Compute Cost (Feedforward)

In [21]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; for arrays the function is
    applied element-wise and the result has the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [52]:
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    """Compute the regularized cross-entropy cost of a network with one hidden layer.

    Parameters
    ----------
    nn_params : 1-D array
        Unrolled weights: the first ``hidden_layer_size * (input_layer_size + 1)``
        entries are Theta1, the remainder is Theta2 (both in row-major order).
    input_layer_size : int
        Number of input features (columns of ``X``).
    hidden_layer_size : int
        Number of hidden units (bias unit not counted).
    num_labels : int
        Number of output classes.
    X : 2-D array
        One training example per row, without the bias column.
    y : array of labels
        1-based class labels, one per row of ``X``; label ``k`` is encoded as a 1
        in column ``k - 1`` of the one-hot target matrix.
    lmbda : float
        Regularization strength; 0 disables regularization.

    Returns
    -------
    float
        Mean cross-entropy over the examples plus
        ``lmbda / (2 * m)`` times the sum of squared non-bias weights.
    """
    # Recover the two weight matrices from the flat parameter vector.
    split = hidden_layer_size * (input_layer_size + 1)
    Theta1 = np.reshape(nn_params[:split], (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nn_params[split:], (num_labels, hidden_layer_size + 1))

    m = len(y)

    # Forward pass; a column of ones is prepended at each layer for the bias term.
    A1 = np.append(np.ones((len(X), 1)), X, axis=1)
    A2 = sigmoid(A1.dot(Theta1.T))
    A2 = np.append(np.ones((len(A2), 1)), A2, axis=1)
    A3 = sigmoid(A2.dot(Theta2.T))

    # One-hot encode the labels in a single indexing operation. Casting to int
    # lets float-valued or column-vector label arrays be used as indices.
    label_columns = np.asarray(y).reshape(-1).astype(int) - 1
    Y = np.zeros((m, num_labels))
    Y[np.arange(m), label_columns] = 1

    # Cross-entropy summed over every example and every output unit.
    J = -np.sum(Y * np.log(A3) + (1 - Y) * np.log(1 - A3)) / m

    # Regularization skips column 0 of each Theta (the bias weights).
    squared_weights = np.sum(Theta1[:, 1:] ** 2) + np.sum(Theta2[:, 1:] ** 2)
    J += (lmbda / (2 * m)) * squared_weights

    return J

#### Cost function without regularization

In [53]:
# Sanity check: cost of the pre-trained weights with regularization switched off.
print('Feedforward Using Neural Networks ... \n')

lmbda = 0

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

print(f'Cost at parameters (loaded from ex4weights): {J} \n(this value should be about 0.287629)\n')

# Make the expectation executable so a regression fails under Restart & Run All
# instead of relying on someone comparing the printed numbers by eye.
assert np.isclose(J, 0.287629, atol=1e-5), f'unexpected unregularized cost: {J}'

Feedforward Using Neural Networks ... 

Cost at parameters (loaded from ex4weights): 0.287629165161319 
(this value should be about 0.287629)



#### Cost function with regularization

In [54]:
# Sanity check: cost of the pre-trained weights with regularization enabled.
print('\nChecking Cost Function (w/ Regularization) ... \n')

# Weight regularization parameter (we set this to 1 here).
lmbda = 1

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

print(f'Cost at parameters (loaded from ex4weights): {J} \n(this value should be about 0.383770)\n')

# Make the expectation executable so a regression fails under Restart & Run All
# instead of relying on someone comparing the printed numbers by eye.
assert np.isclose(J, 0.383770, atol=1e-5), f'unexpected regularized cost: {J}'



Checking Cost Function (w/ Regularization) ... 

Cost at parameters (loaded from ex4weights): 0.38376985909092376 
(this value should be about 0.383770)



#### Sigmoid Gradient

In [None]:
def sigmoidGradient(z):
    """Return the derivative of the sigmoid function evaluated at ``z``.

    Uses the identity g'(z) = g(z) * (1 - g(z)), where g is ``sigmoid``.
    ``z`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    # Evaluate the sigmoid once and reuse it rather than computing it twice.
    g = sigmoid(z)
    return g * (1 - g)

In [None]:
# This cell was un-ported MATLAB (fprintf, space-separated array literal,
# np.array[...] subscript) and could not run in Python.
print('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ', end='')
print(' '.join(f'{value:f}' for value in g))