In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.optimize import minimize
from scipy.io import loadmat

In [2]:
data = loadmat('ex4data1.mat')
X = data['X']
y = data['y'].ravel()

m = len(y)

print(f'Training images : {X.shape}')
print(f'Training labels : {y.shape}')

Training images : (5000, 400)
Training labels : (5000,)


In [3]:
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10

weights = loadmat('ex4weights.mat')
Theta1, Theta2 = weights['Theta1'], weights['Theta2']

print(f'Theta1 : {Theta1.shape}')
print(f'Theta2 : {Theta2.shape}')

nn_params = np.concatenate([Theta1.ravel(), Theta2.ravel()])


Theta1 : (25, 401)
Theta2 : (10, 26)


### Compute Cost (Feedforward)

In [4]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [57]:
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, (input_layer_size + 1)))

    Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                        (num_labels, (hidden_layer_size + 1)))
    
    m = len(y)
    
    A1 = np.append(np.ones((len(X),1)), X, axis=1)
    Z2 = A1.dot(Theta1.T)
    
    A2 = sigmoid(Z2)
    A2 = np.append(np.ones((len(A2),1)), A2, axis=1)
    
    Z3 = A2.dot(Theta2.T)
    A3 = sigmoid(Z3)
    
    J = 0
    
    # convert y into len(y) x num_labels matrix
    new_y = np.zeros((len(y), num_labels))
    
    for i in range(m):
        new_y[i][y[i] - 1] = 1   # new_y would have [0 to 9] instead of y [1 to 10]
    
    inner_sum  =0
    
    for i in range(num_labels):
        inner_sum += - (new_y[:,i].T.dot(np.log(A3[:,i])) + (1 - new_y[:,i]).T.dot(np.log(1 - A3[:,i])))
    
    J = (1/m)  * inner_sum
    
    # cost function with regularization

    J +=  (lmbda/(2 * m)) * (np.sum(np.sum((Theta1[:,1:] ** 2), axis=1)) + np.sum(np.sum((Theta2[:,1:] ** 2), axis=1)))
    
    X = np.append(np.ones((len(X),1)), X, axis=1)
    
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)
    
    for i in range(m):
        a1 = X[i, :].T    # 401x1
        a1 = a1.reshape(len(a1),1)
        # ====== Forward Propagation =======
        
        z2 = Theta1.dot(a1)  # 25x401 * 401x1
        a2 = sigmoid(z2)  # 25x1
        
        a2 = np.append(np.ones((1,a2.shape[1])), a2, axis=0)  # 26x1
        
        z3 = Theta2.dot(a2)  #10x26 * 26x1
        a3 = sigmoid(z3)
        
        # Compute delta
        
        delta3 = a3 - new_y[i, :].T.reshape(len(new_y[i,:]),1)
        delta2 = (Theta2.T.dot(delta3)) * a2 * (1 - a2)
    
        Theta1_grad += delta2[1:,:].dot(a1.T)
        
        Theta2_grad += delta3.dot(a2.T)
        
    Theta1_grad = (1/m) * Theta1_grad
    Theta2_grad *= (1/m)
    
    Theta1_grad[:, 1:] += (lmbda/m) * Theta1_grad[:, 1:]
    Theta2_grad[:, 1:] += (lmbda/m) * Theta2_grad[:, 1:]
    
    
    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel()])
        

    return J, grad

#### Cost function without regularization

In [47]:
print('Feedforward Using Neural Networks ... \n')

lmbda = 0

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

print(f'Cost at parameters (loaded from ex4weights): {J} \n(this value should be about 0.287629)\n');

Feedforward Using Neural Networks ... 

Cost at parameters (loaded from ex4weights): 0.287629165161319 
(this value should be about 0.287629)



#### Cost function with regularization

In [48]:
print('\nChecking Cost Function (w/ Regularization) ... \n')

# Weight regularization parameter (we set this to 1 here).
lmbda = 1

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

print(f'Cost at parameters (loaded from ex4weights): {J} \n(this value should be about 0.383770)\n')



Checking Cost Function (w/ Regularization) ... 

Cost at parameters (loaded from ex4weights): 0.38376985909092376 
(this value should be about 0.383770)



#### Sigmoid Gradient

In [49]:
def sigmoidGradient(z):
    return sigmoid(z) * (1 - sigmoid(z))

In [50]:
print('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient(np.array([[-1, -0.5, 0, 0.5, 1]]));
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ');
print(g);


Evaluating sigmoid gradient...

Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:
  
[[0.19661193 0.23500371 0.25       0.23500371 0.19661193]]


### Initializing Pameters

In [51]:
def randInitializeWeights(L_in, L_out):
    W = np.zeros((L_in, L_out + 1))
    
   # One effective strategy for random initialization is to randomly select values for Θ (l) uniformly in the range [− init ,  init ].
   # You should use  init = 0.12. 2 This range of values ensures that the parameters are kept small and makes the learning more efficient.
    epsilon_init = 0.12
    W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
    return W

In [52]:
print('\nInitializing Neural Network Parameters ...\n')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);

# Unroll parameters
initial_nn_params = np.concatenate([initial_Theta1.ravel() , initial_Theta2.ravel()]);



Initializing Neural Network Parameters ...



### Implement Backpropagation and Training Neural Network

In [62]:
print('Training Neural Network ... \n')

lmbda = 1

costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size,
                                        num_labels, X, y, lmbda)


res = minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options={'maxiter': 100})

Training Neural Network ... 



In [63]:
nn_params = res.x

Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))



### Implement Predict

In [76]:
def predict(Theta1, Theta2, X):
    
    X = np.append(np.ones((len(X),1)), X, axis=1)
    h1 = sigmoid(X.dot(Theta1.T))
    
    h1 = np.append(np.ones((len(h1),1)), h1, axis=1)
    h2 = sigmoid(h1.dot(Theta2.T))
    
    p = np.argmax(h2, axis=1)
    
    return p + 1

In [77]:
p = predict(Theta1, Theta2, X)
print(f'Training set accuracy : {np.mean(p == y.reshape(len(y))) * 100} %')

Training set accuracy : 96.02000000000001 %
