In [1]:
import numpy as np
import time
from sklearn.preprocessing import OneHotEncoder
from scipy.io import loadmat
from scipy.optimize import minimize

In [89]:
def load_data(filename):
    try:
        return loadmat(filename)
    except TypeError:
        print("Not a valid filename argument: " + filename)

In [90]:
def sigmoid(x):
    return 1/(1 + np.exp((-x)))

In [91]:
def sigmoid_gradient(x):
    return np.multiply(x, (1 - x))

In [92]:
def forward_prop(X, theta_list):

    m = X.shape[0]
    a_list = []
    z_list = []
    
    a_list.append(np.insert(X, 0, values=np.ones(m), axis=1))
   
    idx = 0
    for idx, thera in enumerate(theta_list):
        z_list.append(a_list[idx] * (theta_list[idx].T))
        if idx != (len(theta_list)-1):
            a_list.append(np.insert(sigmoid(z_list[idx]), 0, values=np.ones(m), axis=1))
        else:
            a_list.append(sigmoid(z_list[idx]))

    #print("<<<< a list >>>>")
    #for a in a_list:
    #    print(a.shape)
    
    #print("<<<< z list >>>>")
    #for z in z_list:
    #    print(z.shape)
   

    return a_list, z_list

In [93]:
def back_prop(params, input_size, hidden_layers, num_labels, X, y, learning_rate, regularize = False):

    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    
    theta_list = []
    startCount = 0
    idx = 0
    for idx, val in enumerate(hidden_layers):
        if idx == 0:
            startCount = val * (input_size + 1)
            theta_list.append(np.matrix(np.reshape(params[:startCount], (val, (input_size + 1)))))
        if idx != 0:
            tempCount = startCount
            startCount += (val * (hidden_layers[idx-1] + 1))
            theta_list.append(np.matrix(np.reshape(params[tempCount:startCount], (val, (hidden_layers[idx-1] + 1)))))
        if idx == (len(hidden_layers)-1):
            theta_list.append(np.matrix(np.reshape(params[startCount:], (num_labels, (val + 1)))))
    #for theta in theta_list:
    #    print(theta.shape)

    a_list, z_list= forward_prop(X, theta_list)
        
    J = cost(X, y, a_list[len(a_list)-1], theta_list, learning_rate, regularize)
    
    #print("***** a,z values ********")
    #for aa in a_list:
        #print(aa.shape)
        
    #for zz in z_list:
        #print(zz.shape)
    #print("***** a,z values end ********")
    
    d_list = []
    d_list.append(a_list[len(a_list)-1] - y)
    
   
    #print(len(a_list))
    
    idx = 0
    while idx < (len(theta_list)-1):
        #print(idx)
        theta_idx = len(theta_list) - 1 - idx
        a_list_idx = len(a_list) - 2 - idx
        _theta = theta_list[theta_idx]
        modified = np.insert(_theta, 0, values=np.ones(_theta.shape[1]), axis=0)
        if idx == 0:
            modified = _theta
        #print(d_list[idx].shape)
        #print(modified.shape)
        #print(sigmoid_gradient(a_list[a_list_idx]).shape)
        d_list.append(np.multiply(d_list[idx] * modified, sigmoid_gradient(a_list[a_list_idx])))
        idx += 1
    
    idx = 1
    while idx < (len(d_list)):
        d_list[idx] = d_list[idx][:,1:]
        idx += 1
        
    #print("***** d values ********")
    #for dd in d_list:
    #    print(dd.shape)        
        
    
    
    delta_list = []
    for theta in theta_list:
        delta_list.append(np.zeros(theta.shape))
        

    #print(len(d_list))
    for idx, delta in enumerate(delta_list):
        #print(idx) 
        #print((d_list[len(d_list) - 1 -idx].T).shape)
        #print(a_list[idx].shape)
        delta_list[idx] = delta_list[idx] + ((d_list[len(d_list) - 1 -idx].T) * a_list[idx])
        delta[idx] = delta[idx] / m


   
    if regularize:
        for idx, delta in enumerate(delta_list):
            delta_list[idx][:, 1:] = delta_list[idx][:, 1:] + (theta_list[idx][:, 1:] * learning_rate) / m

    grad_list = np.ravel(delta_list[0])
    #print("***** delta values ********")
    idx = 1
    while idx < (len(delta_list)):
        grad_list = np.concatenate((grad_list, np.ravel(delta_list[idx])), axis=None)
        #print(grad_list.shape)
        idx += 1

    return J, grad_list

In [94]:
def cost(X, y, h, theta_list, learning_rate, regularize=False):

    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)

    J = (np.multiply(-y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))).sum() / m

        
    if regularize:
        regularization_value = 0.0
        for theta in theta_list:
            regularization_value += np.sum(np.power(theta[:, 1:], 2))
        J += (float(learning_rate) / (2 * m)) * regularization_value
        

    return J

In [105]:
def run_net2():

    input_size = 400
    hidden_layers = [25,25]
    num_labels = 10
    learning_rate = 1

    data = load_data('data/ex3data1.mat')
    X = data['X']  
    y = data['y']  

    print(X.shape, y.shape)

    encoder = OneHotEncoder(sparse=False)
    y_encoded = encoder.fit_transform(y)
    
    print(y_encoded.shape)
    
    total_param_count = 0
    for idx, val in enumerate(hidden_layers):
        if idx == 0:
            total_param_count += val * (input_size + 1)
        if idx != 0:
            total_param_count += val * (hidden_layers[idx-1] + 1)
        if idx == (len(hidden_layers)-1):
            total_param_count += num_labels * (val + 1)
    

    params = (np.random.random(size=total_param_count) - 0.5) * 0.25
    print(params.shape)
    print("Running the backpropagation algorithm...")
    start_time = time.time()

    fmin = minimize(fun=back_prop, x0=params, args=(input_size, hidden_layers, num_labels, X, y_encoded, learning_rate),
                    method='TNC', jac=True, options={'maxiter': 250})

    #back_prop(params, input_size, hidden_layers, num_labels, X, y_encoded, learning_rate)

    print(fmin.x.shape)
    print("Result: ", fmin)
    
    final_theta_list = []
    startCount = 0
    idx = 0
    for idx, val in enumerate(hidden_layers):
        if idx == 0:
            startCount = val * (input_size + 1)
            final_theta_list.append(np.matrix(np.reshape(fmin.x[:startCount], (val, (input_size + 1)))))
        if idx != 0:
            tempCount = startCount
            startCount += (val * (hidden_layers[idx-1] + 1))
            final_theta_list.append(np.matrix(np.reshape(fmin.x[tempCount:startCount], (val, (hidden_layers[idx-1] + 1)))))
        if idx == (len(hidden_layers)-1):
            final_theta_list.append(np.matrix(np.reshape(fmin.x[startCount:], (num_labels, (val + 1)))))
            
    
    a_list, z_list = forward_prop(X, final_theta_list)
    y_pred = np.array(np.argmax(z_list[len(z_list)-1], axis=1) + 1)
    
    correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y)]
    accuracy = (sum(map(int, correct)) / float(len(correct)))

    print('accuracy = {0}%'.format(accuracy * 100))


In [106]:
np.random.seed(3)
run_net2()

(5000, 400) (5000, 1)
(5000, 10)
(10935,)
Running the backpropagation algorithm...
(10935,)
Result:       fun: 0.2561707289781057
     jac: array([-1.01455681,  0.        ,  0.        , ..., -3.95359931,
       -5.7992127 , -0.30985449])
 message: 'Max. number of function evaluations reached'
    nfev: 251
     nit: 19
  status: 3
 success: False
       x: array([ 0.48905795,  0.05203696, -0.05227382, ..., -1.91674159,
        0.83845213, -0.33507214])
accuracy = 96.64%
