In [34]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io as sio#Used to load the OCTAVE *.mat files
import scipy.misc #Used to show matrix as an image
import matplotlib.cm as cm #Used to display images in a specific colormap
import random #To pick random images to display
import scipy.optimize #fmin_cg to train neural network
import itertools
from scipy.special import expit #Vectorized sigmoid function

#You have been provided with a set of network parameters (Θ(1),Θ(2)) 
#already trained by us. These are stored in ex4weights.mat

datafile = './machine-learning-ex4/ex4/ex4data1.mat'
mat = scipy.io.loadmat( datafile )
X, y = mat['X'], mat['y']
#Insert a column of 1's to X as usual
X = np.insert(X,0,1,axis=1)

mat = sio.loadmat('./machine-learning-ex4/ex4/ex4weights.mat')
Theta1, Theta2 = mat['Theta1'], mat['Theta2']
# The matrices Theta1 and Theta2 will now be in your workspace
# Theta1 has size 25 x 401
# Theta2 has size 10 x 26

# These are some global variables I'm suing to ensure the sizes# These 
# of various matrices are correct
#these are NOT including bias nits
input_layer_size = 400
hidden_layer_size = 25
output_layer_size = 10 
n_training_samples = X.shape[0]

def flattenParams(thetas_list):
    """
    Hand this function a list of theta matrices, and it will flatten it
    into one long (n,1) shaped numpy array
    """
    flattened_list = [ mytheta.flatten() for mytheta in thetas_list ]
    combined = list(itertools.chain.from_iterable(flattened_list))
    assert len(combined) == (input_layer_size+1)*hidden_layer_size + \
                            (hidden_layer_size+1)*output_layer_size
    return np.array(combined).reshape((len(combined),1))

def reshapeParams(flattened_array):
    theta1 = flattened_array[:(input_layer_size+1)*hidden_layer_size] \
            .reshape((hidden_layer_size,input_layer_size+1))
    theta2 = flattened_array[(input_layer_size+1)*hidden_layer_size:] \
            .reshape((output_layer_size,hidden_layer_size+1))
    
    return [ theta1, theta2 ]

def flattenX(myX):
    return np.array(myX.flatten()).reshape((n_training_samples*(input_layer_size+1),1))

def reshapeX(flattenedX):
    return np.array(flattenedX).reshape((n_training_samples,input_layer_size+1))

def computeCost(mythetas_flattened,myX_flattened,myy,mylambda=0.):
    """
    This function takes in:
        1) a flattened vector of theta parameters (each theta would go from one
           NN layer to the next), the thetas include the bias unit.
        2) the flattened training set matrix X, which contains the bias unit first column
        3) the label vector y, which has one column
    It loops over training points (recommended by the professor, as the linear
    algebra version is "quite complicated") and:
        1) constructs a new "y" vector, with 10 rows and 1 column, 
            with one non-zero entry corresponding to that iteration
        2) computes the cost given that y- vector and that training point
        3) accumulates all of the costs
        4) computes a regularization term (after the loop over training points)
    """
    
    # First unroll the parameters
    mythetas = reshapeParams(mythetas_flattened)
    
    # Now unroll X
    myX = reshapeX(myX_flattened)
    
    #This is what will accumulate the total cost
    total_cost = 0.
    
    m = n_training_samples

    # Loop over the training points (rows in myX, already contain bias unit)
    for irow in range(m):
        myrow = myX[irow]
                
        # First compute the hypothesis (this is a (10,1) vector
        # of the hypothesis for each possible y-value)
        # propagateForward returns (zs, activations) for each layer
        # so propagateforward[-1][1] means "activation for -1st (last) layer"
        myhs = propagateForward(myrow,mythetas)[-1][1]
        
        # Construct a 10x1 "y" vector with all zeros and only one "1" entry
        # note here if the hand-written digit is "0", then that corresponds
        # to a y- vector with 1 in the 10th spot (different from what the
        # homework suggests)
        tmpy  = np.zeros((10,1))
        tmpy[myy[irow]-1] = 1
        
        # Compute the cost for this point and y-vector
        mycost = -tmpy.T.dot(np.log(myhs))-(1-tmpy.T).dot(np.log(1-myhs))
        
        # Accumulate the total cost
        total_cost += mycost
  
    print(total_cost)
    # Normalize the total_cost, cast as float
    total_cost = float(total_cost) / m
    
    # Compute the regularization term
    total_reg = 0.
    for mytheta in mythetas:
        total_reg += np.sum(mytheta*mytheta) #element-wise multiplication
    total_reg *= float(mylambda)/(2*m)
        
    return total_cost + total_reg
       

def propagateForward(row,Thetas):
    """
    Function that given a list of Thetas (NOT flattened), propagates the
    row of features forwards, assuming the features ALREADY
    include the bias unit in the input layer, and the 
    Thetas also include the bias unit

    The output is a vector with element [0] for the hidden layer,
    and element [1] for the output layer
        -- Each element is a tuple of (zs, as)
        -- where "zs" and "as" have shape (# of units in that layer, 1)
    
    ***The 'activations' are the same as "h", but this works for many layers
    (hence a vector of thetas, not just one theta)
    Also, "h" is vectorized to do all rows at once...
    this function takes in one row at a time***
    """
   
    features = row
    zs_as_per_layer = []
    for i in range(len(Thetas)):  
        Theta = Thetas[i]
        #Theta is (25,401), features are (401, 1)
        #so "z" comes out to be (25, 1)
        #this is one "z" value for each unit in the hidden layer
        #not counting the bias unit
        z = Theta.dot(features).reshape((Theta.shape[0],1))
        a = expit(z)

        zs_as_per_layer.append( (z, a) )
        if i == len(Thetas)-1:
            return np.array(zs_as_per_layer)
        a = np.insert(a,0,1) #Add the bias unit
        features = a

#Once you are done, using the loaded set of parameters Theta1 and Theta2,
#you should see that the cost is about 0.287629
myThetas = [ Theta1, Theta2 ]

#Note I flatten the thetas vector before handing it to the computeCost routine,
#as per the input format of the computeCost function.
#It does the unrolling/reshaping itself
#I also flatten the X vector, similarly
print(computeCost(flattenParams(myThetas),flattenX(X),y, mylambda=1))

[[-9.09112254e+00]
 [-6.35313563e+00]
 [-5.98073448e+00]
 [-1.09029843e+01]
 [-4.67102813e+00]
 [-5.52328692e+00]
 [-5.20025185e+00]
 [-7.82038247e+00]
 [-5.03892319e+00]
 [-4.27511330e-03]]
[[-7.64375402e+00]
 [-6.02607273e+00]
 [-5.67008946e+00]
 [-1.01126881e+01]
 [-5.03071694e+00]
 [-6.34283832e+00]
 [-4.45857489e+00]
 [-6.03601412e+00]
 [-6.22959440e+00]
 [-4.31235344e-03]]
[[ -9.33171475]
 [ -5.73135904]
 [ -3.66743192]
 [-10.75388855]
 [ -5.52920897]
 [ -4.57676654]
 [ -7.85750182]
 [ -2.7759667 ]
 [ -5.20336443]
 [ -0.0747145 ]]
[[-9.79488532e+00]
 [-4.82216771e+00]
 [-4.02688377e+00]
 [-9.35520546e+00]
 [-7.35044936e+00]
 [-4.46846063e+00]
 [-6.29159834e+00]
 [-5.36169458e+00]
 [-7.10460415e+00]
 [-5.91337578e-03]]
[[-9.15412555e+00]
 [-6.49242861e+00]
 [-5.57120236e+00]
 [-9.29689997e+00]
 [-3.78850106e+00]
 [-7.03436105e+00]
 [-5.35263490e+00]
 [-6.54555573e+00]
 [-6.16787746e+00]
 [-7.26415937e-03]]
[[-9.72234141e+00]
 [-5.83092615e+00]
 [-5.54354364e+00]
 [-1.05751256e+01]

[[-0.04117306]
 [-5.41551644]
 [-5.3806486 ]
 [-6.5478781 ]
 [-7.28477537]
 [-5.47579855]
 [-7.74333357]
 [-3.03704996]
 [-6.96155471]
 [-7.56435208]]
[[ -0.01652808]
 [ -5.9293564 ]
 [ -4.95932106]
 [ -7.58049933]
 [ -6.68563488]
 [ -4.86341224]
 [ -5.1989528 ]
 [ -4.61971132]
 [ -4.60521537]
 [-11.3524375 ]]
[[ -0.02164857]
 [ -4.85927068]
 [ -4.09028698]
 [ -8.80890886]
 [ -5.2224612 ]
 [ -5.2000294 ]
 [ -4.52288934]
 [ -6.35050569]
 [ -4.99973381]
 [-11.21604779]]
[[ -0.05479378]
 [ -4.9487581 ]
 [ -4.05427488]
 [ -7.81270878]
 [ -5.41090123]
 [ -5.47105554]
 [ -3.55266992]
 [ -6.29971394]
 [ -4.87626126]
 [-10.74907248]]
[[-0.26369313]
 [-5.64191019]
 [-4.12055422]
 [-8.07981151]
 [-8.41803669]
 [-6.42590507]
 [-3.07216087]
 [-2.15910136]
 [-5.35017076]
 [-9.45817057]]
[[ -0.0209443 ]
 [ -5.15130973]
 [ -4.42509359]
 [ -6.67792506]
 [ -6.1941885 ]
 [ -4.52834582]
 [ -5.94183172]
 [ -5.07922271]
 [ -4.77975049]
 [-11.85445491]]
[[-0.51101502]
 [-6.75816733]
 [-4.77127741]
 [-7.6234

[[-6.79003105e+00]
 [-4.25910092e-03]
 [-5.43953039e+00]
 [-4.34439320e+00]
 [-7.10412689e+00]
 [-6.08023643e+00]
 [-1.02632808e+01]
 [-5.13311798e+00]
 [-6.64295723e+00]
 [-6.41642782e+00]]
[[-7.56574023e+00]
 [-3.59680897e-03]
 [-5.28549260e+00]
 [-8.07269116e+00]
 [-7.08290785e+00]
 [-7.39757051e+00]
 [-8.74390764e+00]
 [-5.85161997e+00]
 [-4.45368452e+00]
 [-5.17920138e+00]]
[[-4.85705778e+00]
 [-7.74500368e-03]
 [-5.29191019e+00]
 [-9.83438599e+00]
 [-8.09847520e+00]
 [-7.56615643e+00]
 [-5.41612688e+00]
 [-5.59223759e+00]
 [-7.96749719e+00]
 [-8.74190814e+00]]
[[-7.03106827e+00]
 [-7.71426237e-04]
 [-4.83311972e+00]
 [-6.64637579e+00]
 [-7.48003862e+00]
 [-6.92108885e+00]
 [-9.76186838e+00]
 [-7.03965846e+00]
 [-7.49915946e+00]
 [-7.13806982e+00]]
[[ -7.13836378]
 [ -0.21068834]
 [ -2.96866782]
 [-14.00087097]
 [ -6.43733188]
 [ -7.03796745]
 [ -2.27910177]
 [ -9.67461616]
 [-10.08145147]
 [ -3.17103349]]
[[-7.37604063e+00]
 [-1.30100743e-03]
 [-4.33190198e+00]
 [-6.08074773e+00]

[[-7.82859538e+00]
 [-8.48331422e+00]
 [-1.08179107e-02]
 [-5.22697627e+00]
 [-4.18247380e+00]
 [-1.27251553e+01]
 [-6.51978525e+00]
 [-3.81844065e+00]
 [-6.90760321e+00]
 [-7.79129181e+00]]
[[ -6.47124683]
 [ -6.06389206]
 [ -0.21609013]
 [-11.68846589]
 [ -2.34044891]
 [ -5.73722622]
 [ -7.01151153]
 [ -6.79480675]
 [ -5.57442362]
 [ -5.30696319]]
[[-2.55195512]
 [-3.73665241]
 [-0.0428632 ]
 [-9.52209595]
 [-4.66112482]
 [-9.64377743]
 [-7.29702909]
 [-4.79511275]
 [-7.93743794]
 [-6.13785385]]
[[ -5.58852302]
 [ -7.88109851]
 [ -0.04461291]
 [ -8.20728364]
 [ -2.90062003]
 [-11.53688808]
 [ -5.52563664]
 [ -3.72279016]
 [ -4.84770285]
 [ -6.92762644]]
[[-3.81887411e+00]
 [-2.81838895e+00]
 [-1.21794249e-02]
 [-1.31067624e+01]
 [-3.48549556e+00]
 [-7.95998412e+00]
 [-6.24010840e+00]
 [-1.05987446e+01]
 [-6.82375162e+00]
 [-4.52940856e+00]]
[[-5.81528599]
 [-3.15177103]
 [-0.22115503]
 [-8.84942234]
 [-3.351009  ]
 [-8.36466241]
 [-9.73676739]
 [-3.25163815]
 [-5.66898319]
 [-7.48288

 [-4.80777958]]
[[ -8.64449183]
 [-13.08016273]
 [ -2.30072654]
 [ -7.21011484]
 [ -0.03622052]
 [ -8.57460123]
 [ -8.40650508]
 [ -4.38257961]
 [ -4.56564215]
 [ -4.74056972]]
[[ -8.00402446]
 [-11.77395406]
 [ -3.99429491]
 [ -6.11522007]
 [ -0.14485412]
 [ -7.53152282]
 [ -8.40172423]
 [ -3.46182154]
 [ -5.65866675]
 [ -3.77039606]]
[[ -7.33379458]
 [ -8.87289627]
 [ -6.54476593]
 [ -4.88398867]
 [ -0.04667006]
 [ -3.9070072 ]
 [-11.95788279]
 [ -3.22817846]
 [ -3.62001966]
 [ -6.44261967]]
[[-5.883081  ]
 [-0.42931528]
 [-5.83554843]
 [-5.85506161]
 [-3.59907811]
 [-9.45877203]
 [-4.10547236]
 [-7.08559107]
 [-4.12736618]
 [-2.94027103]]
[[ -4.75809804]
 [ -9.85836842]
 [ -3.53217039]
 [-11.07762648]
 [ -0.11266831]
 [ -4.30891818]
 [ -9.28405231]
 [ -3.1808432 ]
 [ -3.05817205]
 [ -4.4646781 ]]
[[ -4.68568221]
 [-11.26653287]
 [ -3.68337605]
 [ -3.23822418]
 [ -0.02224424]
 [ -7.23691566]
 [ -7.63388414]
 [ -7.6278999 ]
 [ -5.58081079]
 [ -7.37229286]]
[[ -6.31815174]
 [ -9.857912

[[-5.46989247e+00]
 [-6.43407225e+00]
 [-8.54485505e+00]
 [-4.41310787e+00]
 [-6.19171137e+00]
 [-7.07438268e-03]
 [-8.98034200e+00]
 [-6.63800040e+00]
 [-7.40753783e+00]
 [-6.89086318e+00]]
[[-5.67109464e+00]
 [-5.19121848e+00]
 [-9.43387855e+00]
 [-6.56196801e+00]
 [-5.62579556e+00]
 [-7.22643645e-03]
 [-1.01189372e+01]
 [-6.04465748e+00]
 [-6.16614623e+00]
 [-6.69747813e+00]]
[[ -5.86333437]
 [ -5.40160508]
 [ -7.49970949]
 [ -6.19281951]
 [ -3.61350907]
 [ -0.01558996]
 [-11.26990632]
 [ -4.32336211]
 [ -6.99299885]
 [ -5.2421678 ]]
[[-6.35678845e+00]
 [-5.13278816e+00]
 [-1.01383793e+01]
 [-5.22278819e+00]
 [-6.78696721e+00]
 [-4.36155500e-03]
 [-1.02712876e+01]
 [-6.42537317e+00]
 [-5.68122836e+00]
 [-6.24318074e+00]]
[[-5.55367733]
 [-4.49017254]
 [-7.89786279]
 [-5.4475075 ]
 [-5.7502499 ]
 [-0.01667362]
 [-9.49719743]
 [-4.30766023]
 [-7.0072941 ]
 [-7.26823624]]
[[ -7.07508605]
 [ -4.0185424 ]
 [-10.21113052]
 [ -4.78758351]
 [ -5.52961926]
 [ -0.01228955]
 [ -9.13967444]
 [ 

[[-8.43917808e+00]
 [-5.64202536e+00]
 [-6.08090011e+00]
 [-7.98304521e+00]
 [-8.51173852e+00]
 [-1.09433675e+01]
 [-8.06798252e-03]
 [-8.85898369e+00]
 [-4.47315237e+00]
 [-6.77032424e+00]]
[[ -4.80924301]
 [ -4.07886567]
 [ -4.45315508]
 [ -8.35959079]
 [ -6.86100199]
 [-11.40009299]
 [ -0.04218611]
 [ -4.53531134]
 [ -4.53640308]
 [-10.74767843]]
[[-4.50191807]
 [-8.91788121]
 [-3.93648274]
 [-0.79525245]
 [-7.33835698]
 [-3.81066008]
 [-0.45102997]
 [-4.90936668]
 [-4.22273197]
 [-8.54913073]]
[[-6.74016942e+00]
 [-1.12540983e+01]
 [-5.58814052e+00]
 [-5.46011761e+00]
 [-7.40231962e+00]
 [-6.93350443e+00]
 [-6.32552947e-03]
 [-7.07887840e+00]
 [-4.57978909e+00]
 [-4.14112029e+00]]
[[ -7.23202379]
 [ -1.08346435]
 [ -7.67570918]
 [ -6.26884109]
 [ -7.99046222]
 [-10.75694221]
 [ -0.16986331]
 [ -6.25182055]
 [ -4.15969949]
 [ -6.80625487]]
[[ -7.52827625]
 [ -6.02952805]
 [ -3.83032633]
 [ -7.1265139 ]
 [ -6.67162019]
 [-11.30260776]
 [ -0.01421841]
 [-10.21787062]
 [ -3.83778844]
 

 [-9.31641479]]
[[-7.0054943 ]
 [-8.89999798]
 [-4.49047086]
 [-4.17756728]
 [-5.78829848]
 [-9.43405958]
 [-5.43514957]
 [-4.92170424]
 [-0.02467512]
 [-8.94034068]]
[[-7.60542721]
 [-6.49588209]
 [-8.20044521]
 [-3.89267129]
 [-7.88608029]
 [-5.84116695]
 [-4.36726718]
 [-5.12784971]
 [-0.07518756]
 [-6.58258054]]
[[-8.3497047 ]
 [-6.49429877]
 [-7.92179767]
 [-2.47333708]
 [-8.33055432]
 [-5.01099027]
 [-3.55403398]
 [-4.79322342]
 [-0.0772546 ]
 [-7.53348169]]
[[ -7.68895676]
 [ -6.77456887]
 [ -7.41534402]
 [ -1.29680948]
 [ -4.00871958]
 [ -7.46409684]
 [ -8.02716696]
 [ -4.06150468]
 [ -0.15572075]
 [-10.16944768]]
[[ -7.45245851]
 [-11.66967796]
 [ -1.10045347]
 [ -6.49128544]
 [ -3.47039874]
 [-10.85244218]
 [ -6.29368161]
 [ -3.74198   ]
 [ -0.8211852 ]
 [ -6.27316185]]
[[-7.68892016]
 [-6.93545402]
 [-8.1117087 ]
 [-4.11116998]
 [-6.62714082]
 [-6.3410914 ]
 [-3.91188118]
 [-4.96268294]
 [-0.03275564]
 [-8.5979794 ]]
[[-7.9698716 ]
 [-8.17235925]
 [-7.21479467]
 [-2.71705852