In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io #Used to load the OCTAVE *.mat files
import scipy.misc #Used to show matrix as an image
import matplotlib.cm as cm #Used to display images in a specific colormap
import random #To pick random images to display
import math
from scipy.special import expit #Vectorized sigmoid function

In [2]:
# Training set from the exercise's .mat file: 'X' holds one example per row,
# 'y' the matching labels.
dataFile = "data/ex4data1.mat"
data = scipy.io.loadmat(dataFile)

y = data['y']

# Prepend a column of ones so every example carries its bias input at index 0.
X = np.insert(data['X'], 0, 1, axis=1)

In [3]:
# taken from ex3
def reshapeRow(row):
    """
    Turn one example row into a 20x20 image array.

    @param {row} 401-element array: a leading bias entry followed by the 400
                 pixel intensities of a 20x20 digit image
    @return 20x20 array: the pixels reshaped row-major and then transposed
    """
    # Index 0 is the bias entry, not a pixel, so it is dropped first.
    pixels = row[1:]
    image = pixels.reshape(20, 20)
    return image.transpose()

def displayData(indiciesToDisplay = None):
    """
    Tile digit images taken from the global X into a 10x10 grid and show it.

    @param {indiciesToDisplay} optional iterable (list, range, numpy array, ...)
           of at most 100 row indices into X. When it is None or empty, up to
           100 distinct rows are picked at random instead.
    Raises ValueError when more than 100 indices are given, because the grid
    has no cell left to put them in.
    """
    width = 20
    height = 20
    numRows = 10
    numCols = 10
    capacity = numRows * numCols

    # Materialise the indices as a list before truth-testing them: calling
    # `not` directly on a numpy array of indices raises "truth value of an
    # array is ambiguous".
    if indiciesToDisplay is not None:
        indiciesToDisplay = list(indiciesToDisplay)

    if not indiciesToDisplay:
        # Never ask for more samples than there are rows, which would make
        # random.sample raise.
        sampleSize = min(capacity, X.shape[0])
        indiciesToDisplay = random.sample(range(X.shape[0]), sampleSize)
    elif len(indiciesToDisplay) > capacity:
        raise ValueError("displayData can show at most %d images, got %d"
                         % (capacity, len(indiciesToDisplay)))

    bigPicture = np.zeros((height * numRows, width * numCols))

    # Fill the grid left to right, top to bottom; unused cells stay zero.
    for position, i in enumerate(indiciesToDisplay):
        iRow, iCol = divmod(position, numCols)
        curImg = reshapeRow(X[i])
        top = iRow * height
        left = iCol * width
        bigPicture[top : top + curImg.shape[0],
                   left : left + curImg.shape[1]] = curImg

    plt.figure( figsize = (6,6) )
    # scipy.misc.toimage no longer exists in current SciPy releases; imshow
    # maps the raw array through the colormap on its own.
    plt.imshow(bigPicture, cmap = cm.Greys_r)

# Uncomment the call below to preview a 10x10 grid of randomly chosen digits.
#displayData()

In [4]:
# Layer sizes of the network
inputSize, hiddenSize, outputSize = 400, 25, 10

# Weight matrices loaded from the exercise's weights file
thetaFile = "data/ex4weights.mat"
thetas = scipy.io.loadmat(thetaFile)

# Theta1.shape = 25 x 401
# Theta2.shape = 10 x 26
Theta1, Theta2 = thetas['Theta1'], thetas['Theta2']

# n = 401
n = X.shape[1]

In [7]:
def sigmoid(X, theta):
    """
    Logistic function applied element-wise to the product of X and theta.

    @param {X}     array forming the left operand of the dot product
    @param {theta} array forming the right operand of the dot product
    @return expit(np.dot(X, theta)), i.e. values strictly between 0 and 1
            except where the floating-point result saturates
    """
    linear = np.dot(X, theta)
    return expit(linear)

def costFunction(X, y, thetas, lmbda):
    """
    Regularized cross-entropy cost of a feed-forward network over a data set.

    @param {X}      m x (n+1) array, one example per row, bias column of ones
                    already in column 0
    @param {y}      m labels (shape (m,) or (m, 1)) numbered from 1; label k
                    switches on output unit k-1
    @param {thetas} non-empty sequence of weight matrices, one per layer, each
                    holding its bias weights in column 0
    @param {lmbda}  regularization strength
    @return scalar cost J
    Raises ValueError when X has no rows or thetas is empty.
    """
    X = np.asarray(X)
    m = X.shape[0]
    if m == 0:
        raise ValueError("costFunction needs at least one training example")
    if len(thetas) == 0:
        raise ValueError("costFunction needs at least one weight matrix")

    # Forward-propagate every example at once (rows stay examples).
    a = X
    lastLayer = len(thetas) - 1
    for i, curTheta in enumerate(thetas):
        a = expit(a.dot(np.asarray(curTheta).T))
        # Hidden layers feed the next matrix, which expects a bias unit in
        # front; the output layer must stay untouched.
        if i != lastLayer:
            a = np.insert(a, 0, 1, axis=1)

    # One-hot encode the labels with as many columns as the network has
    # outputs. The cast to int keeps `label - 1` from wrapping around when y
    # is stored as an unsigned type.
    numLabels = a.shape[1]
    labels = np.asarray(y).reshape(-1).astype(int)
    tempY = np.zeros((m, numLabels))
    tempY[np.arange(m), labels - 1] = 1

    totalCost = -np.sum(tempY * np.log(a) + (1 - tempY) * np.log(1 - a)) / m

    # Bias weights (column 0) are not penalised. The matrices are summed one by
    # one because their shapes differ and cannot be stacked into one array.
    totalRegularized = sum(np.sum(np.square(np.asarray(curTheta)[:, 1:]))
                           for curTheta in thetas)
    totalRegularized *= float(lmbda)/(2*m)

    return totalCost + totalRegularized

# remember that theta is initialized to random values
# returns only the last layer's activations (aL) as a column vector, not every a1..aL
def feedForward(row, thetas):
    """
    Propagate one example through the network and return the output layer.

    @param {row}    input activations of a single example, bias entry included;
                    its length must match the column count of thetas[0]
    @param {thetas} non-empty sequence of weight matrices, one per layer; each
                    matrix after the first expects a bias unit in front of the
                    previous layer's activations
    @return (K, 1) column vector holding the activations of the last layer only
    Raises ValueError when thetas is empty.
    """
    nThetas = len(thetas)
    if nThetas == 0:
        raise ValueError("feedForward needs at least one weight matrix")

    # Work on a flat vector throughout; the column shape is restored on return.
    a = np.asarray(row).reshape(-1)
    for i, curTheta in enumerate(thetas):
        a = expit(np.dot(curTheta, a))

        # Only hidden layers get a bias unit. The last layer is the output, and
        # that also holds when the network has a single weight matrix.
        if i != nThetas - 1:
            a = np.insert(a, 0, 1)

    return a.reshape(len(a), 1)

In [13]:
def testCost():
    """
    Print the cost of the loaded Theta1/Theta2 weights on the global X, y with
    lambda = 1, next to the value the exercise expects.
    """
    weights = [Theta1, Theta2]
    cost = costFunction(X, y, weights, lmbda=1)
    print("Expected value is ~ 0.383\nActual value %.5f" % cost)

In [20]:
def gradientSigmoid(z):
    """
    Derivative of the logistic function, evaluated element-wise at z.

    @param {z} scalar or array of pre-activation values
    @return s * (1 - s) where s = expit(z), with the same shape as z
    """
    s = expit(z)
    return s * (1 - s)

def initRandomThetas():
    """
    Build a fresh pair of weight matrices with small random entries.

    Shapes come from the module-level layer sizes: the first matrix is
    hiddenSize x (inputSize + 1), the second outputSize x (hiddenSize + 1).
    Entries are np.random.rand draws rescaled into [-epsilon, epsilon).

    @return list [theta1, theta2]
    """
    epsilon = 0.12
    shapes = [(hiddenSize, inputSize + 1),
              (outputSize, hiddenSize + 1)]
    # rand() yields values in [0, 1); scale and shift them around zero.
    return [np.random.rand(*shape)*2*epsilon - epsilon for shape in shapes]

# Draw one random initialisation; temp is the list [theta1, theta2] returned
# by initRandomThetas.
temp = initRandomThetas()

    