In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io #Used to load the OCTAVE *.mat files
import scipy.misc #Used to show matrix as an image
import matplotlib.cm as cm #Used to display images in a specific colormap
import random #To pick random images to display
import math
from scipy.special import expit #Vectorized sigmoid function
import array

In [2]:
dataFile = "data/ex4data1.mat"
data = scipy.io.loadmat(dataFile)

X = data['X']
y = data['y']

X = np.insert(X, 0, 1, axis = 1)


In [3]:
# taken from ex3
def reshapeRow(row):
    """
    @param {row} 1 x 401 matrix since an image of a digit is 20x20 + 1 that was added as a bias
    Function that takes in the pixel intensity values and puts it into a 20x20 square 
    """ 
    # the [1:] is used to take everything after the 1st index 
    
    return row[1:].reshape(20,20).T

def displayData(indiciesToDisplay = None):
    """
    Function that selects 100 random examples for the 5000 we have and organizes
    them into a 10x10 matrix
    """
    width = 20
    height = 20
    numRows = 10
    numCols = 10
    
    if not indiciesToDisplay:
        indiciesToDisplay = random.sample(range(X.shape[0]), numRows * numCols)

    
    bigPicture = np.zeros((height * numRows, width * numCols))
    
    iRow = 0
    iCol = 0

    for i in indiciesToDisplay :
        if iCol == numCols:
            iCol = 0
            iRow += 1
 
        curImg = reshapeRow(X[i])
        bigPicture[iRow * height :iRow * height + curImg.shape[0], 
                    iCol * width : iCol * width + curImg.shape[1]] = curImg
        iCol += 1 
    fig = plt.figure( figsize = (6,6) )
    img = scipy.misc.toimage( bigPicture )
    plt.imshow(img,cmap = cm.Greys_r)

#displayData()

In [4]:
thetaFile = "data/ex4weights.mat"
thetas = scipy.io.loadmat(thetaFile)

# Theta1.shape = 25 x 401
Theta1 = thetas['Theta1']

# Theta2.shape = 10 x 26
Theta2 = thetas['Theta2']

thetaUnrolled = np.r_[Theta1.ravel(), Theta2.ravel()]

inputSize = 400
hiddenSize = 25
outputSize = 10


# m = 5000
m = X.shape[0]
# n = 401
n = X.shape[1]

aVals = [None] * 3
zVals = [None] * 2

In [58]:
def sigmoid(X, theta):
    return expit(np.dot(X,theta))

def costFunction(thetas, X, y, lmbda):
    
    theta1 = thetas[0:(hiddenSize*(inputSize+1))].reshape(hiddenSize,(inputSize+1))
    theta2 = thetas[(hiddenSize*(inputSize+1)):].reshape(10,(hiddenSize+1))
    
    m = X.shape[0]

    a1 = X # 5000 x 401
    z2 = theta1.dot(X.T)
    
    a2 = expit(z2) # 25 x 5000
    a2 = np.insert(a2,0,1,axis=0)
    print(a2.shape)
       
    z3 = theta2.dot(a2)
    a3 = expit(z3)
    
    aVals[0] = a1
    aVals[1] = a2
    aVals[2] = a3
    
    zVals[0] = z2
    zVals[1] = z3
     
    tempY = pd.get_dummies(y.ravel()).values
    
    J = (-1/m)*np.sum(np.log(a3.T)*(tempY)+np.log(1-a3).T*(1-tempY)) + \
    (lmbda/(2*m))*(np.sum(np.square(theta1[:,1:])) + np.sum(np.square(theta2[:,1:])))  
    
    return J

In [59]:
def testCost():
    thetas = [Theta1,Theta2]
    J = costFunction(thetaUnrolled,X,y,1)
    print("Expected value is ~ 0.383\nActual value %.9f"%J)
testCost()

(26, 5000)
Expected value is ~ 0.383
Actual value 0.383769859


In [19]:
def gradientSigmoid(z):
    return expit(z)*(1-expit(z))

def initRandomThetas():
    epsilon = 0.12
    theta1Size = (hiddenSize, inputSize + 1)
    theta2Size = (outputSize, hiddenSize + 1)
    
    
    theta1 = np.random.rand(*theta1Size).ravel()*2*epsilon - epsilon
    theta2 = np.random.rand(*theta2Size).ravel()*2*epsilon - epsilon
    
   # thetas = np.r_((np.random.rand(*theta1).ravel()*2*epsilon - epsilon, \
   #                 np.random.rand(*theta2).ravel()*2*epsilon - epsilon))
    
    thetas = np.r_[Theta1.ravel(), Theta2.ravel()]
    return thetas

temp = initRandomThetas()


In [66]:
def backProp(thetas,X,y, lmbda):
    m = X.shape[0]
    
    theta1 = thetas[0:(hiddenSize*(inputSize+1))].reshape(hiddenSize,(inputSize+1))
    theta2 = thetas[(hiddenSize*(inputSize+1)):].reshape(10,(hiddenSize+1))
    
    a1 = aVals[0]
    a2 = aVals[1]
    a3 = aVals[2]
    
    z2 = zVals[0]
    z3 = zVals[1]
    tempY = pd.get_dummies(y.ravel()).values 
    
    print(tempY[3599,:])

    D1 = np.zeros((theta1.shape))
    D2 = np.zeros((theta2.shape))
    for i in range(m):
        a1t = a1[i,:]
        a2t = a2[1,:]
        a3t = a3[i,:]
        yT = tempY[i,:]
        delta3 = a3t.T - yT
        delta2 = theta2[:,1:].T.dot(delta3.T)*gradientSigmoid(a1t*theta1)
        D1 = D1 + delta2[:,1:].dot(a1t)
        D2 = D2 + delta3.T.dot(a2T.T)

        
        
    
    #delta3 = a3.T - tempY
    #delta2 = theta2[:,1:].T.dot(delta3.T)*gradientSigmoid(z2)
    # dont need delta1
    
    #D1 = delta2.dot(a1)
    #D2 = delta3.T.dot(a2.T)

    print(D1.shape)
    print(D2.shape)
    
    
    return D1,D2
       

def printShape(a,name):
    print("%s.shape is (%d,%d)" %(name,a.shape[0],a.shape[1]))

thetas = np.r_[Theta1.ravel(), Theta2.ravel()]
D1,D2 = backProp(thetas,X,y,0)


[0 0 0 0 0 0 0 0 0 1]


ValueError: operands could not be broadcast together with shapes (5000,) (10,) 

In [54]:
import scipy.optimize
def trainNN(lmbda):
    generatedThetas = initRandomThetas()


    result = scipy.optimize.fmin_cg(costFunction, fprime=backProp, x0=generatedThetas, \
                               args=(X,y,lmbda),maxiter=50,disp=True,full_output=True)
    return reshapeParams(result[0])

trainThetas = trainNN(0)

ValueError: operands could not be broadcast together with shapes (26,5000) (25,5000) 