In [1]:
'''
    Comparing single layer MLP with deep MLP (using TensorFlow)
'''

import numpy as np
from scipy.optimize import minimize
from scipy.io import loadmat
from scipy.stats import logistic
from math import sqrt
import time
import pickle

In [2]:
# Do not change this
def initializeWeights(n_in,n_out):
    """Draw a random initial weight matrix for one fully connected layer.

    Every entry is sampled uniformly from [-epsilon, epsilon), where
    epsilon = sqrt(6) / sqrt(n_in + n_out + 1).

    Input:
    n_in: number of nodes of the input layer
    n_out: number of nodes of the output layer

    Output:
    W: matrix of random initial weights with size (n_out x (n_in + 1));
       the extra column is for the bias term.
    """
    bound = sqrt(6) / sqrt(n_in + n_out + 1)
    # rand() yields values in [0, 1); stretch to [0, 2*bound) and re-centre on 0.
    uniform_draws = np.random.rand(n_out, n_in + 1)
    return (uniform_draws * 2 * bound) - bound

In [3]:
def sigmoid(z):
    """Apply the logistic function 1 / (1 + exp(-z)) element-wise.

    z may be a scalar or a NumPy array; an array result has the shape of z.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

In [4]:
def nnObjFunction(params, *args):
    """Objective value and gradient of a one-hidden-layer sigmoid network.

    The objective is the negative log-likelihood (binary cross-entropy summed
    over the output units, averaged over the samples) plus an L2 penalty on
    every entry of both weight matrices (bias weights included).

    Input:
    params: 1-D vector holding w1 (n_hidden x (n_input + 1)) followed by
            w2 (n_class x (n_hidden + 1)), both flattened row by row.
            Column 0 of each matrix multiplies the bias input.
    args:   (n_input, n_hidden, n_class, training_data, training_label, lambdaval)
            training_data  - (n x n_input) feature matrix
            training_label - n class indices in [0, n_class)
            lambdaval      - regularization strength

    Output:
    (obj_val, obj_grad): the scalar objective and its gradient, flattened in
    the same layout as params (suitable for scipy.optimize.minimize with jac=True).
    """
    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    split = n_hidden * (n_input + 1)
    w1 = params[:split].reshape((n_hidden, n_input + 1))
    w2 = params[split:].reshape((n_class, n_hidden + 1))

    n = training_data.shape[0]

    # ---- Step 1: feedforward propagation ----
    # Prepend a constant-1 bias input to every sample.
    bias_column = np.ones((n, 1))
    training_data_bias = np.concatenate((bias_column, training_data), axis=1)

    # zj: output of hidden unit j (sigmoid of the linear combination with w1).
    zj = sigmoid(np.dot(training_data_bias, w1.T))
    zj_bias = np.concatenate((bias_column, zj), axis=1)

    # bl: pre-activation of output unit l (one unit per class); ol: its sigmoid.
    bl = np.dot(zj_bias, w2.T)
    ol = sigmoid(bl)

    # ---- Step 2: error (negative log-likelihood) ----
    # yl: one-hot ground truth, one row per sample.
    labels = np.asarray(training_label).ravel()[:n].astype(int)
    yl = np.zeros((n, n_class))
    yl[np.arange(n), labels] = 1.0

    # log(sigmoid(b)) = -log(1 + exp(-b)) and log(1 - sigmoid(b)) = -log(1 + exp(b)).
    # Evaluating them from bl via logaddexp keeps them finite when ol rounds to
    # exactly 0 or 1, where np.log(ol) would give -inf and 0 * -inf = nan.
    log_ol = -np.logaddexp(0.0, -bl)
    log_ol_prime = -np.logaddexp(0.0, bl)
    error = -np.sum(yl * log_ol + (1.0 - yl) * log_ol_prime) / n

    # ---- Step 3: gradients by backpropagation ----
    delta = ol - yl
    gradient_w2 = np.dot(delta.T, zj_bias)

    # The hidden bias unit has no incoming weights, so only columns 1: of w2
    # propagate error back to w1.
    hidden_delta = np.dot(delta, w2[:, 1:]) * zj * (1.0 - zj)
    gradient_w1 = np.dot(hidden_delta.T, training_data_bias)

    # ---- Step 4: regularization ----
    regularization = lambdaval * (np.sum(w1 ** 2) + np.sum(w2 ** 2)) / (2 * n)
    obj_val = error + regularization

    gradient_w1_reg = (gradient_w1 + lambdaval * w1) / n
    gradient_w2_reg = (gradient_w2 + lambdaval * w2) / n

    obj_grad = np.concatenate((gradient_w1_reg.flatten(), gradient_w2_reg.flatten()), 0)

    return (obj_val, obj_grad)

In [5]:
def nnPredict(w1, w2, training_data):
    """Predict a class index for every row of training_data.

    Input:
    w1: hidden-layer weights, (n_hidden x (n_input + 1)), bias weight in column 0
    w2: output-layer weights, (n_class x (n_hidden + 1)), bias weight in column 0
    training_data: (n x n_input) feature matrix

    Output:
    labels: length-n vector with the index of the largest output unit per row
    """
    def prepend_bias(matrix):
        # Put a column of ones in front so column 0 of the weights acts as the bias.
        ones = np.ones((matrix.shape[0], 1), dtype=int)
        return np.hstack((ones, matrix))

    hidden_out = sigmoid(np.dot(prepend_bias(training_data), w1.T))
    output_out = sigmoid(np.dot(prepend_bias(hidden_out), w2.T))

    return np.argmax(output_out, axis=1)

In [6]:

# Do not change this
def preprocess():
    """Load 'face_all.pickle' and split it into train / validation / test sets.

    The pickle is expected to hold a dict with keys 'Features' and 'Labels'.
    Features are divided by 255 (presumably 8-bit pixel intensities scaled to
    [0, 1] -- confirm against the dataset) and only row 0 of 'Labels' is used.

    Rows [0, 21100) form the training set, [21100, 23765) the validation set
    and [23765, end) the test set.

    Output:
    train_x, train_y, valid_x, valid_y, test_x, test_y
    """
    # NOTE(review): pickle.load can execute arbitrary code -- only use a trusted file.
    # The context manager closes the handle, which the bare open() call never did.
    with open('face_all.pickle', 'rb') as pickle_file:
        pickle_obj = pickle.load(pickle_file)
    features = pickle_obj['Features']
    labels = pickle_obj['Labels']
    train_x = features[0:21100] / 255
    valid_x = features[21100:23765] / 255
    test_x = features[23765:] / 255

    labels = labels[0]
    train_y = labels[0:21100]
    valid_y = labels[21100:23765]
    test_y = labels[23765:]
    return train_x, train_y, valid_x, valid_y, test_x, test_y

"""**************Neural Network Script Starts here********************************"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

# ---- Train the neural network ----
trainingStart = time.time()
# number of nodes in the input layer (not including the bias unit)
n_input = train_data.shape[1]
# number of nodes in the hidden layer (not including the bias unit)
n_hidden = 256
# number of nodes in the output layer (one per class)
n_class = 2

# initialize the weights with random matrices
initial_w1 = initializeWeights(n_input, n_hidden)
initial_w2 = initializeWeights(n_hidden, n_class)
# unroll the two weight matrices into a single vector, w1 first
initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()), 0)
# regularization hyper-parameter
lambdaval = 10
args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)

# Conjugate-gradient optimisation; nnObjFunction returns (value, gradient), hence jac=True.
opts = {'maxiter': 50}    # Preferred value.

nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args, method='CG', options=opts)
# Stop the clock here so that "Training Time" covers optimisation only,
# not the three evaluation passes below.
trainingEnd = time.time()

params = nn_params.x
# Reshape the 1-D solution back into the w1 and w2 matrices
w1 = params[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))

# ---- Evaluate the learned parameters on every split ----
for split_name, split_data, split_label in (
        ('Training', train_data, train_label),
        ('Validation', validation_data, validation_label),
        ('Test', test_data, test_label)):
    predicted_label = nnPredict(w1, w2, split_data)
    print('\n ' + split_name + ' set Accuracy:' + str(100*np.mean((predicted_label == split_label).astype(float))) + '%')

print('Training Time:',(trainingEnd-trainingStart))


 Training set Accuracy:85.77251184834124%

 Validation set Accuracy:84.765478424015%

 Test set Accuracy:86.26040878122635%
Training Time: 48.06817364692688
