In [16]:
'''
Comparing single layer MLP with deep MLP (using TensorFlow)
'''

import numpy as np
import pickle
from scipy.optimize import minimize
from scipy.io import loadmat
from math import sqrt
import time
# Do not change this
def initializeWeights(n_in,n_out):
    """Return random initial weights for one layer of the neural network.

    The entries are drawn uniformly from [-epsilon, epsilon), where
    epsilon = sqrt(6) / sqrt(n_in + n_out + 1).

    Input:
        n_in: number of nodes of the input layer
        n_out: number of nodes of the output layer

    Output:
        W: matrix of random initial weights with size (n_out x (n_in + 1));
           the extra column holds the bias weights.
    """
    bound = sqrt(6) / sqrt(n_in + n_out + 1)
    # np.random.rand samples from [0, 1); stretch to [0, 2*bound) and
    # shift so the interval is centred on zero.
    unit_samples = np.random.rand(n_out, n_in + 1)
    W = unit_samples * 2 * bound - bound
    return W



# Replace this with your sigmoid implementation
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    z may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
def onehotvector(training_label,n_class):
    """Convert a vector of class labels into a one-hot (1-of-K) matrix.

    Input:
        training_label: array whose first dimension indexes the samples and
            which holds one label per sample. Labels are truncated to
            integers, as int() would do.
        n_class: number of classes (columns of the result).

    Output:
        new_label: integer matrix of size (n_samples x n_class). Row i has a
            1 in column training_label[i] and 0 elsewhere. A label outside
            [0, n_class) yields an all-zero row.

    Raises:
        ValueError: if training_label does not hold exactly one label per
            sample.
    """
    labels = np.asarray(training_label)
    n_samples = labels.shape[0]
    # The builtin int replaces np.int, which was removed in NumPy 1.24.
    class_idx = labels.astype(int).ravel()
    if class_idx.shape[0] != n_samples:
        raise ValueError("training_label must hold exactly one label per sample")

    new_label = np.zeros((n_samples, n_class), dtype=int)
    # Only labels naming an existing column are set; the rest stay all-zero.
    in_range = (class_idx >= 0) & (class_idx < n_class)
    new_label[np.flatnonzero(in_range), class_idx[in_range]] = 1
    return new_label
# Replace this with your nnObjFunction implementation
def nnObjFunction(params, *args):
    """Regularized cross-entropy objective and gradient of a 1-hidden-layer net.

    Both layers use sigmoid activations. The signature matches what
    scipy.optimize.minimize expects when called with jac=True.

    Input:
        params: 1D array holding the flattened weights, w1 followed by w2.
        args: tuple (n_input, n_hidden, n_class, training_data,
            training_label, lambdaval) where
            n_input: number of input features (bias not included)
            n_hidden: number of hidden units (bias not included)
            n_class: number of output units
            training_data: matrix of size (n x n_input)
            training_label: one class label per training sample
            lambdaval: L2 regularization strength

    Output:
        obj_val: scalar value of the objective
        obj_grad: 1D array with the gradient, laid out like params
    """
    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    # Recover the two weight matrices; the last column of each is the bias.
    split = n_hidden * (n_input + 1)
    w1 = params[:split].reshape((n_hidden, n_input + 1))
    w2 = params[split:].reshape((n_class, n_hidden + 1))

    n = training_data.shape[0]
    bias = np.ones((n, 1))

    # ---- Forward pass -----------------------------------------------------
    X = np.concatenate((training_data, bias), axis=1)   # (n, n_input + 1)
    hidden = sigmoid(np.dot(w1, X.T)).T                 # (n, n_hidden)
    Z = np.concatenate((hidden, bias), axis=1)          # (n, n_hidden + 1)
    B = np.dot(w2, Z.T)                                 # (n_class, n) logits
    O = sigmoid(B)                                      # (n_class, n)

    Y = onehotvector(training_label, n_class).T         # (n_class, n)

    # ---- Objective --------------------------------------------------------
    # Cross-entropy is evaluated from the logits:
    #   -log(sigmoid(B))     = logaddexp(0, -B)
    #   -log(1 - sigmoid(B)) = logaddexp(0,  B)
    # Taking np.log of O directly yields -inf / nan as soon as the sigmoid
    # saturates to exactly 0 or 1, which would poison the optimizer.
    cross_entropy = Y * np.logaddexp(0, -B) + (1 - Y) * np.logaddexp(0, B)
    EF = np.sum(cross_entropy) / n

    # The penalty covers every weight, bias columns included; the gradient
    # below is kept consistent with that.
    Regularization = (lambdaval / (2 * n)) * (np.sum(w1 ** 2) + np.sum(w2 ** 2))
    obj_val = EF + Regularization

    # ---- Backward pass ----------------------------------------------------
    delta_output = O - Y                                # (n_class, n)
    w2err = np.dot(delta_output, Z)                     # (n_class, n_hidden + 1)

    # The hidden bias unit has no incoming weights, so the bias column of w2
    # is left out when propagating the error back.
    delta_hidden = np.dot(w2[:, :-1].T, delta_output) * (hidden.T * (1 - hidden.T))
    w1err = np.dot(delta_hidden, X)                     # (n_hidden, n_input + 1)

    grad_w1 = (lambdaval * w1 + w1err) / n
    grad_w2 = (lambdaval * w2 + w2err) / n

    # Flatten in the same order as params (w1 first, then w2).
    obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()), 0)

    return (obj_val, obj_grad)
    
# Replace this with your nnPredict implementation
def nnPredict(w1,w2,data):
    """Predict class labels for data with a trained 1-hidden-layer network.

    Input:
        w1: weights from input to hidden layer, size (n_hidden x (n_input + 1))
        w2: weights from hidden to output layer, size (n_class x (n_hidden + 1))
        data: matrix with one sample per row

    Output:
        labels: 1D array with, for every sample, the index of the output
            unit that has the largest activation
    """
    n_samples = data.shape[0]

    # Input layer -> hidden layer (a constant-1 bias column is appended).
    inputs = np.concatenate((data, np.ones((n_samples, 1))), axis=1)
    hidden = sigmoid(np.dot(w1, inputs.T)).T

    # Hidden layer -> output layer (bias column appended again).
    hidden_bias = np.ones((len(hidden), 1))
    hidden = np.concatenate((hidden, hidden_bias), axis=1)
    outputs = sigmoid(np.dot(w2, hidden.T))

    # Outputs are (n_class x n_samples): pick the winning row per column.
    return np.argmax(outputs, 0)
# Do not change this
def preprocess():
    """Load 'face_all.pickle' and split it into train/validation/test sets.

    The pickle is expected to hold a dict with the keys 'Features' and
    'Labels'. Features are divided by 255; the labels are taken from the
    first element of 'Labels'.

    Output:
        train_x, train_y: samples 0 .. 21099
        valid_x, valid_y: samples 21100 .. 23764
        test_x, test_y: samples 23765 .. end
    """
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The with-block guarantees the file handle is closed after loading.
    with open('face_all.pickle', 'rb') as pickle_file:
        pickle_obj = pickle.load(pickle_file)
    features = pickle_obj['Features']
    labels = pickle_obj['Labels']
    train_x = features[0:21100] / 255
    valid_x = features[21100:23765] / 255
    test_x = features[23765:] / 255

    labels = labels[0]
    train_y = labels[0:21100]
    valid_y = labels[21100:23765]
    test_y = labels[23765:]
    return train_x, train_y, valid_x, valid_y, test_x, test_y

# Driver script: load the data, train the one-hidden-layer network with
# conjugate gradient, then report the accuracy on each split and the total
# wall-clock time.
ts=time.time()  # start of the timed section
"""**************Neural Network Script Starts here********************************"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()
#  Train Neural Network
# set the number of nodes in input unit (not including bias unit)
n_input = train_data.shape[1]
# set the number of nodes in hidden unit (not including bias unit)
n_hidden = 256
# set the number of nodes in output unit
n_class = 2

# initialize the weights into some random matrices
initial_w1 = initializeWeights(n_input, n_hidden);
initial_w2 = initializeWeights(n_hidden, n_class);
# unroll the 2 weight matrices into a single 1D vector (w1 first, then w2)
initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()),0)
# set the regularization hyper-parameter
lambdaval = 10;
# extra arguments forwarded to nnObjFunction on every evaluation
args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)

# Train the neural network using minimize from the scipy.optimize module.
# jac=True tells minimize that nnObjFunction returns (objective, gradient).
opts = {'maxiter' :50}    # Preferred value.

nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args,method='CG', options=opts)
# 'x' is the solution vector found by the optimizer
params = nn_params.get('x')
# Reshape params from a 1D vector back into the w1 and w2 matrices
w1 = params[0:n_hidden * (n_input + 1)].reshape( (n_hidden, (n_input + 1)))
w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))

# Test the computed parameters
predicted_label = nnPredict(w1,w2,train_data)
# find the accuracy on the Training Dataset
print('\n Training set Accuracy:' + str(100*np.mean((predicted_label == train_label).astype(float))) + '%')
predicted_label = nnPredict(w1,w2,validation_data)
# find the accuracy on the Validation Dataset
print('\n Validation set Accuracy:' + str(100*np.mean((predicted_label == validation_label).astype(float))) + '%')
predicted_label = nnPredict(w1,w2,test_data)
# find the accuracy on the Test Dataset
print('\n Test set Accuracy:' +  str(100*np.mean((predicted_label == test_label).astype(float))) + '%')

te=time.time()  # end of the timed section
# elapsed wall-clock time in seconds (loading, training and evaluation)
print("Time required",te-ts)


 Training set Accuracy:83.7630331754%

 Validation set Accuracy:82.4390243902%

 Test set Accuracy:83.9515518547%
Time required 110.70666193962097
