In [99]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
import utils
# tells matplotlib to embed plots within the notebook
%matplotlib inline

# Classification of adults that have an income larger than 50K

# Using a Neural Network

### Loading Processed Data

In [100]:
# Load the preprocessed feature table from CSV into a DataFrame.
data_x = pd.read_csv('adult_processed_x.csv')

In [101]:
# Load the label table from CSV; label value 1 is counted further down as "income above 50K".
data_y= pd.read_csv('adult_processed_y.csv')

In [102]:
# Convert the feature DataFrame to a NumPy array and report its shape and dtype.
X= data_x.to_numpy()
print(X.shape)
print(X.dtype)

(32561, 88)
float64


In [103]:
# Convert the label DataFrame to a NumPy array and report its shape and dtype.
y = data_y.to_numpy()
print(y.shape)
print(y.dtype)

(32561, 1)
int64


## Checking if data is biased 

The data is biased (class-imbalanced).
Only about 24% of the samples (7841 of 32561) earn more than 50K.

In [155]:
# Count the samples labelled 1 (income above 50K) and compare with the
# total number of samples to see how imbalanced the classes are.
large_income = np.count_nonzero(y == 1)
print('Total number of samples with income above 50K:\t%d' % large_income)
print('Total number of samples :\t\t\t%d' % y.size)

Total number of samples with income above 50K:	7841
Total number of samples :			32561


## Splitting Data

In [105]:
# Partition sizes:
#   50% training   - used to fit the network weights,
#   25% validation - used to tune model settings and to detect over-fitting
#                    to the training set,
#   remainder      - held-out test set.
N = len(X)
N_train = int(0.5 * N)
N_val = int(0.25 * N)
N_test = N - N_train - N_val

# Fix the seed of NumPy's global RNG so the shuffle below is reproducible.
np.random.seed(0)

# Shuffle the row indices once, then cut the permutation into three
# consecutive, non-overlapping pieces: training, validation and test.
randperm = np.random.permutation(N)
train_idx, val_idx, test_idx = np.split(randperm, [N_train, N_train + N_val])

Xtrain, Xval, Xtest = (X[idx, :] for idx in (train_idx, val_idx, test_idx))
ytrain, yval, ytest = (y[idx] for idx in (train_idx, val_idx, test_idx))

print('Total number of samples:\t%d' % N)
print('Number of training samples:\t%d' % N_train)
print('Number of validation samples:\t%d' % N_val)
print('Number of test samples:\t%d' % N_test)
print(Xtrain.shape, Xval.shape, Xtest.shape, sep='\n')

Total number of samples:	32561
Number of training samples:	16280
Number of validation samples:	8140
Number of test samples:	8141
(16280, 88)
(8140, 88)
(8141, 88)


## Model representation

Our neural network is shown in the following figure.

It has 3 layers - an input layer, a hidden layer and an output layer.
- Input layer has 88 units (one per feature).
- Hidden layer has 88 units.
- Output layer has 1 unit (the probability that income is above 50K).


In [106]:
# Network dimensions, derived from the training data.
input_layer_size  = Xtrain.shape[1]  # one input unit per feature column
hidden_layer_size = input_layer_size   # hidden layer is given the same width as the input layer
num_labels = 1          # single output unit; label 1 means income above 50K

## Random Initialization of parameters

In [107]:
# ====================== MY CODE HERE ASSIG.4 ======================

def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    """Randomly initialise the weights of one fully connected layer.

    Every weight is drawn independently and uniformly from
    ``[-epsilon_init, epsilon_init)`` so that the units of a layer do not
    all start from identical values.

    Parameters
    ----------
    L_in : int
        Number of incoming connections (units in the previous layer,
        bias unit excluded).
    L_out : int
        Number of outgoing connections (units in this layer).
    epsilon_init : float, optional
        Half-width of the sampling interval.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(L_out, 1 + L_in)``; the extra column holds the
        weights applied to the bias unit.
    """
    # np.random.rand samples [0, 1); scale and shift it onto
    # [-epsilon_init, epsilon_init). NumPy's global RNG is used, so the
    # result is reproducible after np.random.seed(...).
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init

In [108]:
# ====================== MY CODE HERE ASSIG.4 ======================

# Small random starting weights for the input->hidden and hidden->output layers.
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
# Flatten both weight matrices into a single parameter vector
# (all of Theta1 first, then all of Theta2).
initial_nn_params = np.concatenate((initial_Theta1.ravel(), initial_Theta2.ravel()))

## Feedforward and cost function

In [143]:
# ====================== MY CODE HERE ASSIG.4 ======================
def matrix_of_y(y, num_labels):
    """Turn a label vector into an ``(n, num_labels)`` target matrix.

    Parameters
    ----------
    y : array_like
        Integer class labels, shape ``(n,)`` or ``(n, 1)``.
    num_labels : int
        Number of output units of the network.

    Returns
    -------
    numpy.ndarray
        * ``num_labels == 1``: the labels themselves, always shaped
          ``(n, 1)`` so they line up with an ``(n, 1)`` network output
          instead of broadcasting against it into an ``(n, n)`` matrix.
        * otherwise: a one-hot float matrix with ``out[r, y[r]] == 1``.
    """
    y = np.asarray(y)
    n = y.shape[0]

    if num_labels == 1:
        # Single output unit: labels are already the targets, only the
        # column shape has to be guaranteed.
        return y.reshape(n, 1)

    # One-hot encode all rows at once with fancy indexing.
    y_v = np.zeros([n, num_labels])
    y_v[np.arange(n), y.reshape(n)] = 1
    return y_v

In [144]:
# ====================== MY CODE HERE ASSIG.3 ======================
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s); converted to a NumPy array first.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Sigmoid of every element, same shape as ``z``.
    """
    negated = np.multiply(np.array(z), -1)
    return np.reciprocal(np.exp(negated) + 1)

In [145]:
# ====================== MY CODE HERE ASSIG.4 ======================
def sigmoidGradient(z):
    """Derivative of the sigmoid function evaluated at ``z``.

    Uses the identity ``g'(z) = g(z) * (1 - g(z))`` with ``g = sigmoid``.

    Parameters
    ----------
    z : scalar or array_like
        Point(s) at which to evaluate the derivative. Anything accepted by
        ``sigmoid`` works (the input is no longer required to expose a
        ``.shape`` attribute).

    Returns
    -------
    numpy.ndarray or numpy scalar
        Element-wise derivative, same shape as ``z``.
    """
    g_t = sigmoid(z)
    return g_t * (1 - g_t)

In [150]:
# ====================== MY CODE HERE ASSIG.4 ADAPTED======================
def nnCostFunction(nn_params,
                   input_layer_size,
                   hidden_layer_size,
                   num_labels,
                   X, y, lambda_=0.0):
    """Regularised cross-entropy cost and gradient of a 3-layer sigmoid network.

    Parameters
    ----------
    nn_params : numpy.ndarray
        Flat parameter vector: all entries of Theta1 (row-major) followed
        by all entries of Theta2.
    input_layer_size : int
        Number of input features (bias unit excluded).
    hidden_layer_size : int
        Number of hidden units (bias unit excluded).
    num_labels : int
        Number of output units.
    X : numpy.ndarray
        Inputs, shape ``(m, input_layer_size)``, one sample per row.
    y : numpy.ndarray
        Labels, ``m`` entries in total (shape ``(m,)`` or ``(m, 1)``).
    lambda_ : float, optional
        Regularisation strength; the bias columns are never regularised.

    Returns
    -------
    J_reg : float
        Regularised cost.
    grad : numpy.ndarray
        Flat gradient with the same layout as ``nn_params``.
    """
    # Recover the two weight matrices from the flat parameter vector.
    split_at = hidden_layer_size * (input_layer_size + 1)
    Theta1 = np.reshape(nn_params[:split_at],
                        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nn_params[split_at:],
                        (num_labels, hidden_layer_size + 1))

    m = y.size

    # Targets as an (m, num_labels) matrix. The explicit reshape guards the
    # single-output case: a 1-D y would otherwise broadcast against the
    # (m, 1) hypothesis into an (m, m) matrix and silently corrupt the cost.
    y_v = np.reshape(matrix_of_y(y, num_labels), (m, num_labels))

    # ---- Forward pass (one sample per row throughout) ----
    a1 = np.concatenate([np.ones((m, 1)), X], axis=1)             # prepend bias unit
    z2 = a1.dot(Theta1.T)                                         # (m, hidden)
    a2 = np.concatenate([np.ones((m, 1)), sigmoid(z2)], axis=1)   # prepend bias unit
    z3 = a2.dot(Theta2.T)                                         # (m, num_labels)
    hyp = sigmoid(z3)

    # ---- Cost ----
    # Keep the hypothesis strictly inside (0, 1) for the logarithms only:
    # a saturated output would give log(0) = -inf and 0 * -inf = NaN.
    eps = np.finfo(float).eps
    hyp_safe = np.clip(hyp, eps, 1 - eps)
    J = (-1 / m) * np.sum(y_v * np.log(hyp_safe)
                          + (1 - y_v) * np.log(1 - hyp_safe))

    # L2 penalty on every weight except the bias columns (column 0).
    J_reg_factor = (lambda_ / (2 * m)) * (np.sum(np.square(Theta1[:, 1:]))
                                          + np.sum(np.square(Theta2[:, 1:])))
    J_reg = J + J_reg_factor

    # ---- Backpropagation ----
    delta_3 = hyp - y_v
    # Reuse z2 from the forward pass instead of recomputing a1 . Theta1^T;
    # the bias column of Theta2 carries no error back to the hidden layer.
    delta_2 = delta_3.dot(Theta2[:, 1:]) * sigmoidGradient(z2)

    Theta1_grad = (1 / m) * delta_2.T.dot(a1)
    Theta2_grad = (1 / m) * delta_3.T.dot(a2)

    # Regularisation term of the gradient (bias columns excluded).
    Theta1_grad[:, 1:] += (lambda_ / m) * Theta1[:, 1:]
    Theta2_grad[:, 1:] += (lambda_ / m) * Theta2[:, 1:]

    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel()])

    return J_reg, grad

### Test Gradient 

In [151]:
# Run the gradient check on nnCostFunction with regularisation enabled (lambda_ = 1).
# NOTE(review): utils.checkNNGradients is not visible in this notebook; the printed output
# has two matching columns, presumably numerical vs. analytical gradients - confirm in utils.
lambda_=1
utils.checkNNGradients(nnCostFunction, lambda_)


[[-0.00927825 -0.00927825]
 [-0.00559136 -0.00559136]
 [-0.02017486 -0.02017486]
 [-0.00585433 -0.00585433]
 [ 0.00889912  0.00889912]
 [ 0.01315402  0.01315402]
 [-0.01049831 -0.01049831]
 [-0.01910997 -0.01910997]
 [-0.00836011 -0.00836011]
 [ 0.01976123  0.01976123]
 [ 0.00811587  0.00811587]
 [-0.01515689 -0.01515689]
 [ 0.00762814  0.00762814]
 [ 0.00827936  0.00827936]
 [ 0.02014747  0.02014747]
 [ 0.00315079  0.00315079]
 [-0.00674798 -0.00674798]
 [-0.0109273  -0.0109273 ]
 [ 0.01262954  0.01262954]
 [ 0.01809234  0.01809234]
 [ 0.31454497  0.31454497]
 [ 0.14895477  0.14895477]
 [ 0.17770766  0.17770766]
 [ 0.14745891  0.14745891]
 [ 0.15953087  0.15953087]
 [ 0.14381027  0.14381027]
 [ 0.11105659  0.11105659]
 [ 0.03839516  0.03839516]
 [ 0.0775739   0.0775739 ]
 [ 0.03592373  0.03592373]
 [ 0.07350885  0.07350885]
 [ 0.03392626  0.03392626]
 [ 0.0974007   0.0974007 ]
 [ 0.04486928  0.04486928]
 [ 0.05899539  0.05899539]
 [ 0.03843063  0.03843063]
 [ 0.06015138  0.06015138]
 

## Learning parameters using `scipy.optimize.minimize`