# Final Exam Second Semester 2567 - Neural Network (Mobile Price Problem)

This exam problem has an objective to develop a neural network model to classify the mobile price as belonging to one out of four classes from 20 features.

In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

# library written for this exam
import utilsNN as utils

# tells matplotlib to embed plots within the notebook
%matplotlib inline

import random 
random.seed(10)

## We start the exam by first loading the dataset. 

In [4]:
# Load training dataset

# Read tab-separated training data
data = np.loadtxt(r'D:\M.eng\Machine learning\final_crisis\data\NN_MobileData_Train.txt')

# Automatically detect number of features
n_features = data.shape[1] - 1  # Total columns - 1 label column

# First 20 columns are features, last column is the label
X, y = data[:, :20], data[:, 20].astype(int)

m = y.size  # Number of training examples

# Load testing dataset

# Read tab-separated testing data
data_test = np.loadtxt(r'D:\M.eng\Machine learning\final_crisis\data\NN_MobileData_Test.txt',)

# Use the same n_features detected earlier
X_test, y_test = data_test[:, :20], data_test[:, 20].astype(int)

# Quick check
print(f"Training set: X shape = {X.shape}, y shape = {y.shape}")
print(f"Testing set: X_test shape = {X_test.shape}, y_test shape = {y_test.shape}")

Training set: X shape = (1600, 20), y shape = (1600,)
Testing set: X_test shape = (400, 20), y_test shape = (400,)


### Initial parameters to be used in optimize.minimize

#### *** Do not initialize parameters by yourself in this exam problem. ***

You have been provided with a set of initialized network parameters ($\Theta^{(1)}, \Theta^{(2)}$). These are stored in `InitMobileWeight1.txt` and `InitMobileWeight2.txt` which will be loaded in the next cell of this notebook into `Theta1` and `Theta2`. The parameters have dimensions that are sized for a neural network with 30 units in the second layer (hidden layer) and 4 output units (corresponding to 4 mobile phone price groups).

In [5]:
# Load initiallized network parameters

Theta1 = np.loadtxt(r'D:\M.eng\Machine learning\final_crisis\data\InitMobileWeight1.txt')
Theta2 = np.loadtxt(r'D:\M.eng\Machine learning\final_crisis\data\InitMobileWeight2.txt')
print('Shape of Theta1 =', Theta1.shape)
print('Shape of Theta2 =', Theta2.shape)

# Unroll parameters 
# To unroll the matrix into vector (1-D array), we use `np.ravel()` 
nn_params = np.concatenate([np.ravel(Theta1), np.ravel(Theta2)])

Shape of Theta1 = (30, 21)
Shape of Theta2 = (4, 31)


In [6]:
initial_nn_params = nn_params

In [7]:
initial_nn_params.shape

(754,)

### Model representation

This neural network has 3 layers - an input layer, a hidden layer and an output layer. 

The inputs are **20** features of mobile phones.

The hidden layer has **30** neurons.

The outputs are **4** mobile phone price groups (0 to 3).

The training data was loaded into the variables `X` and `y` above.

In [8]:
# Setup the parameters you will use for this exam
input_layer_size  = 20
hidden_layer_size = 30
num_labels = 4

<blockquote>Forward Propagation

In [9]:
def sigmoid(z):
    z = np.array(z)
    g = np.zeros(z.shape)
    g = 1 / (1 + np.exp(-z))
    return g

In [10]:
# Test the implementation of sigmoid function here
z = 0
g = sigmoid(z)

print('g(',z,') = ', g)

g( 0 ) =  0.5


In [38]:
#cost function of neural network
def nnCostFunction(nn_params,
                   input_layer_size,
                   hidden_layer_size,
                   num_labels,
                   X, y, lambda_=0.0):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, (input_layer_size + 1)))  

    Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):], 
                        (num_labels, (hidden_layer_size + 1)))  

    # Setup some useful variables
    m = y.size
         
    # You need to return the following variables correctly 
    J = 0
    grad = []
    Theta1_grad = np.zeros(Theta1.shape) 
    Theta2_grad = np.zeros(Theta2.shape) 

    """ Feed Forward """
    #let a1 = X and add the bias unit
    a1 = np.concatenate([np.ones((m,1)),X],axis = 1) 
    z2 = np.dot(a1,Theta1.T) 
    a2 = sigmoid(z2)
    a2 = np.concatenate([np.ones((a2.shape[0], 1)), a2],axis=1) 
    z3 = np.dot(a2,Theta2.T) 
    a3 = sigmoid(z3)
    # h = activation unit values of the last output layer
    h = a3 

    """ Cost Function"""
    y_matrix = y
    y_matrix = np.eye(num_labels)[y_matrix] #dimen = 5000 x 10
    logprobs = np.multiply(np.log(h),y_matrix) + np.multiply(np.log(1-h),1-y_matrix)
    J = (-1/m)*np.sum(logprobs)

    """Regularization term"""
    reg_term = (lambda_ / (2 * m)) * (np.sum(np.square(Theta1[:,1:])) \
                                        + np.sum(np.square(Theta2[:,1:])))
    J = J + reg_term
    
    """Back Propagation"""
    delta_3 = h - y_matrix #dimen = 5000 x 10
    delta_2 = np.dot(delta_3, Theta2[:,1:]) * sigmoidGradient(z2) #dimen = 5000 x 25
    Delta1 = np.dot(delta_2.T, a1) #dimen = 25 x 401
    Delta2 = np.dot(delta_3.T, a2) #dimen = 10 x 26
    Theta1_grad = (1/m) * Delta1
    Theta2_grad = (1/m) * Delta2
    grad = np.concatenate([np.ravel(Theta1_grad), np.ravel(Theta2_grad)])
    
    """Regularized to gradient"""
    Theta1_grad[:,1:] = Theta1_grad[:,1:] + (lambda_ /m) * Theta1[:,1:]
    Theta2_grad[:,1:] = Theta2_grad[:,1:] + (lambda_ /m) * Theta2[:,1:]
    
    #update grad with regularization
    grad = np.concatenate([np.ravel(Theta1_grad),np.ravel(Theta2_grad)])
    
    return J, grad


In [15]:
#run sigmoid gradient first
lambda_ = 0
J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_)
print('Cost at parameters (loaded from NN_weights): %.6f ' % J)

Cost at parameters (loaded from NN_weights): 2.746518 


In [16]:
#run sigmoid gradient first
#ไม่ regularized bias
lambda_ = 1
J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_)
print('Cost at parameters (loaded from NN_weights): %.6f ' % J)

Cost at parameters (loaded from NN_weights): 2.746661 


<blockquote>Backward Propagation

In [14]:
def sigmoidGradient(z):
    g = np.zeros(z.shape)
    g = sigmoid(z)*(1-sigmoid(z))
    return g

In [17]:
z = np.array([-1, -0.5, 0, 0.5, 1])
g = sigmoidGradient(z)
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ')
print(g)

Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:
  
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]


In [18]:
utils.predict

<function utilsNN.predict(Theta1, Theta2, X)>

In [19]:
#Gradient Checking
utils.checkNNGradients(nnCostFunction)

[[-9.27825235e-03 -9.27825236e-03]
 [-3.04978709e-06 -3.04978914e-06]
 [-1.75060084e-04 -1.75060082e-04]
 [-9.62660640e-05 -9.62660620e-05]
 [ 8.89911959e-03  8.89911960e-03]
 [ 1.42869450e-05  1.42869443e-05]
 [ 2.33146358e-04  2.33146357e-04]
 [ 1.17982666e-04  1.17982666e-04]
 [-8.36010761e-03 -8.36010762e-03]
 [-2.59383093e-05 -2.59383100e-05]
 [-2.87468729e-04 -2.87468729e-04]
 [-1.37149709e-04 -1.37149706e-04]
 [ 7.62813550e-03  7.62813551e-03]
 [ 3.69883257e-05  3.69883234e-05]
 [ 3.35320351e-04  3.35320347e-04]
 [ 1.53247082e-04  1.53247082e-04]
 [-6.74798369e-03 -6.74798370e-03]
 [-4.68759742e-05 -4.68759769e-05]
 [-3.76215583e-04 -3.76215587e-04]
 [-1.66560294e-04 -1.66560294e-04]
 [ 3.14544970e-01  3.14544970e-01]
 [ 1.64090819e-01  1.64090819e-01]
 [ 1.64567932e-01  1.64567932e-01]
 [ 1.58339334e-01  1.58339334e-01]
 [ 1.51127527e-01  1.51127527e-01]
 [ 1.49568335e-01  1.49568335e-01]
 [ 1.11056588e-01  1.11056588e-01]
 [ 5.75736494e-02  5.75736493e-02]
 [ 5.77867378e-02  5

In [43]:
#  Check gradients by running checkNNGradients
lambda_ = 1
utils.checkNNGradients(nnCostFunction, lambda_)

# Also output the costFunction debugging values
debug_J, _  = nnCostFunction(nn_params, input_layer_size,
                          hidden_layer_size, num_labels, X, y, lambda_)

print('\n\nCost at (fixed) debugging parameters (w/ lambda = %f): %f ' % (lambda_, debug_J))

[[-0.00927825 -0.00927825]
 [-0.00559136 -0.00559136]
 [-0.02017486 -0.02017486]
 [-0.00585433 -0.00585433]
 [ 0.00889912  0.00889912]
 [ 0.01315402  0.01315402]
 [-0.01049831 -0.01049831]
 [-0.01910997 -0.01910997]
 [-0.00836011 -0.00836011]
 [ 0.01976123  0.01976123]
 [ 0.00811587  0.00811587]
 [-0.01515689 -0.01515689]
 [ 0.00762814  0.00762814]
 [ 0.00827936  0.00827936]
 [ 0.02014747  0.02014747]
 [ 0.00315079  0.00315079]
 [-0.00674798 -0.00674798]
 [-0.0109273  -0.0109273 ]
 [ 0.01262954  0.01262954]
 [ 0.01809234  0.01809234]
 [ 0.31454497  0.31454497]
 [ 0.14895477  0.14895477]
 [ 0.17770766  0.17770766]
 [ 0.14745891  0.14745891]
 [ 0.15953087  0.15953087]
 [ 0.14381027  0.14381027]
 [ 0.11105659  0.11105659]
 [ 0.03839516  0.03839516]
 [ 0.0775739   0.0775739 ]
 [ 0.03592373  0.03592373]
 [ 0.07350885  0.07350885]
 [ 0.03392626  0.03392626]
 [ 0.0974007   0.0974007 ]
 [ 0.04486928  0.04486928]
 [ 0.05899539  0.05899539]
 [ 0.03843063  0.03843063]
 [ 0.06015138  0.06015138]
 

In [44]:
options= {'maxfun':2000} #adjust maxfun
#  You should also try different values of lambda
lambda_ = 1
costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size,
                                        num_labels, X, y, lambda_)
res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)

nn_params = res.x
        
Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))

  logprobs = np.multiply(np.log(h),y_matrix) + np.multiply(np.log(1-h),1-y_matrix)
  logprobs = np.multiply(np.log(h),y_matrix) + np.multiply(np.log(1-h),1-y_matrix)


In [45]:
print('Cost function when lambda = ', lambda_,'is ', res.fun)

Cost function when lambda =  1 is  0.38888254827490243


In [46]:
pred_train = utils.predict(Theta1, Theta2, X)
print('Training Set Accuracy: %f' % (np.mean(pred_train == y) * 100))
pred_test = utils.predict(Theta1, Theta2, X_test)
print('TestingSet Accuracy: %f' % (np.mean(pred_test == y_test) * 100))

Training Set Accuracy: 98.687500
TestingSet Accuracy: 97.000000


Scipy.Optimize module

In [33]:
import scipy.optimize

In [37]:
scipy.optimize??

[1;31mType:[0m        module
[1;31mString form:[0m <module 'scipy.optimize' from 'd:\\M.eng\\Machine learning\\env\\Lib\\site-packages\\scipy\\optimize\\__init__.py'>
[1;31mFile:[0m        d:\m.eng\machine learning\env\lib\site-packages\scipy\optimize\__init__.py
[1;31mSource:[0m     
[1;34m"""
Optimization and root finding (:mod:`scipy.optimize`)

.. currentmodule:: scipy.optimize

.. toctree::
   :hidden:

   optimize.cython_optimize

SciPy ``optimize`` provides functions for minimizing (or maximizing)
objective functions, possibly subject to constraints. It includes
solvers for nonlinear problems (with support for both local and global
optimization algorithms), linear programming, constrained
and nonlinear least-squares, root finding, and curve fitting.

Common functions and objects, shared across different solvers, are:

.. autosummary::
   :toctree: generated/

   show_options - Show specific options optimization solvers.
   OptimizeResult - The optimization result return

In [39]:
options= {'maxfun': 2000}

#  You should also try different values of lambda
lambda_ = 2
costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size,
                                        num_labels, X, y, lambda_)
res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)
nn_params = res.x
Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))

In [40]:
print('Cost function when lambda = ', lambda_,'is ', res.fun)

Cost function when lambda =  2 is  0.538077215951699


In [42]:
pred_train = utils.predict(Theta1, Theta2, X)
print('Training Set Accuracy: %f' % (np.mean(pred_train == y) * 100))
pred_test = utils.predict(Theta1, Theta2, X_test)
print('Testing Set Accuracy: %f' % (np.mean(pred_test == y_test) * 100))

Training Set Accuracy: 98.500000
Testing Set Accuracy: 97.250000


In [47]:
options= {'maxfun':2000} #adjust maxfun
#  You should also try different values of lambda
lambda_ = 3
costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size,
                                        num_labels, X, y, lambda_)
res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)

nn_params = res.x
        
Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))

In [48]:
print('Cost function when lambda = ', lambda_,'is ', res.fun)

Cost function when lambda =  3 is  0.6673434348890104


In [49]:
pred_train = utils.predict(Theta1, Theta2, X)
print('Training Set Accuracy: %f' % (np.mean(pred_train == y) * 100))
pred_test = utils.predict(Theta1, Theta2, X_test)
print('Training Set Accuracy: %f' % (np.mean(pred_test == y_test) * 100))

Training Set Accuracy: 98.062500
Training Set Accuracy: 97.000000


### End of Neural Network Problem