In [None]:
# Importing the libraries
import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score,roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

In [None]:
# Read only the five columns used in this notebook from the CSV file.
BSOM_data = pd.read_csv(
    'BSOM_DataSet_for_HW3.csv',
    usecols=[
        'all_mcqs_avg_n20',
        'all_NBME_avg_n4',
        'CBSE_01',
        'CBSE_02',
        'LEVEL',
    ],
)
# Cell output: number of missing values in each loaded column.
BSOM_data.isna().sum()

In [None]:
# Discard every row that has at least one missing value, then show the
# per-column missing-value counts again as the cell output.
BSOM_data = BSOM_data.dropna(axis='index')
BSOM_data.isna().sum()

In [None]:
#Applying feature scaling to variables
def Feature_scaling(feat):
    """Mean-normalise every row of a 2-D array.

    Each row ``x`` is mapped to ``(x - mean(x)) / (max(x) - min(x))``.

    Parameters
    ----------
    feat : numpy.ndarray of shape (n_rows, n_cols)
        Values to scale; statistics are computed along axis 1, i.e. per row.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``feat``. The input array is
        left untouched.
    """
    # astype() copies, so the caller's array (or a transposed view of it) is
    # never modified, and integer input is not truncated back to integers.
    scaled = feat.astype(float)
    mean_X = np.mean(scaled, axis=1, keepdims=True)
    range_X = np.amax(scaled, axis=1, keepdims=True) - np.amin(scaled, axis=1, keepdims=True)
    # A constant row has zero range; dividing by 1 maps it to zeros instead of
    # producing NaN from 0/0.
    range_X[range_X == 0] = 1.0
    # Broadcasting scales all rows at once, including row 0.
    return (scaled - mean_X) / range_X

In [None]:
#Randomly initialise the network parameters (weights and biases are stored separately)
def initialize_params(nodes_list):
    """Create small random weights and biases for a fully connected network.

    Parameters
    ----------
    nodes_list : list of int
        Number of nodes in each layer, input layer first.

    Returns
    -------
    tuple (dict, int)
        The dictionary maps ``'theta_L<i>'`` to an array of shape
        ``(nodes_list[i], nodes_list[i-1])`` and ``'bias_L<i>'`` to an array of
        shape ``(nodes_list[i], 1)`` for ``i = 1 .. len(nodes_list) - 1``.
        The integer is ``len(nodes_list)``.

    Notes
    -----
    Re-seeds NumPy's global random generator with 1 on every call, so repeated
    calls with the same ``nodes_list`` return identical values.
    """
    np.random.seed(1)
    scale = 0.0001
    layer_count = len(nodes_list)
    params = {}
    layer_pairs = zip(nodes_list[:-1], nodes_list[1:])
    for layer, (n_in, n_out) in enumerate(layer_pairs, start=1):
        # Weights are drawn before the bias of the same layer.
        params['theta_L{}'.format(layer)] = np.random.randn(n_out, n_in) * scale
        params['bias_L{}'.format(layer)] = np.random.randn(n_out, 1) * scale
    return params, layer_count

In [None]:
#sigmoid activation function
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise for arrays."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
#derivative of sigmoid function (used in back propagation)
def d_sigmoid(a):
    """Return ``a * (1 - a)``: the sigmoid derivative expressed through the
    sigmoid output ``a`` (not through the pre-activation)."""
    complement = 1 - a
    return a * complement

In [None]:
#Forward propagation
def forward_prop(X,thetas,L):
    """Propagate ``X`` through the network and keep every layer's activation.

    Parameters
    ----------
    X : array
        Input activations; stored unchanged as layer 1 (``'a_L1'``).
    thetas : dict
        Holds ``'theta_L<i>'`` and ``'bias_L<i>'`` for ``i = 1 .. L-1``.
    L : int
        Total number of layers, input layer included.

    Returns
    -------
    tuple (array, dict)
        The activation of layer ``L`` and a dictionary mapping ``'a_L<i>'`` to
        the activation of layer ``i`` for ``i = 1 .. L``.
    """
    activations = {'a_L1': X}
    current = X
    for layer in range(1, L):
        weights = thetas['theta_L{}'.format(layer)]
        bias = thetas['bias_L{}'.format(layer)]
        # Every layer, the output layer included, uses the sigmoid activation.
        current = sigmoid(np.dot(weights, current) + bias)
        activations['a_L{}'.format(layer + 1)] = current
    return activations['a_L{}'.format(L)], activations

In [None]:
#Calculate the cost function
def calc_cost(ypred,yactual,eps=1e-15):
    """Mean cross-entropy cost, summed over all output units.

    Computes ``-(1/m) * sum(y*log(p) + (1-y)*log(1-p))`` over every entry,
    where ``m = yactual.shape[1]``.

    Parameters
    ----------
    ypred : numpy.ndarray
        Predicted values in ``[0, 1]``, same shape as ``yactual``.
    yactual : numpy.ndarray
        Target array; its second dimension is used as the sample count ``m``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before taking logarithms.
        Without this, a prediction of exactly 0 or 1 produces ``0 * log(0)``,
        which is NaN and makes the whole cost NaN.

    Returns
    -------
    float
        The scalar cost.
    """
    m = yactual.shape[1]
    clipped = np.clip(ypred, eps, 1 - eps)
    log_likelihood = np.multiply(yactual, np.log(clipped)) + np.multiply(1 - yactual, np.log(1 - clipped))
    return (-1/m) * np.sum(log_likelihood)

In [None]:
#backward propagation
def backward_prop(thetas,X,activation,y_actual,L):
    """Compute the gradients of the cost with respect to all weights and biases.

    Parameters
    ----------
    thetas : dict
        Current parameters (``'theta_L<i>'`` entries are read).
    X : array
        Network input; ``X.shape[1]`` is used as the sample count ``m``.
    activation : dict
        Per-layer activations keyed ``'a_L<i>'``, as returned by ``forward_prop``.
    y_actual : array
        Targets, subtracted from the output-layer activation.
    L : int
        Total number of layers, input layer included.

    Returns
    -------
    dict
        ``'d_theta_L<i>'`` and ``'d_bias_L<i>'`` for ``i = 1 .. L-1``.
    """
    m = X.shape[1]
    deltas = {'delta_L' + str(L): activation['a_L' + str(L)] - y_actual}
    grads = {}
    # Walk from the output layer down to layer 3; each step yields the
    # gradients of the parameters feeding that layer and the error term of the
    # layer below it.
    for layer in range(L, 2, -1):
        below = str(layer - 1)
        delta = deltas['delta_L' + str(layer)]
        act_below = activation['a_L' + below]
        grad_theta = (1/m)*np.dot(delta, act_below.T)
        grad_bias = (1/m)*np.sum(delta, axis=1, keepdims=True)
        back_signal = np.dot(thetas['theta_L' + below].T, delta)
        deltas['delta_L' + below] = np.multiply(back_signal, d_sigmoid(act_below))
        grads['d_theta_L' + below] = grad_theta
        grads['d_bias_L' + below] = grad_bias

    # The first set of parameters is handled separately and uses X directly.
    delta_2 = deltas['delta_L2']
    grads['d_theta_L1'] = (1/m)*np.dot(delta_2, X.T)
    grads['d_bias_L1'] = (1/m)*np.sum(delta_2, axis=1, keepdims=True)

    return grads


In [None]:
#updating the thetas of all the layers
def update_thetas(thetas,d_thetas,L,alpha):
    """Apply one gradient-descent step to every weight matrix and bias vector.

    Parameters
    ----------
    thetas : dict
        Current parameters keyed ``'theta_L<i>'`` / ``'bias_L<i>'``.
    d_thetas : dict
        Gradients keyed ``'d_theta_L<i>'`` / ``'d_bias_L<i>'``.
    L : int
        Total number of layers; parameters ``1 .. L-1`` are updated.
    alpha : float
        Learning rate.

    Returns
    -------
    dict
        A new dictionary holding the updated parameters. ``thetas`` itself is
        not modified, so callers can keep the previous parameters if needed.
    """
    # Shallow copy: the entries reassigned below are replaced by new arrays,
    # so the caller's dictionary keeps its original values.
    thetas_updated = dict(thetas)
    for i in range(1, L):
        for prefix in ('theta_L', 'bias_L'):
            key = prefix + str(i)
            thetas_updated[key] = thetas[key] - (alpha * d_thetas['d_' + key])
    return thetas_updated

In [None]:
def NN_multihiddenlayer(X,y,nodelist,num_iter,alpha):
    """Train a fully connected sigmoid network with batch gradient descent.

    Parameters
    ----------
    X : array
        Training inputs, passed to ``forward_prop`` / ``backward_prop``.
    y : array
        Training targets, passed to ``calc_cost`` / ``backward_prop``.
    nodelist : list of int
        Number of nodes per layer, passed to ``initialize_params``.
    num_iter : int
        Maximum number of iterations.
    alpha : float
        Learning rate.

    Returns
    -------
    tuple (list, dict)
        The cost recorded at every executed iteration and the final parameters.

    Notes
    -----
    Training stops early, printing a message, as soon as the cost drops by less
    than 1e-7 relative to the previous iteration. That test is also true when
    the cost goes up, so an increase ends training as well. The parameters
    returned in that case are those that produced the last recorded cost.
    """
    thetas, num_layer = initialize_params(nodelist)
    cost_list = []
    count = 0
    while count < num_iter:
        ypred, act_function = forward_prop(X, thetas, num_layer)
        cost_list.append(calc_cost(ypred, y))
        pd_thetas = backward_prop(thetas, X, act_function, y, num_layer)
        # cost_list[-1] is this iteration's cost, cost_list[-2] the previous one.
        if count >= 1 and (cost_list[-2] - cost_list[-1]) < 1e-7:
            print("convergence is reached at iteration\n",str(count))
            break
        thetas = update_thetas(thetas, pd_thetas, num_layer, alpha)
        count += 1
    return cost_list, thetas

In [None]:
#plotting the cost (vs) iterations graph
def plot_costfunction(iter_num,J_list,data):
    """Plot the recorded cost against the iteration number.

    Parameters
    ----------
    iter_num : int
        Maximum number of iterations to plot.
    J_list : list
        Cost recorded at each iteration. It may be shorter than ``iter_num``
        when training stopped early; only the available entries are plotted.
    data : object
        Label appended to the figure title (converted with ``str``).
    """
    # Never index past the end of J_list: early stopping leaves fewer than
    # iter_num costs, which previously raised IndexError here.
    n_points = min(iter_num, len(J_list))
    iterations = list(np.arange(0, n_points, 1))
    cost_J = [J_list[i] for i in iterations]

    plt.plot(iterations,cost_J)
    plt.xlabel("#Iterations")
    plt.ylabel("J (cost)")
    plt.title("NeuralNetworks cost function vs iterations " +str(data))
    plt.show()

In [None]:
def plot_confusion_matrix(cf_matrix, labels=('A','B','C','D')):
    """Draw a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    cf_matrix : array-like
        Confusion matrix to display. The argument itself is plotted; the
        function no longer reads a global variable named ``cf``.
    labels : sequence of str, optional
        Tick labels used on both axes. Defaults to the four class letters
        used in this notebook.
    """
    tick_labels = list(labels)
    sns.heatmap(cf_matrix,xticklabels=tick_labels,yticklabels=tick_labels,annot=True,linecolor='white',linewidths=0.5,cmap='coolwarm')
    plt.xlabel("Predicted labels")
    plt.ylabel("actual labels")
    plt.show()

In [None]:
#calculate AUC score for multiclass classification
def calc_AUCscore(actual, predicted):
    """Weighted-average ROC AUC computed from class labels.

    Both label sequences are converted to indicator matrices with a
    ``LabelBinarizer`` fitted on ``actual`` only, and the matrices are passed to
    ``roc_auc_score`` with ``average='weighted'``.

    NOTE(review): ``predicted`` is binarised as labels, so the score is based on
    hard class assignments rather than on predicted probabilities.
    """
    binarizer = LabelBinarizer().fit(actual)
    actual_indicator = binarizer.transform(actual)
    predicted_indicator = binarizer.transform(predicted)
    return roc_auc_score(actual_indicator, predicted_indicator, average='weighted')

In [None]:
#split the data into train(80%) and test(20%) datasets
# The target is selected by name rather than by position: read_csv(usecols=...)
# keeps the column order of the file, so 'LEVEL' is not guaranteed to be the
# last column of BSOM_data.
features_X = BSOM_data.drop(columns='LEVEL').to_numpy()
y = BSOM_data['LEVEL'].to_numpy()
# random_state is fixed so the split is reproducible between runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(features_X, y, test_size = 0.2,random_state=0)

In [None]:
#Applying feature scaling to the independent variables and one hot encoding the target variable
# The splits are transposed before scaling, so the first dimension of train_X /
# test_X comes from the feature columns and the second from the samples.
# Each split is scaled with its own statistics.

# Training split
train_X = Feature_scaling(Xtrain.T)
m_train = train_X.shape[0]
train_y = pd.get_dummies(ytrain).to_numpy().T

# Test split
test_X = Feature_scaling(Xtest.T)
m_test = test_X.shape[0]
test_y = pd.get_dummies(ytest).to_numpy().T

In [None]:
#encode the class labels in the train and test data
# Each class letter is replaced by its position in this list
# (A -> 0, B -> 1, C -> 2, D -> 3); any other value is left as it is.
actual_train = ytrain
actual_test = ytest
for class_index, class_letter in enumerate(['A', 'B', 'C', 'D']):
    actual_train = np.where(actual_train == class_letter, class_index, actual_train)
    actual_test = np.where(actual_test == class_letter, class_index, actual_test)

#### Single hidden layer

In [None]:
#training the model with single hidden layer with 5 hidden nodes using the train dataset (comparing different learning rate values)
iter_num = 1500
alphas_list = [0.01, 0.1, 0.5, 0.6, 0.7]
layernodes = [4, 5, 4]
n_layers = len(layernodes)
for learning_rate in alphas_list:
    print("learning_rate :\n", str(learning_rate))
    # Every call re-initialises the parameters, so each learning rate is
    # trained from the same starting point.
    final_cost, final_thetas = NN_multihiddenlayer(train_X, train_y, layernodes, iter_num, learning_rate)
    ypred_train, activations_train = forward_prop(train_X, final_thetas, n_layers)
    # The predicted class is the output unit with the largest activation.
    ypred_labels_train = np.argmax(ypred_train, axis=0)
    true_labels = list(actual_train)
    predicted_labels = list(ypred_labels_train)
    print("Confusion Matrix \n")
    cf = confusion_matrix(true_labels, predicted_labels)
    print(cf)
    pr = precision_score(true_labels, predicted_labels, average='weighted')
    rc = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    print("Precision : ", str(pr))
    print("Recall : ", str(rc))
    print("F1 score : ", str(f1))


In [None]:
#training the model with single hidden layer with 5 hidden nodes using the train dataset (with selected best learning rate  )
iter_num=1500
alpha=0.7
layernodes=[4,5,4]
n_layers=len(layernodes)
print("learning_rate :\n",str(alpha))
final_cost,final_thetas=NN_multihiddenlayer(train_X,train_y,layernodes,iter_num,alpha)
ypred_train,activations_train=forward_prop(train_X,final_thetas,n_layers)
# The predicted class is the output unit with the largest activation.
ypred_labels_train=np.argmax(ypred_train,axis=0)
print("Confusion Matrix of training data\n")
cf=confusion_matrix(list(actual_train),list(ypred_labels_train))
print(cf)
pr=precision_score(list(actual_train),list(ypred_labels_train),average='weighted')
rc=recall_score(list(actual_train),list(ypred_labels_train),average='weighted')
f1=f1_score(list(actual_train),list(ypred_labels_train),average='weighted')
auc_train=calc_AUCscore(list(actual_train),list(ypred_labels_train))
print("Precision : ",str(pr))
print("Recall : ",str(rc))
print("F1 score : ",str(f1))
print("AUC score : ",str(auc_train))
# Training can stop before iter_num iterations, in which case final_cost holds
# fewer than iter_num entries. Plot exactly the recorded costs so the plotting
# helper never indexes past the end of the list.
plot_costfunction(len(final_cost),final_cost,'training data')
plot_confusion_matrix(cf)

In [None]:
#predicting the test data with the neural network of single hidden layer and 5 hidden nodes.
# Uses the parameters in final_thetas left by the preceding training cell.
alpha = 0.7
layernodes = [4, 5, 4]
n_layers = len(layernodes)
print("learning_rate :\n", str(alpha))
ypred_test, activations_test = forward_prop(test_X, final_thetas, n_layers)
# The predicted class is the output unit with the largest activation.
ypred_labels_test = np.argmax(ypred_test, axis=0)
true_labels = list(actual_test)
predicted_labels = list(ypred_labels_test)
print("Confusion Matrix of test data\n")
cf = confusion_matrix(true_labels, predicted_labels)
print(cf)
plot_confusion_matrix(cf)
pr_test = precision_score(true_labels, predicted_labels, average='weighted')
rc_test = recall_score(true_labels, predicted_labels, average='weighted')
f1_test = f1_score(true_labels, predicted_labels, average='weighted')
auc_test = calc_AUCscore(true_labels, predicted_labels)
print("Precision : ", str(pr_test))
print("Recall : ", str(rc_test))
print("F1 score : ", str(f1_test))
print("AUC score : ", str(auc_test))

#### Single hidden layer with different number of hidden nodes

In [None]:
#Applying the neural network model of single layer with different number of hidden nodes on training data
alpha = 0.7
iter_num = 1500
# Trained parameters are collected in the same order as layernodeslist so a
# later cell can evaluate each model on the test data.
final_thetas_list = []
layernodeslist = [[4, 2, 4], [4, 3, 4], [4, 5, 4], [4, 7, 4], [4, 9, 4], [4, 50, 4]]
for layernodes in layernodeslist:
    print("layernode :", str(layernodes))
    print("number of nodes in the hidden layer :", str(layernodes[1]))
    n_layers = len(layernodes)
    print("learning_rate :\n", str(alpha))
    final_cost, final_thetas = NN_multihiddenlayer(train_X, train_y, layernodes, iter_num, alpha)
    final_thetas_list.append(final_thetas)
    ypred_train, activations_train = forward_prop(train_X, final_thetas, n_layers)
    # The predicted class is the output unit with the largest activation.
    ypred_labels_train = np.argmax(ypred_train, axis=0)
    true_labels = list(actual_train)
    predicted_labels = list(ypred_labels_train)
    print("Confusion Matrix \n")
    cf = confusion_matrix(true_labels, predicted_labels)
    print(cf)
    pr = precision_score(true_labels, predicted_labels, average='weighted')
    rc = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    auc_train = calc_AUCscore(true_labels, predicted_labels)
    print("Precision : ", str(pr))
    print("Recall : ", str(rc))
    print("F1 score : ", str(f1))
    print("AUC score : ", str(auc_train))
    plot_confusion_matrix(cf)

In [None]:
##predicting the test data with the neural network model of single layer with different number of hidden nodes
alpha = 0.7

# Must list the architectures in the same order as the training cell, because
# the matching parameters are looked up in final_thetas_list by position.
layernodeslist = [[4, 2, 4], [4, 3, 4], [4, 5, 4], [4, 7, 4], [4, 9, 4], [4, 50, 4]]
for j, layernodes in enumerate(layernodeslist):
    print("layernode :", str(layernodes))
    print("number of nodes in the hidden layer :", str(layernodes[1]))
    n_layers = len(layernodes)
    print("learning_rate :\n", str(alpha))
    final_thetas = final_thetas_list[j]
    ypred_test, activations_test = forward_prop(test_X, final_thetas, n_layers)
    # The predicted class is the output unit with the largest activation.
    ypred_labels_test = np.argmax(ypred_test, axis=0)
    true_labels = list(actual_test)
    predicted_labels = list(ypred_labels_test)
    print("Confusion Matrix \n")
    cf = confusion_matrix(true_labels, predicted_labels)
    print(cf)
    pr_test = precision_score(true_labels, predicted_labels, average='weighted')
    rc_test = recall_score(true_labels, predicted_labels, average='weighted')
    f1_test = f1_score(true_labels, predicted_labels, average='weighted')
    auc_test = calc_AUCscore(true_labels, predicted_labels)
    print("Precision : ", str(pr_test))
    print("Recall : ", str(rc_test))
    print("F1 score : ", str(f1_test))
    print("AUC score : ", str(auc_test))
    plot_confusion_matrix(cf)

#### Multiple hidden layers

In [None]:
##Applying the neural network model of different number of layers with 5 hidden nodes in each layer on training data
alpha = 0.7
iter_num = 1500
# Trained parameters are collected in the same order as layernodeslist so a
# later cell can evaluate each model on the test data.
final_thetas_list = []
layernodeslist = [[4, 5, 4], [4, 5, 5, 4], [4, 5, 5, 5, 4], [4, 5, 5, 5, 5, 4]]
for layernodes in layernodeslist:
    print("layernode :", str(layernodes))

    n_layers = len(layernodes)
    # Every layer except the first and the last is a hidden layer.
    print("number of layers in the hidden layer :", str(n_layers - 2))
    print("learning_rate :\n", str(alpha))
    final_cost, final_thetas = NN_multihiddenlayer(train_X, train_y, layernodes, iter_num, alpha)
    final_thetas_list.append(final_thetas)
    ypred_train, activations_train = forward_prop(train_X, final_thetas, n_layers)
    # The predicted class is the output unit with the largest activation.
    ypred_labels_train = np.argmax(ypred_train, axis=0)
    true_labels = list(actual_train)
    predicted_labels = list(ypred_labels_train)
    print("Confusion Matrix \n")
    cf = confusion_matrix(true_labels, predicted_labels)
    print(cf)
    pr = precision_score(true_labels, predicted_labels, average='weighted')
    rc = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    auc_train = calc_AUCscore(true_labels, predicted_labels)
    print("Precision : ", str(pr))
    print("Recall : ", str(rc))
    print("F1 score : ", str(f1))
    print("AUC score : ", str(auc_train))
    plot_confusion_matrix(cf)

In [None]:
##predicting the test data by using the neural network model of different number of layers with 5 hidden nodes in each layer
alpha = 0.7
# Must list the architectures in the same order as the training cell, because
# the matching parameters are looked up in final_thetas_list by position.
layernodeslist = [[4, 5, 4], [4, 5, 5, 4], [4, 5, 5, 5, 4], [4, 5, 5, 5, 5, 4]]
for j, layernodes in enumerate(layernodeslist):
    print("layernode :", str(layernodes))
    n_layers = len(layernodes)
    # Every layer except the first and the last is a hidden layer.
    print("number of layers in the hidden layer :", str(n_layers - 2))
    print("learning_rate :\n", str(alpha))
    final_thetas = final_thetas_list[j]
    ypred_test, activations_test = forward_prop(test_X, final_thetas, n_layers)
    # The predicted class is the output unit with the largest activation.
    ypred_labels_test = np.argmax(ypred_test, axis=0)
    true_labels = list(actual_test)
    predicted_labels = list(ypred_labels_test)
    print("Confusion Matrix \n")
    cf = confusion_matrix(true_labels, predicted_labels)
    print(cf)
    plot_confusion_matrix(cf)
    pr_test = precision_score(true_labels, predicted_labels, average='weighted')
    rc_test = recall_score(true_labels, predicted_labels, average='weighted')
    f1_test = f1_score(true_labels, predicted_labels, average='weighted')
    auc_test = calc_AUCscore(true_labels, predicted_labels)
    print("Precision : ", str(pr_test))
    print("Recall : ", str(rc_test))
    print("F1 score : ", str(f1_test))
    print("AUC score : ", str(auc_test))
