In [None]:
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load only the two score columns and the LEVEL class label from the CSV.
BSOM_data = pd.read_csv(
    'BSOM_DataSet_for_HW2.csv',
    usecols=['all_mcqs_avg_n20', 'all_NBME_avg_n4', 'LEVEL'],
)
# Per-column count of missing values (displayed as the cell output).
BSOM_data.isna().sum()

In [None]:
# Discard every row containing at least one missing value, then re-count the
# missing values per column so the cell output confirms none remain.
BSOM_data = BSOM_data.dropna(axis='index')
BSOM_data.isna().sum()

In [None]:
# Starting point for gradient descent: every parameter begins at zero.
def initial_parameters(size):
    """Return a ``(size, 1)`` column vector of zeros.

    Parameters
    ----------
    size : int
        Number of parameters (rows) in the vector.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(size, 1)`` filled with zeros.
    """
    return np.zeros((size, 1))

In [None]:
#predicting the probabilities
def hypothesis(X,thetas,eps=0.00001):
    """Logistic (sigmoid) hypothesis ``1 / (1 + exp(-thetas^T X))``.

    The result is clipped element-wise into ``[eps, 1 - eps]`` so that the
    ``log(h)`` and ``log(1 - h)`` terms of the cost function never see an
    exact 0 or 1. The previous guard only replaced zeros when *every* entry
    of the output was zero and never handled saturated 1.0 values, which
    let ``-inf``/``NaN`` leak into the cost.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix with one column per sample. In this notebook it is
        built as ``(n_features + 1, n_samples)`` with a leading row of ones.
    thetas : numpy.ndarray
        Parameter column vector with one row per row of ``X``.
    eps : float, optional
        Smallest probability returned; ``1 - eps`` is the largest.
        Defaults to ``0.00001``, the constant the original guard used.

    Returns
    -------
    numpy.ndarray
        Probabilities of shape ``(thetas.shape[1], X.shape[1])``, i.e.
        ``(1, n_samples)`` for a single-column ``thetas``.
    """
    z=np.dot(np.transpose(thetas),X)
    # exp(-z) overflows to inf for very negative z; the resulting h == 0 is
    # exactly what the clip below repairs, so the overflow itself is benign.
    with np.errstate(over='ignore'):
        h=1 / (1 + np.exp(-z))
    return np.clip(h,eps,1-eps)

In [None]:
# One-vs-rest decision: for every sample pick the classifier with the highest probability.
def final_prediction(h1,h2,h3,h4):
    """Combine four one-vs-rest probability rows into a single prediction.

    Parameters
    ----------
    h1, h2, h3, h4 : sequence of sequences
        Output of each binary classifier; only the first row of each is
        used (``list(h)[0]``). The number of samples is taken from ``h1``.

    Returns
    -------
    final_h : list
        For each sample, the largest of the four probabilities.
    max_index : list of int
        For each sample, the position (0-3) of the classifier that produced
        that probability; ties go to the lowest position.
    """
    rows = [list(h)[0] for h in (h1, h2, h3, h4)]
    final_h = []
    max_index = []
    for sample in range(len(rows[0])):
        scores = [row[sample] for row in rows]
        top = max(scores)
        max_index.append(scores.index(top))
        final_h.append(top)
    return final_h, max_index

In [None]:
# Calculating the logistic cost, with and without L2 regularization.
def cost_reg(thetas,X,y,lamda):
    """Cross-entropy cost of the logistic model, plain and L2-regularized.

    Parameters
    ----------
    thetas : numpy.ndarray
        2-D parameter array; row 0 is excluded from the penalty.
    X : numpy.ndarray
        Feature matrix; ``X.shape[1]`` is used as the sample count ``m``.
    y : numpy.ndarray
        0/1 targets, broadcast against the output of ``hypothesis``.
    lamda : float
        Regularization strength.

    Returns
    -------
    J : float
        Unregularized cost ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))``.
    J_reg : float
        ``J`` plus ``lamda / (2*m)`` times the sum of squared ``thetas[1:]``.
    """
    probs = hypothesis(X, thetas)
    m = X.shape[1]
    probs = probs.astype(float)
    log_likelihood = np.sum(y * np.log(probs) + (1 - y) * np.log(1 - probs))
    J = (-1 / (m)) * log_likelihood
    penalty = (lamda / (2 * m)) * np.sum(thetas[1:, :] ** 2)
    return J, J + penalty

In [None]:
#Calculating the gradient descent with regularization and updating the parameters
def Regularization_gradient(X,y,alpha,lamda,tol=0.00001,max_iter=None):
    """Fit one binary logistic classifier by L2-regularized batch gradient descent.

    Parameters start at zero (``initial_parameters``) and are updated until
    the regularized cost improves by less than ``tol`` between two
    consecutive iterations. A cost *increase* also satisfies that test, so
    the loop doubles as a crude divergence stop.

    Compared with the earlier version, the loop can no longer spin forever:
    a NaN/inf cost (every comparison with NaN is False, so the convergence
    test could never fire) now stops the iteration with a message, and an
    optional ``max_iter`` cap is available.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix of shape ``(n_params, m)``; row 0 is treated as the
        bias row and is not regularized.
    y : numpy.ndarray
        0/1 targets with one entry per column of ``X``.
    alpha : float
        Learning rate.
    lamda : float
        Regularization strength.
    tol : float, optional
        Minimum decrease of the regularized cost required to keep going.
    max_iter : int or None, optional
        Maximum number of parameter updates; ``None`` (default) means no cap.

    Returns
    -------
    thetas : numpy.ndarray
        Final parameters, shape ``(n_params, 1)``.
    cost_reg_list : list of float
        Regularized cost recorded at every iteration, including the last.
    count : int
        Number of parameter updates performed.
    final_h : numpy.ndarray
        Predicted probabilities for ``X`` under the returned ``thetas``.
    """
    m=X.shape[1]
    thetas=initial_parameters(X.shape[0])
    cost_reg_list=[]
    count=0
    while True:
        ypred=hypothesis(X,thetas)
        _,reg_cost=cost_reg(thetas,X,y,lamda)
        cost_reg_list.append(reg_cost)

        # A non-finite cost can never satisfy the convergence test below,
        # so bail out explicitly instead of looping forever.
        if not np.isfinite(reg_cost):
            print("stopping at iteration",str(count),": cost is not finite\n")
            break
        if count>=1 and (cost_reg_list[count-1]-reg_cost)<tol:
            print("convergence is reached at iteration",str(count),'\n')
            break
        if max_iter is not None and count>=max_iter:
            print("stopping at iteration",str(count),": max_iter reached\n")
            break

        # Gradient step for all parameters; the L2 term is added to every
        # row except the bias (row 0).
        grad=(alpha/m)*np.matmul(X,(ypred-y).T)
        grad[1:]=grad[1:]+((alpha*lamda)/m)*thetas[1:]
        thetas=thetas-grad
        count+=1
    return thetas,cost_reg_list,count,ypred

In [None]:
def plot_confusion_matrix(cf_matrix,class_labels=('A','B','C','D')):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    The previous body plotted the global ``cf`` and ignored its argument;
    it now plots the matrix it is given.

    Parameters
    ----------
    cf_matrix : array-like
        Square confusion matrix; rows are labelled as actual classes and
        columns as predicted classes.
    class_labels : sequence of str, optional
        Tick labels for both axes, in matrix order. Defaults to the four
        classes used in this notebook.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    tick_labels=list(class_labels)
    sns.heatmap(cf_matrix,xticklabels=tick_labels,yticklabels=tick_labels,annot=True,linecolor='white',linewidths=0.5,cmap='coolwarm')
    plt.xlabel("Predicted labels")
    plt.ylabel("actual labels")
    plt.show()

In [None]:
#split the data into train(70%) and test(30%) datasets
from sklearn.model_selection import train_test_split

# Pick the label and the features by column name rather than by position:
# read_csv(usecols=...) keeps the column order of the file, not of the
# usecols list, so LEVEL is not guaranteed to be the last column.
features_X = BSOM_data.drop(columns='LEVEL').to_numpy()
y = BSOM_data['LEVEL'].to_numpy()
# random_state is fixed so the split is reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(features_X, y, test_size = 0.3,random_state=0)

In [None]:
#adding bias term to the feature vector and create 4 classifier labels(for each of the 4 classes) in the train and test data sets
# The one-hot columns are pinned to the order A, B, C, D. A plain
# get_dummies() derives its columns from the labels present in each split,
# so a class missing from one split would shift or drop columns, while the
# training cells index columns 0..3 as classes A..D.
class_order = ['A', 'B', 'C', 'D']

# Design matrices are stored transposed: one row per parameter (bias first),
# one column per sample.
m_train = Xtrain.shape[0]
train_X = np.hstack((np.ones((m_train, 1)), Xtrain)).T
train_y = pd.get_dummies(pd.Categorical(ytrain, categories=class_order)).to_numpy()

m_test = Xtest.shape[0]
test_X = np.hstack((np.ones((m_test, 1)), Xtest)).T
test_y = pd.get_dummies(pd.Categorical(ytest, categories=class_order)).to_numpy()


In [None]:
#encode the class labels in the train and test data
def _encode_labels(labels, classes=('A', 'B', 'C', 'D')):
    """Map class-name labels to integer codes (position in ``classes``).

    Parameters
    ----------
    labels : array-like
        Class names, one per sample.
    classes : sequence, optional
        Known class names; the code of a label is its index here.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``labels``.

    Raises
    ------
    ValueError
        If a label is not in ``classes``. The previous chain of
        ``np.where`` calls left such labels as strings in a mixed object
        array.
    """
    labels = np.asarray(labels, dtype=object)
    codes = np.full(labels.shape, -1, dtype=int)
    for code, name in enumerate(classes):
        codes[labels == name] = code
    unmapped = codes < 0
    if unmapped.any():
        unknown = sorted({str(value) for value in labels[unmapped]})
        raise ValueError("unknown class label(s): " + ", ".join(unknown))
    return codes

actual_train = _encode_labels(ytrain)
actual_test = _encode_labels(ytest)

In [None]:
# Regularization sweep: for every candidate strength, train the four
# one-vs-rest classifiers and report macro-averaged metrics on the training set.
reg_list = [0.01, 0.1, 0.5, 1, 10, 100]
best_alpha = 0.6
for i in reg_list:
    print("regularization parameter :\n",str(i))

    # Fit the classifiers in class order, announcing each one before it trains.
    fits = []
    for column, banner in enumerate([
        "classifier 1(class A vs not class A)",
        "classifier 2(class B vs not class B)",
        "classifier 3(class C vs not class C)",
        "classifier 4(class D vs not class D)",
    ]):
        print(banner)
        fits.append(Regularization_gradient(train_X, train_y[:, column], best_alpha, i))
    (
        (coef1, costs_J1, num_iter1, train_pred1),
        (coef2, costs_J2, num_iter2, train_pred2),
        (coef3, costs_J3, num_iter3, train_pred3),
        (coef4, costs_J4, num_iter4, train_pred4),
    ) = fits

    # Most probable class per sample, then the usual classification metrics.
    fh, labels = final_prediction(train_pred1, train_pred2, train_pred3, train_pred4)
    final_labels = np.array(labels)
    truth = list(actual_train)
    guess = list(final_labels)
    print("Confusion Matrix \n")
    cf = confusion_matrix(truth, guess)
    print(cf)
    pr = precision_score(truth, guess, average='macro')
    rc = recall_score(truth, guess, average='macro')
    f1 = f1_score(truth, guess, average='macro')
    print("Precision : ",str(pr))
    print("Recall : ",str(rc))
    print("F1 score : ",str(f1))

In [None]:
## Refit the four one-vs-rest classifiers on the training set with the chosen
## learning rate and regularization strength, then report training metrics.
best_reg = 0.01
best_alpha = 0.6

print("classifier 1(class A vs not class A)")
fit_a = Regularization_gradient(train_X, train_y[:, 0], best_alpha, best_reg)
best_coef11, costs_J1, num_iter1, best_train_pred1 = fit_a

print("classifier 2(class B vs not class B)")
fit_b = Regularization_gradient(train_X, train_y[:, 1], best_alpha, best_reg)
best_coef22, costs_J2, num_iter2, best_train_pred2 = fit_b

print("classifier 3(class C vs not class C)")
fit_c = Regularization_gradient(train_X, train_y[:, 2], best_alpha, best_reg)
best_coef33, costs_J3, num_iter3, best_train_pred3 = fit_c

print("classifier 4(class D vs not class D)")
fit_d = Regularization_gradient(train_X, train_y[:, 3], best_alpha, best_reg)
best_coef44, costs_J4, num_iter4, best_train_pred4 = fit_d

# Most probable class per training sample.
best_pred, best_labels = final_prediction(
    best_train_pred1, best_train_pred2, best_train_pred3, best_train_pred4
)
final_labels_train = np.array(best_labels)

train_truth = list(actual_train)
train_guess = list(final_labels_train)
print("Confusion Matrix of train data \n")
cf = confusion_matrix(train_truth, train_guess)
print(cf)
pr = precision_score(train_truth, train_guess, average='macro')
rc = recall_score(train_truth, train_guess, average='macro')
f1 = f1_score(train_truth, train_guess, average='macro')
print("Precision : ",str(pr))
print("Recall : ",str(rc))
print("F1 score : ",str(f1))
plot_confusion_matrix(cf)

In [None]:
# Score the held-out test set with the parameters fitted on the training data.
test_pred1, test_pred2, test_pred3, test_pred4 = (
    hypothesis(test_X, coef)
    for coef in (best_coef11, best_coef22, best_coef33, best_coef44)
)
pred_test, labels_test = final_prediction(test_pred1, test_pred2, test_pred3, test_pred4)
final_labels_test = np.array(labels_test)

test_truth = list(actual_test)
test_guess = list(final_labels_test)
print("Confusion Matrix of test data \n")
cf = confusion_matrix(test_truth, test_guess)
print(cf)

# Macro-averaged metrics: every class counts equally regardless of its size.
pr = precision_score(test_truth, test_guess, average='macro')
rc = recall_score(test_truth, test_guess, average='macro')
f1 = f1_score(test_truth, test_guess, average='macro')
print("Precision : ",str(pr))
print("Recall : ",str(rc))
print("F1 score : ",str(f1))
plot_confusion_matrix(cf)