In [None]:
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
#read only the two score columns and the LEVEL column from the CSV file
BSOM_data = pd.read_csv(
    'BSOM_DataSet_for_HW2.csv',
    usecols=['all_mcqs_avg_n20', 'all_NBME_avg_n4', 'LEVEL'],
)
#number of missing values in each loaded column
BSOM_data.isna().sum()

In [None]:
#keep only the rows in which every loaded column has a value
BSOM_data = BSOM_data.dropna(axis='index')
#confirm that no missing values are left
BSOM_data.isna().sum()

In [None]:
#initialise the parameters with zeros
def initial_parameters(size):
    """Return the all-zero starting weights used by gradient descent.

    Parameters
    ----------
    size : int
        Number of weights to create.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(size, 1)`` filled with zeros.
    """
    return np.zeros(shape=(size, 1))

In [None]:
#predicting the probabilities
def hypothesis(X,thetas):
    """Predict probabilities with the logistic (sigmoid) function.

    Computes ``sigmoid(thetas.T @ X)``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; in this notebook it has one row per feature (bias row
        included) and one column per sample.
    thetas : numpy.ndarray
        Weights; in this notebook a column vector with one entry per row of ``X``.

    Returns
    -------
    numpy.ndarray
        The predicted probabilities. Entries that saturate to exactly 0 or 1
        in floating point are replaced by ``0.00001`` and ``1 - 0.00001``
        respectively, so that ``log(h)`` and ``log(1 - h)`` stay finite.
    """
    eps=0.00001
    z=np.dot(np.transpose(thetas),X)
    h=1 / (1 + np.exp(-z))
    # Clamp element by element. The previous check only fired when *every*
    # probability was 0, so a single saturated sample still produced log(0),
    # and an exact 1 (which breaks log(1 - h)) was never handled.
    h=np.where(h==0.0,eps,h)
    h=np.where(h==1.0,1-eps,h)
    return h

In [None]:
#calculating the cost function
def Calc_cost(thetas,X,y):
    """Return the mean binary cross-entropy of the logistic model.

    Parameters
    ----------
    thetas : numpy.ndarray
        Weights passed on to ``hypothesis``.
    X : numpy.ndarray
        Design matrix; ``X.shape[1]`` is used as the number of samples.
    y : numpy.ndarray
        0/1 targets that broadcast against the output of ``hypothesis``.

    Returns
    -------
    float
        ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` with ``m = X.shape[1]``.
    """
    probs=hypothesis(X,thetas).astype(float)
    n_samples=X.shape[1]
    # per-sample log-likelihood of the observed label
    log_likelihood=y*np.log(probs)+(1-y)*np.log(1-probs)
    return (-1/(n_samples))*np.sum(log_likelihood)

In [None]:
#calculating the gradient descent and updating the parameters
def Gradientdescent(X,y,alpha,tol=0.00001,max_iter=None):
    """Fit one binary logistic-regression classifier by batch gradient descent.

    Starting from all-zero weights, the update
    ``thetas = thetas - (alpha / m) * X @ (h - y).T`` is repeated until the
    cost drops by less than ``tol`` between two consecutive iterations.
    Note that a cost *increase* also satisfies that test and ends the loop.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; ``X.shape[0]`` weights are fitted and ``X.shape[1]``
        is used as the number of samples ``m``.
    y : numpy.ndarray
        0/1 targets, one per sample.
    alpha : float
        Learning rate.
    tol : float, optional
        Minimum cost decrease required to keep iterating. The default is the
        value that used to be hard-coded.
    max_iter : int or None, optional
        Upper bound on the number of weight updates. ``None`` (the default)
        means no bound, as before.

    Returns
    -------
    thetas : numpy.ndarray
        Weights at the iteration where the loop stopped.
    cost_list : list
        Cost recorded at every iteration, the last one included.
    count : int
        Number of weight updates performed.
    final_h : numpy.ndarray
        Probabilities predicted on ``X`` with the returned weights.
    """
    m=X.shape[1]
    thetas=initial_parameters(X.shape[0])
    cost_list=[]
    count=0
    while True:
        ypred=hypothesis(X,thetas)
        cost=Calc_cost(thetas,X,y)
        cost_list.append(cost)

        # A NaN cost makes every later comparison False, which used to keep
        # this loop spinning forever; stop as soon as the cost is unusable.
        if not np.isfinite(cost):
            print("cost became non-finite at iteration",str(count),"- stopping",'\n')
            final_h=ypred
            break
        if (len(cost_list)>=2) and ((cost_list[count-1]-cost_list[count])<tol):
            print("convergence is reached at iteration",str(count),'\n')
            final_h=ypred
            break
        if (max_iter is not None) and (count>=max_iter):
            print("stopped after",str(count),"iterations without reaching convergence",'\n')
            final_h=ypred
            break
        thetas=thetas-(alpha/m)*np.matmul(X,(ypred-y).T)
        count+=1
    return thetas,cost_list,count,final_h

In [None]:
#predicting the labels based on maximum probability among the 4 classifiers
def final_prediction(h1,h2,h3,h4):
    """Pick, for every sample, the classifier with the highest probability.

    Parameters
    ----------
    h1, h2, h3, h4 : array-like
        Outputs of the four one-vs-rest classifiers. Only the first row of
        each is used, and all rows are indexed over the length of ``h1``'s
        first row.

    Returns
    -------
    final_h : list
        Winning probability for every sample.
    max_index : list of int
        Position (0-3) of the winning classifier for every sample; on a tie
        the earliest classifier wins.
    """
    rows=[list(h)[0] for h in (h1,h2,h3,h4)]
    final_h=[]
    max_index=[]
    for sample in range(len(rows[0])):
        scores=[row[sample] for row in rows]
        top=max(scores)
        max_index.append(scores.index(top))
        final_h.append(top)
    return final_h,max_index

In [None]:
#Applying feature scaling to variables
def Feature_scaling(feat):
    """Mean-normalise every row of ``feat`` except the first one.

    Each row ``i >= 1`` becomes ``(row - mean(row)) / (max(row) - min(row))``.
    Row 0 is returned unchanged (in this notebook it is the bias row of ones).

    Parameters
    ----------
    feat : array-like
        2-D array with one row per feature and one column per sample.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape. The input is not modified.
        A row whose values are all equal has range 0; it is only centred
        (it becomes all zeros) instead of being divided by zero.
    """
    # Work on a float copy: writing the scaled values back into an integer
    # array would silently truncate them, and the caller's data stays intact.
    scaled=np.array(feat,dtype=float)
    mean_X=np.mean(scaled,axis=1)
    range_X=np.amax(scaled,axis=1)-np.amin(scaled,axis=1)
    # constant rows: divide by 1 rather than by 0
    safe_range=np.where(range_X==0,1.0,range_X)
    scaled[1:,:]=(scaled[1:,:]-mean_X[1:,np.newaxis])/safe_range[1:,np.newaxis]
    return scaled

In [None]:
def plot_confusion_matrix(cf_matrix,labels=('A','B','C','D')):
    """Draw a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cf_matrix : array-like
        Square matrix of counts to plot. The axis titles assume rows are the
        actual labels and columns the predicted ones.
    labels : sequence of str, optional
        Tick labels used on both axes. Defaults to the four class names that
        used to be hard-coded.
    """
    tick_labels=list(labels)
    # Plot the matrix that was passed in; the previous body read the global
    # ``cf`` and ignored its argument.
    sns.heatmap(cf_matrix,xticklabels=tick_labels,yticklabels=tick_labels,annot=True,linecolor='white',linewidths=0.5,cmap='coolwarm')
    plt.xlabel("Predicted labels")
    plt.ylabel("actual labels")
    plt.show()

In [None]:
#split the data into train(70%) and test(30%) datasets
from sklearn.model_selection import train_test_split

# Pick the target and the features by column name. read_csv(usecols=...) keeps
# the column order of the file, so slicing by position would silently use the
# wrong column as the label if LEVEL is not the last column in the CSV.
features_X = BSOM_data.drop(columns='LEVEL').to_numpy()
y = BSOM_data['LEVEL'].to_numpy()
# random_state is fixed so that the split is reproducible
Xtrain, Xtest, ytrain, ytest = train_test_split(features_X, y, test_size = 0.3,random_state=0)

In [None]:
#Adding bias terms and feature scaling
#design matrices are laid out as (features + bias) x samples
train_X = Xtrain
m_train=train_X.shape[0]
train_X=np.append(np.ones((m_train,1)),train_X,axis=1).T
train_y=ytrain
train_y=pd.get_dummies(train_y).to_numpy()
#row-wise mean and range of the training data, taken before the training
#matrix is scaled, so that the same transformation can be applied to the test data
train_mean=np.mean(train_X,axis=1)
train_range=np.amax(train_X,axis=1)-np.amin(train_X,axis=1)
train_X=Feature_scaling(train_X)
test_X = Xtest
m_test=test_X.shape[0]
test_X=np.append(np.ones((m_test,1)),test_X,axis=1).T
test_y=ytest
test_y=pd.get_dummies(test_y).to_numpy()
#scale the test features with the TRAINING statistics (row 0, the bias row, is
#left alone). Scaling them with their own mean/range would put them on a
#different scale from the data the weights were fitted on.
test_X[1:,:]=(test_X[1:,:]-train_mean[1:,np.newaxis])/train_range[1:,np.newaxis]


In [None]:
#class letter -> integer code, applied in this order
label_codes = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
#encode the class labels in the train data
actual_train = ytrain
for letter, code in label_codes.items():
    actual_train = np.where(actual_train == letter, code, actual_train)
#encode the class labels in the test data
actual_test = ytest
for letter, code in label_codes.items():
    actual_test = np.where(actual_test == letter, code, actual_test)

In [None]:
##best parameters with best alpha on training data set
best_alpha = 0.6

#one-vs-rest: column k of train_y is the target of classifier k+1
print("classifier 1(class A vs not class A)")
best_coef1, costs_J1, num_iter1, best_train_pred1 = Gradientdescent(X=train_X, y=train_y[:, 0], alpha=best_alpha)
print("classifier 2(class B vs not class B)")
best_coef2, costs_J2, num_iter2, best_train_pred2 = Gradientdescent(X=train_X, y=train_y[:, 1], alpha=best_alpha)
print("classifier 3(class C vs not class C)")
best_coef3, costs_J3, num_iter3, best_train_pred3 = Gradientdescent(X=train_X, y=train_y[:, 2], alpha=best_alpha)
print("classifier 4(class D vs not class D)")
best_coef4, costs_J4, num_iter4, best_train_pred4 = Gradientdescent(X=train_X, y=train_y[:, 3], alpha=best_alpha)

#every sample gets the class whose classifier reports the highest probability
best_pred, best_labels = final_prediction(
    best_train_pred1,
    best_train_pred2,
    best_train_pred3,
    best_train_pred4,
)
final_labels_train = np.array(best_labels)

#confusion matrix and macro-averaged scores on the training data
print("Confusion Matrix \n")
cf = confusion_matrix(y_true=list(actual_train), y_pred=list(final_labels_train))
print(cf)
pr = precision_score(y_true=list(actual_train), y_pred=list(final_labels_train), average='macro')
rc = recall_score(y_true=list(actual_train), y_pred=list(final_labels_train), average='macro')
f1 = f1_score(y_true=list(actual_train), y_pred=list(final_labels_train), average='macro')
print("Precision : ", str(pr))
print("Recall : ", str(rc))
print("F1 score : ", str(f1))
print("Confusion Matrix of training data")
plot_confusion_matrix(cf)

In [None]:
#predicting on test data with best alpha and best parameters
#probabilities of the four fitted one-vs-rest classifiers on the test matrix
test_pred1 = hypothesis(X=test_X, thetas=best_coef1)
test_pred2 = hypothesis(X=test_X, thetas=best_coef2)
test_pred3 = hypothesis(X=test_X, thetas=best_coef3)
test_pred4 = hypothesis(X=test_X, thetas=best_coef4)

#every sample gets the class whose classifier reports the highest probability
pred_test, labels_test = final_prediction(
    test_pred1,
    test_pred2,
    test_pred3,
    test_pred4,
)
final_labels_test = np.array(labels_test)

#confusion matrix and macro-averaged scores on the test data
print("Confusion Matrix \n")
cf = confusion_matrix(y_true=list(actual_test), y_pred=list(final_labels_test))
print(cf)
pr = precision_score(y_true=list(actual_test), y_pred=list(final_labels_test), average='macro')
rc = recall_score(y_true=list(actual_test), y_pred=list(final_labels_test), average='macro')
f1 = f1_score(y_true=list(actual_test), y_pred=list(final_labels_test), average='macro')
print("Precision : ", str(pr))
print("Recall : ", str(rc))
print("F1 score : ", str(f1))
print("Confusion Matrix of test data")
plot_confusion_matrix(cf)