In [51]:
#Multiclass Logistic Regression using 5 fold cross validation technique

In [52]:
#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
#only for jupyter notebook
%matplotlib inline

In [53]:
dataSet = 'data4.xlsx'

In [54]:
#header = None states that there is no header row or else it would take first row of our data as header.
df = pd.read_excel(dataSet,sheet_name='Sheet1',header=None)

In [55]:
"""
5 fold cross validation ->

150 points / 5 = 30 points in each of 5 subsets

loop(5 times) ->
    choose one subset as testing and remaining 4 for training
    run the logistic regression and test on the one testing subset
"""

'\n5 fold cross validation ->\n\n150 points / 5 = 30 points in each of 5 subsets\n\nloop(5 times) ->\n    choose one subset as testing and remaining 4 for training\n    run the logistic regression and test on the one testing subset\n'

In [56]:
#Binary Logistic Regression -> classifies between 1 and 0
def sigmoid(z):
    return 1.0/ (1.0 + math.exp(-z))

def hypothesis(X,w,bias):
    sum = 0.0
    for index_feature,feature in enumerate(X):
        sum += w[index_feature][0] * feature
    sum += bias
    return sigmoid(sum)

def gradient(X,y,index_feature,w,bias):
    grad = 0.0
    if index_feature == -1 :
        #find gradient for bias
        for index in range(X.shape[0]):
            grad += (hypothesis(X[index],w,bias) - y[index][0])
    else:
        #find gradient for w[index][0]
        for index in range(X.shape[0]):
            grad += (hypothesis(X[index],w,bias) - y[index][0]) * X[index][index_feature]
            
    return grad

def logisticRegression(X,y,w,bias,alpha,iterations):
    for index in range(iterations):
        #update 7 weights and bias
        #w = w - alpha/m * gradient
        m = X.shape[0]
        w0 = w[0][0] - (alpha/m) * gradient(X,y,0,w,bias)
        w1 = w[1][0] - (alpha/m) * gradient(X,y,1,w,bias)
        w2 = w[2][0] - (alpha/m) * gradient(X,y,2,w,bias)
        w3 = w[3][0] - (alpha/m) * gradient(X,y,3,w,bias)
        w4 = w[4][0] - (alpha/m) * gradient(X,y,4,w,bias)
        w5 = w[5][0] - (alpha/m) * gradient(X,y,5,w,bias)
        w6 = w[6][0] - (alpha/m) * gradient(X,y,6,w,bias)
        b  = bias - (alpha/m) * gradient(X,y,-1,w,bias)
        w[0][0] = w0
        w[1][0] = w1
        w[2][0] = w2
        w[3][0] = w3
        w[4][0] = w4
        w[5][0] = w5
        w[6][0] = w6
        bias = b
    return [w,bias]

In [57]:
def oneVsAllTrain(X,y,alpha,iterations):
    np.random.seed(44) 
    w = np.random.rand(7,1) #Weight matrix with random values 7*1 matrix
    #w[0][0] = w[1][0] = w[2][0] = w[3][0] = w[4][0] = w[5][0] = w[6][0] = 0.0
    bias = 1
    
    y1 = y2 = y3 = np.zeros(shape = (y.shape[0],1))
    #Model - 1
    for index_target,target in enumerate(y):
        if target == 2.0 or target == 3.0 :
            y1[index_target][0] = 0.0
        elif target == 1.0:
            y1[index_target][0] = 1.0
    
    w = np.random.rand(7,1)
    bias = 1
    parameters = logisticRegression(X,y1,w,bias,alpha,iterations)
    model1_w = parameters[0]
    model1_bias = parameters[1]
    #Model - 2
    for index_target,target in enumerate(y):
        if target == 1.0 or target == 3.0 :
            y2[index_target][0] = 0.0
        elif target == 2.0:
            y2[index_target][0] = 1.0
        
    
    w = np.random.rand(7,1)
    bias = 1
    parameters = logisticRegression(X,y2,w,bias,alpha,iterations)
    model2_w = parameters[0]
    model2_bias = parameters[1]
    
    #Model - 3
    for index_target,target in enumerate(y):
        if target == 1.0 or target == 2.0 :
            y3[index_target][0] = 0.0
        elif target == 3.0:
            y3[index_target][0] = 1.0
    
    w = np.random.rand(7,1)
    bias = 1
    parameters = logisticRegression(X,y3,w,bias,alpha,iterations)
    model3_w = parameters[0]
    model3_bias = parameters[1]

    return [model1_w,model1_bias,model2_w,model2_bias,model3_w,model3_bias]

In [58]:
def oneVsAllTest(models,X,y):
    y_predicted = []
    model1_w = models[0]
    model1_bias = models[1]
    model2_w = models[2]
    model2_bias = models[3]
    model3_w = models[4]
    model3_bias = models[5]
    for index in range(y.shape[0]):
        h1 = hypothesis(X[index],model1_w,model1_bias)
        h2 = hypothesis(X[index],model2_w,model2_bias)
        h3 = hypothesis(X[index],model3_w,model3_bias)
        h = [h1,h2,h3]
        h = np.asarray(h)
        predicted_class = np.argmax(h) + 1
        y_predicted.append(predicted_class)
    return y_predicted

In [59]:
def confusionMatrix(y_test,y_predicted) :
    u11 = u12 = u13 = u21 = u22 = u23 = u31 = u32 = u33 = 0
    for index in range(y_test.shape[0]):
        if y_test[index][0] == 1 :
            if y_predicted[index] == 1:
                u11 += 1
            elif y_predicted[index] == 2:
                u12 += 1
            else:
                u13 += 1
        elif y_test[index][0] == 2:
            if y_predicted[index] == 1:
                u21 += 1
            elif y_predicted[index] == 2:
                u22 += 1
            else:
                u23 += 1
        elif y_test[index][0] == 3:
            if y_predicted[index] == 1:
                u31 += 1
            elif y_predicted[index] == 2:
                u32 += 1
            else:
                u33 += 1
    print("Confusion Matrix is :")
    print(u11," ",u12," ",u13)
    print(u21," ",u22," ",u23)
    print(u31," ",u32," ",u33)
    
    IA_class1 = (float(u11)/float(u11 + u12 + u13))*100
    IA_class2 = (float(u22)/float(u21 + u22 + u23))*100
    IA_class3 = (float(u33)/float(u31 + u32 + u33))*100
    Overall_Acc = (float(u11 + u22 + u33)/float(u11 + u12 + u13 + u21 + u22 + u23 + u31 + u32 + u33))*100
    
    print("Individual Accuracy of class 1 is : ",IA_class1,"%")
    print("Individual Accuracy of class 2 is : ",IA_class2,"%")
    print("Individual Accuracy of class 3 is : ",IA_class3,"%")
    print("Overall Accuracy is : ",Overall_Acc,"%")

In [60]:
valueArray = df.to_numpy()
np.random.shuffle(valueArray)

In [61]:
#Partition data set into 5 subsets
subset1 = valueArray[0:30,0:]
subset2 = valueArray[30:60,0:]
subset3 = valueArray[60:90,0:]
subset4 = valueArray[90:120,0:]
subset5 = valueArray[120:150,0:]
subsets = [subset1,subset2,subset3,subset4,subset5]

In [62]:
testData = np.zeros(shape = (30,8))
for testing_index in range(len(subsets)):
    print("\n\nFOLD : ",testing_index+1,"\n\n")
    trainData = np.zeros(shape = (0,8))
    testData = subsets[testing_index]
    for training_index in range(len(subsets)):
        if training_index == testing_index:
            continue
        trainData = np.concatenate((trainData,subsets[training_index]))
    X_train , X_test = trainData[0:,0:7] , testData[0:,0:7]
    X_train = (X_train - X_train.mean())/X_train.std()
    X_test = (X_test - X_test.mean())/X_test.std()
    y_train , y_test = trainData[0:,7:] , testData[0:,7:]
    #Use logistic regression function on this trainingData
    models = oneVsAllTrain(X_train,y_train,0.75,5000)
    #Find accuracy on this testingData
    y_predicted = oneVsAllTest(models,X_test,y_test)
    confusionMatrix(y_test,y_predicted)



FOLD :  1 


Confusion Matrix is :
10   1   0
2   3   8
0   0   6
Individual Accuracy of class 1 is :  90.9090909090909 %
Individual Accuracy of class 2 is :  23.076923076923077 %
Individual Accuracy of class 3 is :  100.0 %
Overall Accuracy is :  63.33333333333333 %


FOLD :  2 


Confusion Matrix is :
10   0   0
4   2   2
0   3   9
Individual Accuracy of class 1 is :  100.0 %
Individual Accuracy of class 2 is :  25.0 %
Individual Accuracy of class 3 is :  75.0 %
Overall Accuracy is :  70.0 %


FOLD :  3 


Confusion Matrix is :
6   2   0
0   5   4
1   3   9
Individual Accuracy of class 1 is :  75.0 %
Individual Accuracy of class 2 is :  55.55555555555556 %
Individual Accuracy of class 3 is :  69.23076923076923 %
Overall Accuracy is :  66.66666666666666 %


FOLD :  4 


Confusion Matrix is :
12   0   0
3   5   2
0   1   7
Individual Accuracy of class 1 is :  100.0 %
Individual Accuracy of class 2 is :  50.0 %
Individual Accuracy of class 3 is :  87.5 %
Overall Accuracy is :  80.0 %
