In [28]:
#Logistic Regression for Binary Classification

In [29]:
#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
#only for jupyter notebook
%matplotlib inline

In [30]:
# Path of the Excel workbook holding the samples; it is relative, so it is
# resolved against the notebook's current working directory.
dataSet = 'data3.xlsx'

In [31]:
# header=None tells pandas the sheet has no header row; without it the first
# data row would be consumed as column names and lost as a sample.
df = pd.read_excel(dataSet,sheet_name='Sheet1',header=None)

In [32]:
# copy=True guarantees the in-place shuffle below cannot reach back into `df`
# (to_numpy may otherwise hand out a view), so re-running this cell always
# starts from the data exactly as it was loaded.
valueArray = df.to_numpy(copy=True)
# Shuffle rows with a dedicated, seeded generator: the hold-out split is then
# identical on every "Restart & Run All", and the global NumPy RNG state used
# later for weight initialisation is left untouched.
np.random.default_rng(1).shuffle(valueArray)
#Hold out cross validation technique 60 - 40
# (first 60 shuffled rows train, the remainder test; this is a 60/40 split
#  only if the sheet holds 100 rows -- TODO confirm)
trainData , testData = valueArray[:60] , valueArray[60:]
# Columns 0-3 are the features; column 4 onwards is kept 2-D as the label column.
X_train , y_train  = trainData[:,:4] , trainData[:,4:]
X_test , y_test = testData[:,:4] , testData[:,4:]
# Shift the labels down by one (presumably classes {1, 2} -> {0, 1}, matching
# the sigmoid output range -- verify against the data file).
y_train = y_train - 1
y_test = y_test - 1
print(y_train.shape)

(60, 1)


In [33]:
#Training the binary classifier using X_train and y_train

In [34]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z) for a scalar ``z``.

    The computation is split by the sign of ``z`` so that ``math.exp`` is only
    ever called with a non-positive argument. The naive form
    ``1 / (1 + exp(-z))`` raises ``OverflowError`` for large negative ``z``;
    this version saturates smoothly to 0.0 / 1.0 instead.

    Parameters:
        z: real scalar (anything ``math.exp`` accepts).

    Returns:
        float in the closed interval [0.0, 1.0].
    """
    if z >= 0:
        # exp(-z) <= 1 here, so it cannot overflow.
        return 1.0 / (1.0 + math.exp(-z))
    # For negative z use the algebraically equivalent e^z / (1 + e^z);
    # exp(z) < 1 and simply underflows to 0.0 for very negative z.
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)

In [35]:
def hypothesis(X,w,bias):
    """Return the model's predicted probability for a single sample.

    Computes sigmoid(w . X + bias), accumulating the weighted features one at
    a time in index order.

    Parameters:
        X: one sample, an indexable sequence of feature values.
        w: weights indexed as ``w[i][0]`` (a column of per-feature weights).
        bias: scalar intercept added after the weighted sum.

    Returns:
        The value of ``sigmoid`` applied to the linear activation.
    """
    activation = 0.0
    for position in range(len(X)):
        activation += w[position][0] * X[position]
    return sigmoid(activation + bias)

In [36]:
def gradient(X,y,index_feature,w,bias):
    """Return the (un-normalised) cost gradient for one parameter.

    Sums the prediction error ``hypothesis(x) - y`` over every row of ``X``.
    For a weight, each error is scaled by that row's value of the matching
    feature; for the bias the raw errors are summed.

    Parameters:
        X: sample matrix, one row per sample (must expose ``shape[0]``).
        y: targets indexed as ``y[row][0]``.
        index_feature: column index of the weight to differentiate, or -1 to
            request the gradient with respect to the bias.
        w: current weights, indexed as ``w[i][0]``.
        bias: current bias.

    Returns:
        The summed gradient; the caller divides by the sample count.
    """
    wants_bias = index_feature == -1
    total = 0.0
    for row in range(X.shape[0]):
        residual = hypothesis(X[row],w,bias) - y[row][0]
        if wants_bias:
            total += residual
        else:
            total += residual * X[row][index_feature]
    return total

In [37]:
def logisticRegression(X,y,w,bias,alpha,iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Each iteration applies ``theta = theta - (alpha / m) * gradient`` to every
    weight and to the bias. All gradients of an iteration are evaluated with
    the parameters from the previous iteration (simultaneous update) before
    any of them is overwritten.

    Works for any number of features: one weight per row of ``w`` is updated
    (previously exactly four weights were hard-coded).

    Parameters:
        X: sample matrix, one row per sample (must expose ``shape[0]``).
        y: targets indexed as ``y[row][0]``.
        w: initial weights indexed as ``w[i][0]``; updated IN PLACE.
        bias: initial bias.
        alpha: learning rate.
        iterations: number of gradient-descent steps to run.

    Returns:
        ``[w, bias]`` -- the same ``w`` object that was passed in (now holding
        the trained weights) and the trained bias.
    """
    m = X.shape[0]
    # The step size does not change between iterations, so compute it once.
    step = alpha / m
    n_weights = len(w)
    for _ in range(iterations):
        # Evaluate every new value first, using only the old w and old bias...
        new_weights = [
            w[j][0] - step * gradient(X,y,j,w,bias)
            for j in range(n_weights)
        ]
        new_bias = bias - step * gradient(X,y,-1,w,bias)
        # ...and only then commit them, keeping the update simultaneous.
        for j, value in enumerate(new_weights):
            w[j][0] = value
        bias = new_bias
    return [w,bias]

In [38]:
# Seed the global NumPy RNG, then draw a 4x1 column of starting weights.
np.random.seed(1)
w = np.random.rand(4, 1)
bias = 1
# Pin the starting weights to fixed literals in a single column assignment
# (they appear to be the rounded values the seeded draw yields -- TODO confirm),
# so the run starts from exactly these numbers.
w[:, 0] = (4.17022005e-01, 7.20324493e-01, 1.14374817e-04, 3.02332573e-01)
print(w)

[[4.17022005e-01]
 [7.20324493e-01]
 [1.14374817e-04]
 [3.02332573e-01]]


In [39]:
parameters = logisticRegression(X_train,y_train,w,bias,0.5,100)
w = parameters[0]
bias = parameters[1]

In [40]:
w 

array([[-0.7745544 ],
       [-1.67418221],
       [ 2.7840963 ],
       [ 1.54966892]])

In [41]:
bias

0.5430265815983963

In [42]:
# Evaluating the performance of binary classifier using X_test , y_test

In [43]:
"""
Confusion Matrix

Actual Output   Predicted Output

                class 2 | class 1
class 2         TN      |   FP
________________________|____________
                        |
class 1         FN      |   TP

"""

'\nConfusion Matrix\n\nActual Output   Predicted Output\n\n                class 2 | class 1\nclass 2         TN      |   FP\n________________________|____________\n                        |\nclass 1         FN      |   TP\n\n'

In [44]:
"""
Predicting output based on our trained classifier
If the sigmoid output is >= 0.5 the sample is assigned to class 2 (label 1), otherwise to class 1 (label 0)
"""
# Threshold the model's probability at 0.5 to get a hard 0/1 label per test row.
y_predicted = [
    1 if hypothesis(sample, w, bias) >= 0.5 else 0
    for sample in X_test
]

In [45]:
# Tally the four confusion-matrix cells. Following the table above, label 1
# is treated as the "negative" class and every other label as "positive".
trueNegatives = truePositives = falsePositives = falseNegatives = 0
for row, actual in enumerate(y_test):
    actual_is_negative = actual[0] == 1
    guess = y_predicted[row]
    if actual_is_negative and guess == 1:
        trueNegatives += 1
    elif actual_is_negative:
        falsePositives += 1
    elif guess == 0:
        truePositives += 1
    else:
        falseNegatives += 1
print("Confusion Matrix -> ")
print(trueNegatives," ",falsePositives)
print(falseNegatives," ",truePositives)

Confusion Matrix -> 
20   0
0   20


In [46]:
# Sensitivity = TP / (TP + FN)
actual_positives = truePositives + falseNegatives
SE = truePositives / actual_positives
# Specificity = TN / (TN + FP)
actual_negatives = trueNegatives + falsePositives
SP = trueNegatives / actual_negatives
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
correct = truePositives + trueNegatives
Acc = correct / (trueNegatives + truePositives + falsePositives + falseNegatives)

In [47]:
# Report each metric as a percentage, one line per metric.
for caption, ratio in (
    ("Sensitivity is : ", SE),
    ("Specificity is : ", SP),
    ("Accuracy is : ", Acc),
):
    print(caption,ratio*100,"%")

Sensitivity is :  100.0 %
Specificity is :  100.0 %
Accuracy is :  100.0 %
