<h1>Logistic Regression From Scratch</h1>
<hr>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
cwd = os.getcwd()

def importOrRenewDataset():
    # importing dataset
    pdTestFeatures  = pd.read_csv( os.path.join(cwd, "data", "test-features.csv"),  header=None)
    pdTestLabels    = pd.read_csv( os.path.join(cwd, "data", "test-labels.csv"),    header=None)
    pdTrainFeatures = pd.read_csv( os.path.join(cwd, "data", "train-features.csv"), header=None)
    pdTrainLabels   = pd.read_csv( os.path.join(cwd, "data", "train-labels.csv"),   header=None)
    
    # labeling as 0 or 1 the dataset
    pdTestLabels[ pdTestLabels[0] <  90 ] = 0
    pdTestLabels[ pdTestLabels[0] >= 90 ] = 1
    pdTrainLabels[ pdTrainLabels[0] <  90 ] = 0
    pdTrainLabels[ pdTrainLabels[0] >= 90 ] = 1
    
    # dataset 
    npTestFeatures  = pdTestFeatures.values
    npTestLabels    = pdTestLabels.values
    npTrainFeatures = pdTrainFeatures.values
    npTrainLabels   = pdTrainLabels.values
    
    # properties
    testSampleCount  = npTestFeatures.shape[0]
    trainSampleCount = npTrainFeatures.shape[0]
    featureCount = npTestFeatures.shape[1]
    weights = np.zeros((8, 1))
    bias = 0
    
    return bias, weights, npTrainFeatures, npTrainLabels, npTestFeatures,\
           npTestLabels, testSampleCount, trainSampleCount, featureCount

In [2]:
# sigmoid function
def sigmoidFunction(z):
    return 1 / (1 + np.exp(-z))

# updating weights 
def calculateDerivatives(npTrainFeatures, npTrainLabels, trainSampleCount, bias, weights):
    z = npTrainFeatures.dot(weights) + bias
    npTrainLabelsHead = sigmoidFunction(z)
    # finding out derivatives
    derivativeOfWeight = np.dot(npTrainFeatures.T , (npTrainLabels - npTrainLabelsHead)) / trainSampleCount 
    derivativeOfBias   = np.sum(npTrainLabels - npTrainLabelsHead) / trainSampleCount
    return  derivativeOfWeight, derivativeOfBias

In [3]:
def printTestResults(weights, bias, npTestFeatures, npTestLabels, testSampleCount):
    z = npTestFeatures.dot(weights) + bias
    npTestLabelsHead = sigmoidFunction(z)
    predictions = np.zeros((testSampleCount, 1))

    for i in range(testSampleCount):
        if npTestLabelsHead[i,0] > 0.5:
            predictions[i,0] = 1
        else:
            predictions[i,0] = 0
    
    # calculating confusion matrix
    truePositives  = np.sum( np.logical_and( npTestLabels, predictions) )
    falsePositives = np.sum( np.logical_and( np.logical_not(npTestLabels), predictions) ) 
    falseNegatives = np.sum( np.logical_and( npTestLabels, np.logical_not(predictions)) ) 
    trueNegatives  = np.sum( np.logical_and( np.logical_not(npTestLabels), np.logical_not(predictions)) )

    # printing out confusion matrix
    print("Learning rate " + str(learningRate) + ": ")
    print("-----------------------------------------------------")
    print("True Positives: "  + str(truePositives))
    print("False Positives: " + str(falsePositives))
    print("False Negatives: " + str(falseNegatives))
    print("True Negatives: " + str(trueNegatives))
    
    # adding one to avoid nan values
    truePositives  = truePositives  + 1
    falsePositives = falsePositives + 1
    falseNegatives = falseNegatives + 1
    trueNegatives  = trueNegatives  + 1
    testSampleCount += 4
    
    acurracy = ((truePositives + trueNegatives) / testSampleCount) * 100
    precision = truePositives / (truePositives + falsePositives) 
    recall = truePositives / (truePositives + falseNegatives)
    negativePredictiveValue = trueNegatives / (falseNegatives + trueNegatives)
    falsePositiveRate = falsePositives / (falsePositives + trueNegatives)
    falseDiscoveryRate = falsePositives / (falsePositives + truePositives)
    f1Score = (2 * precision * recall) / (precision + recall)
    f2Score = (5 * precision * recall) / (4 * precision + recall)
    
    # printing out results
    print("Acurracy: " + str(acurracy))
    print("Precision: " + str(precision))
    print("Recall: " + str(recall))
    print("Negative Predictive Value: " + str(negativePredictiveValue))
    print("False Positive Rate: " + str(falsePositiveRate))
    print("False Discovery Rate: " + str(falseDiscoveryRate))
    print("F1 Score: " + str(f1Score))
    print("F2 Score: " + str(f2Score))
    print()
    
    return truePositives, falsePositives, falseNegatives, trueNegatives

In [4]:
def printMicroMacroResults(truePositiveArr, falsePositiveArr, falseNegativeArr, trueNegativeArr):
    precision = truePositiveArr / (truePositiveArr + falsePositiveArr)
    recall = truePositiveArr / (truePositiveArr + falseNegativeArr)
    
    # MACRO AVERAGING
    macroPrecision = precision.mean() 
    macroRecall = recall.mean()
    macroNegativePredictiveValue = (trueNegativeArr / (falseNegativeArr + trueNegativeArr)).mean()
    macroFalsePositiveRate = (falsePositiveArr / (falsePositiveArr + trueNegativeArr)).mean()
    macroFalseDiscoveryRate = (falsePositiveArr / (falsePositiveArr + truePositiveArr)).mean()
    macroF1Score = ((2 * precision * recall) / (precision + recall)).mean()
    macroF2Score = ((5 * precision * recall) / (4 * precision + recall)).mean()
    
    # MICRO AVERAGING
    microPrecision = truePositiveArr.sum() / ((truePositiveArr + falsePositiveArr).sum())
    microRecall = truePositiveArr.sum() / ((truePositiveArr + falseNegativeArr).sum())
    microNegativePredictiveValue = trueNegatives.sum() / ((falseNegatives + trueNegatives).sum())
    microFalsePositiveRate = falsePositives.sum() / ((falsePositives + trueNegatives).sum())
    microFalseDiscoveryRate = falsePositives.sum() / ((falsePositives + truePositives).sum())
    microF1Score = (2 * precision * recall).sum() / ((precision + recall).sum())
    microF2Score = (5 * precision * recall).sum() / ((4 * precision + recall).sum())
    
    # printing results
    print('-----------------------------------------------------------------')
    print('-------------------------MACRO AVERAGING-------------------------')
    print('-----------------------------------------------------------------')
    print("Macro Precision: " + str(macroPrecision))
    print("Macro Recall: " + str(macroRecall))
    print("Macro Negative Predictive Value: " + str(macroNegativePredictiveValue))
    print("Macro False Positive Rate: " + str(macroFalsePositiveRate))
    print("Macro False Discovery Rate: " + str(macroFalseDiscoveryRate))
    print("Macro F1 Score: " + str(macroF1Score))
    print("Macro F2 Score: " + str(macroF2Score))
    print()
    
    print('-----------------------------------------------------------------')
    print('-------------------------MICRO AVERAGING-------------------------')
    print('-----------------------------------------------------------------')
    print("Micro Precision: " + str(microPrecision))
    print("Micro Recall: " + str(microRecall))
    print("Micro Negative Predictive Value: " + str(microNegativePredictiveValue))
    print("Micro False Positive Rate: " + str(microFalsePositiveRate))
    print("Micro False Discovery Rate: " + str(microFalseDiscoveryRate))
    print("Micro F1 Score: " + str(microF1Score))
    print("Micro F2 Score: " + str(microF2Score))
    print()

<strong>Question 3.1: </strong>You will implement full batch gradient ascent algorithm to train your logistic
regression model. Initialize all weights to 0. Try different learning rates from the given logarithmic scale
[10􀀀5; 10􀀀4; 10􀀀3; 10􀀀2; 10􀀀1] and choose the one which works best for you. Use 1000 iterations to train your
model. Report the accuracy and the confusion matrix using your model on the test set given. Calculate
and report micro and macro averages of precision, recall, negative predictive value (NPV), false positive rate
(FPR), false discovery rate (FDR), F1 and F2 scores.

In [5]:
learningRates = [0.00001, 0.0001, 0.001, 0.01, 0.1]

# APPLYING FULL BATCH GRADIENT DESCENT
batchGradientTruePositives  = []
batchGradientFalsePositives = []
batchGradientFalseNegatives = []
batchGradientTrueNegatives  = []

for learningRate in learningRates:
    bias, weights, npTrainFeatures, npTrainLabels, npTestFeatures, npTestLabels,\
    testSampleCount, trainSampleCount, featureCount = importOrRenewDataset()
    
    for j in range(1000):  
        derivativeOfWeight, derivativeOfBias = calculateDerivatives\
        (npTrainFeatures, npTrainLabels, trainSampleCount, bias, weights)
    
        # updating weights and bias
        weights = weights + learningRate * derivativeOfWeight
        bias = bias + learningRate * derivativeOfBias
        
    truePositives, falsePositives, falseNegatives, trueNegatives = \
    printTestResults(weights, bias, npTestFeatures, npTestLabels, testSampleCount)
    
    batchGradientTruePositives.append( truePositives )
    batchGradientFalsePositives.append( falsePositives )
    batchGradientFalseNegatives.append( falseNegatives )
    batchGradientTrueNegatives.append( trueNegatives )

Learning rate 1e-05: 
-----------------------------------------------------
True Positives: 2342
False Positives: 1036
False Negatives: 0
True Negatives: 0
Acurracy: 69.30810171496155
Precision: 0.6931952662721893
Recall: 0.9995733788395904
Negative Predictive Value: 0.5
False Positive Rate: 0.9990366088631984
False Discovery Rate: 0.30680473372781064
F1 Score: 0.8186582809224318
F2 Score: 0.9183913452492946

Learning rate 0.0001: 
-----------------------------------------------------
True Positives: 2289
False Positives: 868
False Negatives: 53
True Negatives: 168
Acurracy: 72.70845653459492
Precision: 0.7249129471351694
Recall: 0.976962457337884
Negative Predictive Value: 0.757847533632287
False Positive Rate: 0.8371868978805395
False Discovery Rate: 0.27508705286483065
F1 Score: 0.8322733054697438
F2 Score: 0.9134423613881133

Learning rate 0.001: 
-----------------------------------------------------
True Positives: 2105
False Positives: 247
False Negatives: 237
True Negatives: 789

In [6]:
# printing out micro and macro results.
batchGradientTruePositives  = np.array(batchGradientTruePositives)
batchGradientFalsePositives = np.array(batchGradientFalsePositives)
batchGradientFalseNegatives = np.array(batchGradientFalseNegatives)
batchGradientTrueNegatives  = np.array(batchGradientTrueNegatives)

printMicroMacroResults( batchGradientTruePositives, batchGradientFalsePositives, \
                       batchGradientFalseNegatives, batchGradientTrueNegatives)

-----------------------------------------------------------------
-------------------------MACRO AVERAGING-------------------------
-----------------------------------------------------------------
Macro Precision: 0.8274936821513152
Macro Recall: 0.8755119453924914
Macro Negative Predictive Value: 0.6610217545391299
Macro False Positive Rate: 0.4847784200385356
Macro False Discovery Rate: 0.17250631784868475
Macro F1 Score: 0.8346060067909878
Macro F2 Score: 0.8550021714475964

-----------------------------------------------------------------
-------------------------MICRO AVERAGING-------------------------
-----------------------------------------------------------------
Micro Precision: 0.803083665962276
Micro Recall: 0.8755119453924914
Micro Negative Predictive Value: 0.49841437632135305
Micro False Positive Rate: 0.09152215799614644
Micro False Discovery Rate: 0.06375838926174497
Micro F1 Score: 0.8381256729966073
Micro F2 Score: 0.8525488484824458



<strong>Question 3.2: </strong> You are NOT allowed to use any machine learning libraries to train and test your model for this question. You will implement mini-batch gradient ascent algorithm with batch size = 32 and stochastic gradient ascent algorithm to train your logistic regression model. Initialize all weights to 0. Use the learning rate you have chosen in Question 3.1 and perform 1000 iterations to train your model. Report the accuracies and the confusion matrices using your models on the given test set. Calculate and report micro and macro averages of precision, recall, negative predictive value (NPV), false positive rate (FPR), false discovery rate (FDR), F1 and F2 scores.

In [7]:
# APPLYING MINI BATCH GRADIENT DESCENT WITH SIZE = 32
miniBatchGradientTruePositives  = []
miniBatchGradientFalsePositives = []
miniBatchGradientFalseNegatives = []
miniBatchGradientTrueNegatives  = []


for learningRate in learningRates:
    bias, weights, npTrainFeatures, npTrainLabels, npTestFeatures, npTestLabels,\
    testSampleCount, trainSampleCount, featureCount = importOrRenewDataset()
    
    for j in range(1000):  
        idx = np.random.randint(testSampleCount, size=32)
        npTrainLabels32 = npTrainLabels[idx]
        npTrainFeatures32 = npTrainFeatures[idx,:]
        trainSampleCount32 = 32
        
        derivativeOfWeight, derivativeOfBias = calculateDerivatives\
        (npTrainFeatures32, npTrainLabels32, trainSampleCount32, bias, weights)
    
        # updating weights and bias
        weights = weights + learningRate * derivativeOfWeight
        bias = bias + learningRate * derivativeOfBias
        
    truePositives, falsePositives, falseNegatives, trueNegatives = \
    printTestResults(weights, bias, npTestFeatures, npTestLabels, testSampleCount)
    
    miniBatchGradientTruePositives.append( truePositives )
    miniBatchGradientFalsePositives.append( falsePositives )
    miniBatchGradientFalseNegatives.append( falseNegatives )
    miniBatchGradientTrueNegatives.append( trueNegatives )

Learning rate 1e-05: 
-----------------------------------------------------
True Positives: 1616
False Positives: 209
False Negatives: 726
True Negatives: 827
Acurracy: 72.29450029568302
Precision: 0.8850574712643678
Recall: 0.6898464163822525
Negative Predictive Value: 0.532475884244373
False Positive Rate: 0.2023121387283237
False Discovery Rate: 0.11494252873563218
F1 Score: 0.7753536322224885
F2 Score: 0.7216816924038204

Learning rate 0.0001: 
-----------------------------------------------------
True Positives: 1426
False Positives: 204
False Negatives: 916
True Negatives: 832
Acurracy: 66.82436428149025
Precision: 0.8743872549019608
Recall: 0.6087883959044369
Negative Predictive Value: 0.476
False Positive Rate: 0.197495183044316
False Discovery Rate: 0.1256127450980392
F1 Score: 0.7178068410462777
F2 Score: 0.6481649709302325

Learning rate 0.001: 
-----------------------------------------------------
True Positives: 1619
False Positives: 338
False Negatives: 723
True Negatives

In [8]:
# printing out micro and macro results.
miniBatchGradientTruePositives  = np.array(miniBatchGradientTruePositives)
miniBatchGradientFalsePositives = np.array(miniBatchGradientFalsePositives)
miniBatchGradientFalseNegatives = np.array(miniBatchGradientFalseNegatives)
miniBatchGradientTrueNegatives  = np.array(miniBatchGradientTrueNegatives)

printMicroMacroResults( miniBatchGradientTruePositives, miniBatchGradientFalsePositives, \
                       miniBatchGradientFalseNegatives, miniBatchGradientTrueNegatives)

-----------------------------------------------------------------
-------------------------MACRO AVERAGING-------------------------
-----------------------------------------------------------------
Macro Precision: 0.7945399853080346
Macro Recall: 0.796160409556314
Macro Negative Predictive Value: 0.4685097536985885
Macro False Positive Rate: 0.5433526011560693
Macro False Discovery Rate: 0.20546001469196548
Macro F1 Score: 0.7761305713511042
Macro F2 Score: 0.7831437112096464

-----------------------------------------------------------------
-------------------------MICRO AVERAGING-------------------------
-----------------------------------------------------------------
Micro Precision: 0.7679203357748333
Micro Recall: 0.796160409556314
Micro Negative Predictive Value: 0.14285714285714285
Micro False Positive Rate: 0.9971098265895953
Micro False Discovery Rate: 0.3079440642665873
Micro F1 Score: 0.7781475084634338
Micro F2 Score: 0.7786234121325488



In [9]:
# APPLYING STOCHASTIC GRADIENT DESCENT 
stochasticGradientTruePositives  = []
stochasticGradientFalsePositives = []
stochasticGradientFalseNegatives = []
stochasticGradientTrueNegatives  = []

for learningRate in learningRates:
    bias, weights, npTrainFeatures, npTrainLabels, npTestFeatures, npTestLabels,\
    testSampleCount, trainSampleCount, featureCount = importOrRenewDataset()
    
    for j in range(1000):      
        for k in range(testSampleCount):
            idx = np.array([k])
            npTrainLabelSample = npTrainLabels[idx]
            npTrainFeaturesSample = npTrainFeatures[idx,:]
            trainSampleCountSample = 1
        
            derivativeOfWeight, derivativeOfBias = calculateDerivatives\
            (npTrainFeatures32, npTrainLabels32, trainSampleCount32, bias, weights)
    
            # updating weights and bias
            weights = weights + learningRate * derivativeOfWeight
            bias = bias + learningRate * derivativeOfBias
        
    truePositives, falsePositives, falseNegatives, trueNegatives = \
    printTestResults(weights, bias, npTestFeatures, npTestLabels, testSampleCount)
    
    stochasticGradientTruePositives.append( truePositives )
    stochasticGradientFalsePositives.append( falsePositives )
    stochasticGradientFalseNegatives.append( falseNegatives )
    stochasticGradientTrueNegatives.append( trueNegatives )

Learning rate 1e-05: 
-----------------------------------------------------
True Positives: 2342
False Positives: 1036
False Negatives: 0
True Negatives: 0
Acurracy: 69.30810171496155
Precision: 0.6931952662721893
Recall: 0.9995733788395904
Negative Predictive Value: 0.5
False Positive Rate: 0.9990366088631984
False Discovery Rate: 0.30680473372781064
F1 Score: 0.8186582809224318
F2 Score: 0.9183913452492946

Learning rate 0.0001: 
-----------------------------------------------------
True Positives: 2342
False Positives: 1036
False Negatives: 0
True Negatives: 0
Acurracy: 69.30810171496155
Precision: 0.6931952662721893
Recall: 0.9995733788395904
Negative Predictive Value: 0.5
False Positive Rate: 0.9990366088631984
False Discovery Rate: 0.30680473372781064
F1 Score: 0.8186582809224318
F2 Score: 0.9183913452492946

Learning rate 0.001: 
-----------------------------------------------------
True Positives: 2342
False Positives: 1036
False Negatives: 0
True Negatives: 0
Acurracy: 69.3081

In [10]:
# printing out micro and macro results.
stochasticGradientTruePositives  = np.array(stochasticGradientTruePositives)
stochasticGradientFalsePositives = np.array(stochasticGradientFalsePositives)
stochasticGradientFalseNegatives = np.array(stochasticGradientFalseNegatives)
stochasticGradientTrueNegatives  = np.array(stochasticGradientTrueNegatives)

printMicroMacroResults( stochasticGradientTruePositives, stochasticGradientFalsePositives, \
                       stochasticGradientFalseNegatives, stochasticGradientTrueNegatives)

-----------------------------------------------------------------
-------------------------MACRO AVERAGING-------------------------
-----------------------------------------------------------------
Macro Precision: 0.6931952662721893
Macro Recall: 0.9995733788395904
Macro Negative Predictive Value: 0.5
Macro False Positive Rate: 0.9990366088631986
Macro False Discovery Rate: 0.30680473372781064
Macro F1 Score: 0.8186582809224318
Macro F2 Score: 0.9183913452492944

-----------------------------------------------------------------
-------------------------MICRO AVERAGING-------------------------
-----------------------------------------------------------------
Micro Precision: 0.6931952662721893
Micro Recall: 0.9995733788395904
Micro Negative Predictive Value: 0.5
Micro False Positive Rate: 0.9990366088631984
Micro False Discovery Rate: 0.30680473372781064
Micro F1 Score: 0.8186582809224318
Micro F2 Score: 0.9183913452492946

