<a href="https://colab.research.google.com/drive/1TllRTgcPbXNEn39pcpBmUroVSiwaI2LI#scrollTo=KeuFct5irbCw" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

## 1. Import libraries

In [None]:
import os
import cv2 as cv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression

## 2. Load train/test data from CSV file

In [None]:
# Read the four header-less CSV splits into NumPy arrays, in the order
# train features, train labels, test features, test labels.
X_train, y_train, X_test, y_test = (
    pd.read_csv(csv_path, header=None).values
    for csv_path in (
        './dataset/data_ML/X_train.csv',
        './dataset/data_ML/y_train.csv',
        './dataset/data_ML/X_test.csv',
        './dataset/data_ML/y_test.csv',
    )
)
print(f'X_train.shape: {X_train.shape}, X_test.shape: {X_test.shape}')

## 3. Classification using logistic regression

Two implementations are provided: a from-scratch version of the algorithm and a version that uses the scikit-learn library. In the from-scratch version, the optimization method and the loss function are written out explicitly, so they can be changed directly in the code.

## Logistic Regression

### Source Code

In [None]:
# Binary LR classifier optimised with batch gradient descent
class LRFromScratch:
    """Binary logistic-regression classifier trained with batch gradient descent.

    The weights are updated with the gradient of the log-loss (cross-entropy),
    ``-X.T * (y - sigmoid(X * w))``, summed over all training samples. The
    summed squared error between the labels and the predicted probabilities
    is used only as the stopping criterion.

    Parameters
    ----------
    tol : float
        Training stops as soon as the summed squared error drops below this.
    C : float
        Stored on the instance but not used anywhere in this class
        (no regularisation is applied).
    max_iter : int
        Maximum number of gradient-descent steps.
    alpha : float
        Learning rate (step size) of each gradient-descent step.

    Attributes
    ----------
    w : numpy.matrix of shape (n_features + 1, 1)
        Learned weights, available after ``fit``. Row 0 is the intercept
        because a column of ones is prepended to the inputs.
    """

    def __init__(self, tol=1e-4, C=1.0, max_iter=10000, alpha=1e-2):
        self.tol = tol
        self.C = C
        self.max_iter = max_iter
        self.alpha = alpha

    # logistic function
    def f_log(self, X):
        """Return ``sigmoid(X * w)`` for a design matrix that includes the ones column."""
        z = X * self.w
        # tanh form of the logistic function: mathematically identical to
        # 1 / (1 + exp(-z)) but never overflows for large |z|.
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    # l2 loss function
    def l2(self, y_true, y_pred):
        """Return the sum of squared differences between ``y_true`` and ``y_pred``."""
        # Convert to a plain ndarray first so that ** is element-wise and not
        # a matrix power.
        residual = np.asarray(y_true - y_pred)
        return np.sum(residual ** 2)

    def fit(self, X_train, y_train):
        """Learn the weights from ``X_train`` (n_samples, n_features) and 0/1 labels ``y_train``.

        ``y_train`` may be 1-D or a single column; it is reshaped to (n_samples, 1).
        The result is stored in ``self.w``.
        """
        X_train = np.asarray(X_train)
        # Prepend a column of ones so that w[0] acts as the intercept.
        X_train = np.asmatrix(np.hstack([np.ones([X_train.shape[0], 1]), X_train]))
        y_train = np.asmatrix(np.asarray(y_train).reshape([-1, 1]))
        print('X_train shape:', X_train.shape)
        print('y_train shape:', y_train.shape)

        self.w = np.asmatrix(np.zeros([X_train.shape[1], 1]))  # initialisation of LR weights
        print('w shape:', self.w.shape)

        for _ in range(self.max_iter):
            y_pred = self.f_log(X_train)  # predictions with the current weights
            err = self.l2(y_train, y_pred)  # squared error of the current weights
            # Check convergence before stepping so that the reported error
            # belongs to the weights that are kept.
            if err < self.tol:
                print(f'converged with err={err}')
                break
            grad = -X_train.T * (y_train - y_pred)  # gradient of the log-loss
            self.w = self.w - self.alpha * grad  # gradient descent step

    def predict(self, X):
        """Return the predicted probability of class 1 as an (n_samples, 1) matrix."""
        X = np.asarray(X)
        X = np.asmatrix(np.hstack([np.ones([X.shape[0], 1]), X]))
        return self.f_log(X)

#### Train the logistic regression model using the training data (X_train, y_train)

In [None]:
# Build the from-scratch classifier with its default hyper-parameters and
# run gradient descent on the training split.
clf_lr_scratch = LRFromScratch()
clf_lr_scratch.fit(X_train=X_train, y_train=y_train)

### Viewing the weights of a Logistic Regression model after training.

In [None]:
# Display the last ten rows of the learned weight vector.
clf_lr_scratch.w[-10:]

#### Weight distribution

In [None]:
# Drop the intercept (row 0) and draw the remaining weights as a 2-D image
# whose row count is the integer square root of the number of weights.
w = clf_lr_scratch.w[1:]
fig, ax = plt.subplots()
heatmap = ax.imshow(w.reshape(int(np.sqrt(w.shape[0])), -1))
fig.colorbar(heatmap, ax=ax)
ax.set_title('Logistic Regression Weight Distribution')
plt.show()

#### Confusion Matrix

In [None]:
# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred = np.asarray(clf_lr_scratch.predict(X_test) >= 0.5).astype(int)

print(y_pred.shape, y_test.shape)
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred)
plt.show()

### Logistic regression algorithm using scikit-learn

In [None]:
# y_train comes from a DataFrame's .values and is therefore 2-D; scikit-learn
# expects a 1-D label vector, so flatten it for fit() to avoid a
# DataConversionWarning. The y_train variable itself is left unchanged.
clf_lr = LogisticRegression()
clf_lr.fit(X_train, y_train.ravel())
y_pred = clf_lr.predict(X_test)

In [None]:
# Per-class precision / recall / F1 summary of the latest predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix of the latest predictions against the test labels.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred)
plt.show()