# In[1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.preprocessing import StandardScaler

# In[2]:
class LogReg:
    """Binary logistic regression trained with batch gradient descent.

    Labels are expected to be encoded as 0 (negative) and 1 (positive); all
    metric helpers compare against exactly those two values.

    Args:
        alpha: Learning rate used for every gradient step.
        iterations: Number of gradient steps performed by
            ``gradientDescent``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so the
    # loss never evaluates log(0).
    _EPS = 1e-15

    def __init__(self, alpha=0.01, iterations=1000):
        self.alpha = alpha
        self.iterations = iterations
        # Learned parameters; they stay None until gradientDescent() runs.
        self.w = None
        self.b = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
        # Clipping keeps np.exp from overflowing; beyond +/-500 the result is
        # already 0 or 1 to within float64 precision.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def logLoss(self, y, yhat):
        """Return the per-sample binary cross-entropy.

        Args:
            y: True labels (0 or 1).
            yhat: Predicted probabilities of the positive class.

        Returns:
            Array of non-negative losses, one per sample.
        """
        y = np.asarray(y, dtype=float)
        yhat = np.clip(np.asarray(yhat, dtype=float), self._EPS, 1 - self._EPS)
        # The negation must cover BOTH terms of the cross-entropy.
        return -(y * np.log(yhat) + (1 - y) * np.log(1 - yhat))

    def computeCost(self, y, yhat):
        """Return the mean binary cross-entropy over all samples."""
        return float(np.mean(self.logLoss(y, yhat)))

    def _predict_proba(self, X):
        """Return the predicted positive-class probability for each row of X."""
        if self.w is None or self.b is None:
            raise RuntimeError(
                "Model is not fitted; call gradientDescent() first."
            )
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.w) + self.b)

    def gradientDescent(self, X, y):
        """Fit the weights and bias with batch gradient descent.

        Args:
            X: Feature matrix of shape (m, n).
            y: Labels (0 or 1) of length m.

        Returns:
            List with the cost measured at every iteration (computed from the
            predictions made before that iteration's parameter update).
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector does not broadcast (m, 1) against (m,).
        y = np.asarray(y, dtype=float).ravel()

        m, n = X.shape
        self.w = np.zeros(n)
        self.b = 0.0
        cost_history = []

        for _ in range(self.iterations):
            yhat = self.sigmoid(np.dot(X, self.w) + self.b)
            error = yhat - y

            # Gradient of the mean cross-entropy w.r.t. weights and bias.
            dw = np.dot(X.T, error) / m
            db = np.sum(error) / m

            self.w -= self.alpha * dw
            self.b -= self.alpha * db

            cost_history.append(self.computeCost(y, yhat))

        return cost_history

    def predict(self, X):
        """Return hard 0/1 class predictions using a 0.5 probability cut-off."""
        return (self._predict_proba(X) >= 0.5).astype(int)

    @staticmethod
    def _confusion_counts(y, yhat):
        """Return ``(tp, tn, fp, fn)`` for true labels y and predictions yhat."""
        y = np.asarray(y).ravel()
        yhat = np.asarray(yhat).ravel()
        tp = np.sum((y == 1) & (yhat == 1))
        tn = np.sum((y == 0) & (yhat == 0))
        fp = np.sum((y == 0) & (yhat == 1))
        fn = np.sum((y == 1) & (yhat == 0))
        return tp, tn, fp, fn

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is 0."""
        if denominator == 0:
            return 0.0
        return numerator / denominator

    def accuracy(self, y, yhat):
        """Return the fraction of predictions equal to the true label."""
        y = np.asarray(y).ravel()
        yhat = np.asarray(yhat).ravel()
        return self._ratio(np.sum(y == yhat), y.size)

    def precision(self, y, yhat):
        """Return TP / (TP + FP); 0 when nothing was predicted positive."""
        tp, _, fp, _ = self._confusion_counts(y, yhat)
        return self._ratio(tp, tp + fp)

    def sensitivity(self, y, yhat):
        """Return TP / (TP + FN); 0 when there are no actual positives."""
        tp, _, _, fn = self._confusion_counts(y, yhat)
        return self._ratio(tp, tp + fn)

    def specificity(self, y, yhat):
        """Return TN / (TN + FP); 0 when there are no actual negatives."""
        _, tn, fp, _ = self._confusion_counts(y, yhat)
        return self._ratio(tn, tn + fp)

    def recall(self, y, yhat):
        """Return the recall, which is the same quantity as sensitivity."""
        return self.sensitivity(y, yhat)

    def f1_score(self, y, yhat):
        """Return the harmonic mean of precision and recall (0 if both are 0)."""
        prec = self.precision(y, yhat)
        rec = self.recall(y, yhat)

        if prec + rec == 0:
            return 0

        return (2 * prec * rec) / (prec + rec)

    def confusion_matrix(self, y_true, y_pred):
        """Return the 2x2 confusion matrix ``[[TN, FP], [FN, TP]]``."""
        tp, tn, fp, fn = self._confusion_counts(y_true, y_pred)
        return np.array([[tn, fp],
                         [fn, tp]])

    def plot_cm(self, y, yhat):
        """Draw the confusion matrix of y vs. yhat as an annotated heatmap."""
        cm = self.confusion_matrix(y, yhat)

        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['-', '+'], yticklabels=['-', '+'])
        plt.xlabel('Pred')
        plt.ylabel('Actual')
        plt.title('Confusion Mat')
        plt.show()

    def plot_roc_auc(self, xtest, ytest):
        """Plot the ROC curve on the given data and return the area under it.

        Args:
            xtest: Feature matrix to score with the fitted model.
            ytest: True 0/1 labels for ``xtest``.

        Returns:
            The area under the ROC curve.
        """
        y_prob = self._predict_proba(xtest)

        fpr, tpr, _thresholds = roc_curve(ytest, y_prob)

        # auc(x, y) integrates y over x: FPR is the x-axis, TPR the y-axis.
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], linestyle='--', label='Chance')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend(loc='lower right')
        plt.show()

        return roc_auc

        
    