In [3]:
import numpy as np
import pandas as pd
from typing import Dict, List
from numpy.typing import NDArray
from dataclasses import dataclass

In [17]:
@dataclass
class TrainResult:
    parameters: Dict[str, NDArray[np.float_]]
    loss: List[float]

class LogisticRegression:
    def __init__(self, learning_rate: float):
        self.parameters: Dict[str, float] = {}
        self.learning_rate = learning_rate
    
    def _sigmoid(self, x: np.ndarray):
        return 1 / (1 + np.exp(-x))
    
    def _binary_cross_entropy(self, pred: np.ndarray, targets: np.ndarray) -> float:
        cost = np.mean(targets*np.log(pred + 1e-8) + (1 - targets)*np.log(1 - pred + 1e-8))
        return float(-cost)
    
    def _forward(self, inputs: np.ndarray)-> np.ndarray:
        m = self.parameters['m']
        b = self.parameters['b']
        linear_pred =  inputs @ m + b
        pred = self._sigmoid(linear_pred)
        return pred
    
    def _backpropagation(self, preds: np.ndarray, inputs: np.ndarray, targets: np.ndarray) -> Dict[str, float]:
        derivatives = {}
        m_samples = inputs.shape[0]
        error = (preds - targets)
        dm = (inputs.T @ error) / m_samples
        db = np.asarray(np.mean(preds - targets), dtype=float)
        derivatives['dm'] = dm
        derivatives['db'] = db
        return derivatives
    
    def update_parameters(self, derivatives: Dict[str, float]) -> None:
        self.parameters['m'] -= self.learning_rate*derivatives['dm']
        self.parameters['b'] -= self.learning_rate*derivatives['db']
    
    def array_conversion(self, X, y):
        return np.array(X), np.array(y)
    
    def _initialise_parameters(self, n_features):
        self.parameters['m'] = np.zeros((n_features, 1), dtype=float)
        self.parameters['b'] = np.asarray(0.0, dtype=float)
    
    def train(self, epochs: int, X_train: list, y_train: list) -> TrainResult:
        X_train, y_train = self.array_conversion(X_train, y_train)
        m, n_features = X_train.shape
        y_train = y_train.reshape(m,1)
        self._initialise_parameters(n_features=n_features)
        self.loss = []

        for i in range(epochs):
            preds = self._forward(X_train)
            cost = self._binary_cross_entropy(pred=preds, targets=y_train)
            derivativess = self._backpropagation(preds=preds, inputs=X_train, targets=y_train)
            self.update_parameters(derivatives=derivativess)
            self.loss.append(cost)
            if i % 5 == 0:
                print(f"Epoch: {i+1}, Loss: {cost}")
        return TrainResult(parameters=self.parameters, loss=self.loss)

    def predict(self, inputs: List[float], threshold: float = 0.5) -> int:
        pred = self._forward(inputs)
        return (pred >= 0.5).astype(int)
    
    def accuracy(self, inputs: List[float], target: List[float]) -> float:
        y_pred = self.predict(inputs)
        return np.mean(y_pred.flatten() == target.flatten()) * 100


In [18]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X, y = data.data, data.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = LogisticRegression(learning_rate=0.01)
model.train(100, X_train=X_train, y_train=y_train)

train_acc = model.accuracy(X_train, y_train)
test_acc = model.accuracy(X_test, y_test)

print(f"Training Accuracy: {train_acc:.2f}%")
print(f"Testing Accuracy: {test_acc:.2f}%")


Epoch: 1, Loss: 0.6931471605599454
Epoch: 6, Loss: 0.6075750784999625
Epoch: 11, Loss: 0.5443428443387912
Epoch: 16, Loss: 0.49615433302660344
Epoch: 21, Loss: 0.45826890785539437
Epoch: 26, Loss: 0.4276739657080053
Epoch: 31, Loss: 0.40240845018092547
Epoch: 36, Loss: 0.38115232832903995
Epoch: 41, Loss: 0.36298783213646707
Epoch: 46, Loss: 0.3472582036482186
Epoch: 51, Loss: 0.33348147736396916
Epoch: 56, Loss: 0.32129610793279756
Epoch: 61, Loss: 0.3104256172309446
Epoch: 66, Loss: 0.30065496522191715
Epoch: 71, Loss: 0.2918143629032965
Epoch: 76, Loss: 0.2837679370163419
Epoch: 81, Loss: 0.27640563467657164
Epoch: 86, Loss: 0.2696373388498819
Epoch: 91, Loss: 0.26338852217167935
Epoch: 96, Loss: 0.257596990239278
Training Accuracy: 95.60%
Testing Accuracy: 97.37%
