# Main Concepts

- linear regression --> logistic regression: instead of numerical output, want to get a probability

$$\hat{y} = h_{\theta}(x) = \frac{1}{1 + e^{-wx+b}}$$
$$h_{\theta}(x) = P(Y=1|x,\theta)$$

- loss function: cross entropy (given a ground truth probability distribution + an estimation --> measures how close predicted distribution is to true one)
    - binary classification: $J(w,b) = J(\theta) = \frac{1}{N}\sum^n_{i=1} [y^ilog(h_{\theta}(x^i)) + (1-y^i)log(1-h_{\theta}(x^i))]$

# Code

In [1]:
import numpy as np

class LogisticRegression:

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
    
    def fit(self, X, y):
        # init parameters
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            # linear model
            linear_model = np.dot(X, self.weights) + self.bias

            # sigmoid function
            y_predicted = self.sigmoid(linear_model)

            # computer gradients
            dw = (1/n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1/n_samples) * np.sum(y_predicted - y)

            # update weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        y_predicted_cls = [1 if y > 0.5 else 0 for y in y_predicted]

        return y_predicted_cls
    
    def sigmoid(self, x):
        return 1/ (1 + np.exp(-x))


# Test

In [3]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

model = LogisticRegression(lr=0.001, n_iters=1000)
model.fit(X_train, y_train)
y_predicted = model.predict(X_test)
print(f"LR classification accuracy: {accuracy(y_test, y_predicted) * 100:.2f}%")

LR classification accuracy: 94.74%


  return 1/ (1 + np.exp(-x))
