# Logistic Regression

## 1. Initialize the parameters
Initialize the weights and the bias. Since we have 22 features, we generate 22 random weights (one per feature) and a single bias.

In [1]:
import numpy as np

# Number of input features; the model keeps one weight per feature.
NUM_OF_FEATURES = 22

# Starting parameters: random weights in [0, 1) and a bias that starts at zero.
weights = np.random.random_sample(NUM_OF_FEATURES)
bias = 0

# Optimisation hyper-parameters: gradient-descent step size and iteration count.
learning_rate = 1e-2
epochs = 10_000_000

## 2. Define the Prediction Function
$$
\Huge P(x) = \frac{1}{1+e^{-(x \cdot weights + bias)}}
$$
Here x is the input vector of 22 features, weights is the vector of 22 weights (one per feature), bias is a single scalar, and P(x) is the prediction function, whose output always lies between 0 and 1.

In [2]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    The textbook formula evaluates ``exp(-x)`` directly, which overflows (and
    emits a RuntimeWarning) for large negative ``x``. This version only ever
    exponentiates a non-positive number, so the intermediate value stays
    within [0, 1] for every finite input.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``. Every value lies in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) can underflow to 0 but can never overflow.
    z = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x). Both branches are safe to evaluate for every element.
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays as they are.
    return result[()]


# prediction function
def p(x, p_weights, p_bias):
    """Return the model's prediction for ``x``.

    Forms the linear score ``dot(x, p_weights) + p_bias`` and passes it
    through ``sigmoid``.

    Args:
        x: Input features; anything ``np.dot`` accepts together with
            ``p_weights``.
        p_weights: Weight vector applied to the features.
        p_bias: Bias added to the weighted sum.

    Returns:
        Whatever ``sigmoid`` returns for the linear score.
    """
    linear_score = np.dot(x, p_weights)
    shifted_score = linear_score + p_bias
    return sigmoid(shifted_score)

## 3. Define the Loss Function (Cross-Entropy)
$$
\Huge L = -\sum_{k=1}^{K} \left( y_k \ln(p_k) + (1 - y_k) \ln(1 - p_k) \right)
$$
Here $y_k$ is the true label of the k-th sample, $p_k$ is the model's prediction for that sample, and K is the number of samples.

In [3]:
def compute_loss(y, y_pred, eps=1e-15):
    """Return the summed binary cross-entropy between labels and predictions.

    Computes ``-sum(y * ln(p) + (1 - y) * ln(1 - p))``. Predictions are
    clipped to ``[eps, 1 - eps]`` before the logarithm is taken; without the
    clip, a prediction of exactly 0 or 1 produces ``0 * ln(0)`` and the whole
    loss becomes NaN.

    Args:
        y: True labels (expected to be 0 or 1), scalar or array-like.
        y_pred: Predicted values in [0, 1], broadcastable against ``y``.
        eps: Clipping margin that keeps predictions strictly inside (0, 1).

    Returns:
        The total (summed, not averaged) cross-entropy as a NumPy float.
    """
    labels = np.asarray(y, dtype=float)
    # Keep both ln(p) and ln(1 - p) finite.
    clipped = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    per_sample = labels * np.log(clipped) + (1 - labels) * np.log(1 - clipped)
    return -np.sum(per_sample)

## 4. Gradient Descent


In [4]:
def gradient_descent(x, y, gd_weights, gd_bias, gd_learning_rate, gd_epochs,
                     log_every=100):
    """Fit the weights and bias with batch gradient descent.

    Each epoch computes predictions with ``p``, forms the residual
    ``y_pred - y``, and moves the parameters against the mean gradient:
    ``dw = x.T @ residual / n`` and ``db = sum(residual) / n``.

    The caller's ``gd_weights`` array is never modified: training runs on a
    float copy. This also lets integer-valued initial weights be used, which
    an in-place ``-=`` with a float gradient would reject.

    Args:
        x: Feature matrix; must support ``.T`` and ``np.dot``. Presumably
            shaped (n_samples, n_features) -- confirm against the data loader.
        y: True labels, one per sample; ``len(y)`` is used as the sample count.
        gd_weights: Initial weights (array-like); copied, not mutated.
        gd_bias: Initial bias (scalar).
        gd_learning_rate: Step size applied to each gradient.
        gd_epochs: Number of update steps to run.
        log_every: Print the loss every this many epochs. A falsy value
            (``0`` or ``None``) disables printing.

    Returns:
        A ``(weights, bias)`` tuple holding the trained parameters.
    """
    n = len(y)
    # Private float copy: keeps the caller's array intact and avoids casting
    # errors when the initial weights have an integer dtype.
    current_weights = np.array(gd_weights, dtype=float)
    current_bias = gd_bias

    for epoch in range(gd_epochs):
        y_pred = p(x, current_weights, current_bias)
        # The residual is shared by both gradients, so compute it once.
        residual = y_pred - y

        dw = (1 / n) * np.dot(x.T, residual)
        db = (1 / n) * np.sum(residual)

        current_weights -= gd_learning_rate * dw
        current_bias -= gd_learning_rate * db

        if log_every and epoch % log_every == 0:
            # The reported loss belongs to the predictions made *before*
            # this epoch's parameter update.
            loss = compute_loss(y, y_pred)
            print(f"Epoch {epoch}/{gd_epochs}, Loss: {loss}")

    return current_weights, current_bias

## 5. Model training


In [None]:
# Train the model and replace the initial parameters with the fitted ones.
# NOTE(review): x (features) and y (labels) are not defined in the cells shown
# here -- they must be loaded before this cell is run.
weights, bias = gradient_descent(
    x,
    y,
    gd_weights=weights,
    gd_bias=bias,
    gd_learning_rate=learning_rate,
    gd_epochs=epochs,
)