In [106]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [107]:
np.random.seed(0) #set random
# Create DataFrame
data = pd.DataFrame()
data['Feature_1'] = np.random.random(40) * 2 + 1
data['Feature_2'] = np.random.random(40) * 3 + 1
data['y'] = np.random.choice([0, 1], size=40)
data.head(2)

Unnamed: 0,Feature_1,Feature_2,y
0,2.097627,2.078524,0
1,2.430379,2.311096,0


In [108]:
X = data[['Feature_1','Feature_2']]
y = data['y']

Odds fomula:

$
\text{Odds} = \frac{p}{1 - p}
$

    
Log Odds fomula:   
$
 \text{Log Odds} = \log \left( \frac{p}{1 - p} \right)
$

Linear Model fomula:   
$
 \text{Linear model} = β_0 + β_ix_i
$



**Log Odds fomula with Linear Model fomula (let z = linear model)**

Sigmoid Function:    
$
 \sigma(z) = \frac{1}{1 + e^{-z}}
$

In [94]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


Sigmoid Function with Maximum Likelihood Estimater


$
\mathcal{L}(\theta) = \sum_{i=1}^{m} \left[ y_i \log(\sigma_θ(z)) + (1 - y_i) \log(1 - \sigma_θ(z)) \right]
$

Graident Descent with Gobal Maximum

$
\frac{\partial \mathcal{L}}{\partial \theta_0} = \sum_{i=1}^{m} \left( y_i - \sigma_θ(z) \right)
$



$
\frac{\partial \mathcal{L}}{\partial \theta_xi} = \sum_{i=1}^{m} \left( y_i - \sigma_θ(z) \right) x_{ij}
$





In [110]:
def calculate_gradient_log_likelihood(theta_0, theta_xi, X, y):
    linear_model = theta_0 + np.dot(X, theta_xi)
    predictions = sigmoid(linear_model)
    errors = y - predictions
    intercept_grad = np.sum(errors)
    coeff_grad = np.dot(X.T, errors)
    return intercept_grad, coeff_grad

Loss Logistics Regression is Negative Log Likelihood     
$
\mathcal{NL}(\theta) = -\sum_{i=1}^{m} \left[ y_i \log(\sigma_θ(z)) + (1 - y_i) \log(1 - \sigma_θ(z)) \right]
$

In [111]:
def log_loss(theta_0, theta_xi, X, y):
    linear_model = theta_0 + np.dot(X, theta_xi)
    predictions = sigmoid(linear_model)
    loss = -np.sum(y * np.log(predictions + 1e-15) + (1 - y) * np.log(1 - predictions + 1e-15))
    return loss

Gradient Descent update parameters   
$
\theta_n += \theta_c + \alpha\frac{\partial \mathcal{L}}{\partial \theta}
$

In [124]:
# Initialize parameters
theta_0 = np.zeros(1)  # Bata_0
theta_xi = np.zeros(X.shape[1])  # Bata_1
print(theta_0) #intercept
print(theta_xi) #coeff
learning_rate = 0.0001
num_iterations = 50000

[0.]
[0. 0.]


In [125]:
for i in range(num_iterations):
    intercept_grad, coeff_grad = calculate_gradient_log_likelihood(theta_0, theta_xi, X, y)
    theta_0 += learning_rate * intercept_grad
    theta_xi += learning_rate * coeff_grad
    if i % 20000 == 0 or i == num_iterations - 1:
        loss = log_loss(theta_0, theta_xi, X, y)
        print(f"Iteration {i}: Loss = {loss}")
print("Theta_0 (Intercept):", theta_0)
print("Theta_xi (Coefficients):", theta_xi)

Iteration 0: Loss = 27.694617822402407
Iteration 20000: Loss = 23.571151519430856
Iteration 40000: Loss = 23.484462424021274
Iteration 49999: Loss = 23.462631524674666
Theta_0 (Intercept): [-1.3003134]
Theta_xi (Coefficients): [-0.45574896  0.72949955]


Logistics Regression in Sklearn

In [113]:
# Logistic Regression using sklearn
log_reg = LogisticRegression(fit_intercept=True, solver='lbfgs', max_iter=10000)
log_reg.fit(X, y)
print("Intercept:", log_reg.intercept_)
print("Coefficients:", log_reg.coef_)


Intercept: [-1.71023705]
Coefficients: [[-0.2415067   0.70582492]]


Code Fast

In [136]:
import numpy as np
import pandas as pd

np.random.seed(0) #set random
# Create DataFrame
data = pd.DataFrame()
data['Feature_1'] = np.random.random(40) * 2 + 1
data['Feature_2'] = np.random.random(40) * 3 + 1
data['y'] = np.random.choice([0, 1], size=40)
data.head(2)

X = data[['Feature_1', 'Feature_2']].values
y = data['y'].values

X = np.hstack((np.ones((X.shape[0], 1)), X))  # x0 = 1 Because intercept * x0

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def calculate_gradient_log_likelihood(theta, X, y):
    linear_model = X @ theta
    predictions = sigmoid(linear_model)
    errors = y - predictions
    gradient = X.T @ errors / len(y)
    return gradient

def log_loss(theta, X, y):
    linear_model = X @ theta
    predictions = sigmoid(linear_model)
    loss = -np.mean(y * np.log(predictions + 1e-15) + (1 - y) * np.log(1 - predictions + 1e-15))
    return loss

# Initialize parameters
theta = np.zeros(X.shape[1])
print("Initial theta:", theta)

learning_rate = 0.00001  # Increased learning rate
num_iterations = 100000  # Reduced iterations

for i in range(num_iterations):
    gradient = calculate_gradient_log_likelihood(theta, X, y)
    theta += learning_rate * gradient
    if i %  10000 == 0 or i == num_iterations - 1:
        loss = log_loss(theta, X, y)
        print(f"Iteration {i}: Loss = {loss}")

print("Final theta:", theta)


Initial theta: [0. 0. 0.]
Iteration 0: Loss = 0.6929510266799117
Iteration 10000: Loss = 0.5975157784497125
Iteration 20000: Loss = 0.5930873800069328
Iteration 30000: Loss = 0.5920554569244227
Iteration 40000: Loss = 0.5913517015324287
Iteration 49999: Loss = 0.5907412745326868
Final theta: [-0.57001399 -0.67131029  0.61715299]
