<a href="https://colab.research.google.com/github/AndresNamm/CostFunctions/blob/main/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd


# Dataset
data = {
    'Age': [22, 25, 47, 52, 46, 56, 26, 27, 48, 50],
    'Salary': [50000, 60000, 150000, 200000, 90000, 160000, 80000, 58000, 140000, 135000],
    'Purchased': [0, 0, 1, 1, 1, 1, 0, 0, 1, 1]
}

df = pd.DataFrame(data)

df.dtypes

Unnamed: 0,0
Age,int64
Salary,int64
Purchased,int64


[More on sigmoid and general logistic functions](https://github.com/AndresNamm/CostFunctions/blob/main/ClassPredictionFunctions.ipynb)

In [2]:

# Define a numerically stable sigmoid function
def sigmoid(z):
    z = np.clip(z, -500, 500)  # Prevent overflow in exp
    return 1 / (1 + np.exp(-z))


# Parameters
beta_0 = 0.0
beta_1 = 0.01
beta_2 = 0.0001


# Compute predicted probabilities
df['LinearComb'] = float(beta_0) + beta_1 * df['Age'] + beta_2 * df['Salary'].astype(float)
df['PredictedProb'] = sigmoid(df['LinearComb'])

# Compute binary log-likelihood aka cost
df['LogLikelihood'] = df['Purchased'].astype(float) * np.log(df['PredictedProb']) + (1 - df['Purchased'].astype(float)) * np.log(1 - df['PredictedProb'])
log_likelihood = df['LogLikelihood'].sum()

log_likelihood



-25.8099679936351

[Reference How Negative Binary Log Likelihood Cost function behaves with different examples](https://github.com/AndresNamm/CostFunctions/blob/main/BinaryNegativeLikelihoodVisualization.ipynb)

In [3]:
import numpy as np


learning_rate = 0.001
num_iterations = 1000

# Extract columns from the DataFrame as NumPy arrays for faster computations
age = df['Age'].values
salary = df['Salary'].values
purchased = df['Purchased'].values

# Initialize coefficients: beta_0, beta_1, beta_2
beta = np.array([0.01, 0.01, 0.0001])

for i in range(num_iterations):
    # Compute the linear combination: beta_0 + beta_1 * age + beta_2 * salary
    linear_comb = beta[0] + beta[1] * age + beta[2] * salary

    # Compute predicted probabilities using the stable sigmoid function
    predicted_prob = sigmoid(linear_comb)

    # Calculate error (difference between predictions and actual values)
    error = predicted_prob - purchased

    # Compute gradients for each coefficient
    gradient_beta_0 = np.sum(error)
    gradient_beta_1 = np.dot(error, age)
    gradient_beta_2 = np.dot(error, salary)

    # Update coefficients
    beta[0] -= learning_rate * gradient_beta_0
    beta[1] -= learning_rate * gradient_beta_1
    beta[2] -= learning_rate * gradient_beta_2

# For computing the log-likelihood, clip predicted probabilities to avoid log(0)
epsilon = 1e-15
predicted_prob = np.clip(predicted_prob, epsilon, 1 - epsilon)
log_likelihood = np.sum(purchased * np.log(predicted_prob) +
                          (1 - purchased) * np.log(1 - predicted_prob))

print(f"Final log-likelihood: {log_likelihood}")


Final log-likelihood: -138.15830396936352


In [4]:
df

Unnamed: 0,Age,Salary,Purchased,LinearComb,PredictedProb,LogLikelihood
0,22,50000,0,5.22,0.994622,-5.225393
1,25,60000,0,6.25,0.998073,-6.251929
2,47,150000,1,15.47,1.0,-1.911896e-07
3,52,200000,1,20.52,1.0,-1.225398e-09
4,46,90000,1,9.46,0.999922,-7.790356e-05
5,56,160000,1,16.56,1.0,-6.428111e-08
6,26,80000,0,8.26,0.999741,-8.260259
7,27,58000,0,6.07,0.997694,-6.072309
8,48,140000,1,14.48,0.999999,-5.14536e-07
9,50,135000,1,14.0,0.999999,-8.315284e-07
