In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
import functions as f

In [None]:
class LogisticRegressionGD:
    """Binary logistic regression fitted with batch gradient descent.

    The first column of the design matrix is treated as the intercept:
    the L2 penalty controlled by ``lambda_reg`` is applied to every
    coefficient except ``beta[0]``.

    Parameters
    ----------
    learning_rate : float
        Step size used for every gradient-descent update.
    n_iter : int
        Number of full-batch updates performed by :meth:`fit`.
    lambda_reg : float
        L2 regularization strength (0.0 disables the penalty).

    Attributes
    ----------
    beta : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` until
        :meth:`fit` has been called.
    """

    def __init__(self, learning_rate=0.01, n_iter=1000, lambda_reg=0.0):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.lambda_reg = lambda_reg
        self.beta = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Evaluated through ``exp(-|z|)`` so the exponential can never
        overflow, whatever the magnitude or sign of ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same function.
        return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

    def cost_function(self, X, y):
        """Return the regularized mean cross-entropy for the current ``beta``.

        The data term uses the identity
        ``-[y*log(p) + (1-y)*log(1-p)] = logaddexp(0, z) - y*z`` with
        ``z = X @ beta``, which stays finite even when predictions
        saturate at exactly 0 or 1 (where a direct ``log(p)`` would give
        ``-inf``/``nan``).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets in {0, 1}.

        Returns
        -------
        float
            Mean cross-entropy plus
            ``lambda_reg / (2 * n_samples) * sum(beta[1:] ** 2)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        N = len(y)
        z = X @ self.beta
        cost = np.mean(np.logaddexp(0.0, z) - y * z)
        reg_term = (self.lambda_reg / (2 * N)) * np.sum(self.beta[1:] ** 2)
        return cost + reg_term

    def fit(self, X, y):
        """Estimate ``beta`` by ``n_iter`` steps of batch gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Design matrix; column 0 is treated as the (unpenalized)
            intercept column.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets in {0, 1}.

        Returns
        -------
        list
            Cost after each update, in iteration order.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from
            the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector cannot broadcast (p - y) to (N, N).
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got array with shape {X.shape}")
        N, m = X.shape
        if N == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != N:
            raise ValueError(
                f"X has {N} samples but y has {y.shape[0]} targets"
            )

        self.beta = np.zeros(m)
        cost_history = []

        for _ in range(self.n_iter):
            p = self.sigmoid(X @ self.beta)
            gradient = (1 / N) * X.T @ (p - y)
            # Apply regularization (exclude bias term)
            gradient[1:] += (self.lambda_reg / N) * self.beta[1:]
            self.beta -= self.learning_rate * gradient
            # Cost is recorded after the update, so entry i reflects step i + 1.
            cost_history.append(self.cost_function(X, y))

        return cost_history

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        return self.sigmoid(np.asarray(X, dtype=float) @ self.beta)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= ``threshold``."""
        return (self.predict_prob(X) >= threshold).astype(int)


In [None]:
# Sample data (replace with your dataset)
# X: features, y: binary labels.
# For demonstration, build a synthetic, reproducible classification problem.

X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
# Add intercept term as column 0 (the model leaves beta[0] unpenalized).
X = np.hstack((np.ones((X.shape[0], 1)), X))

# Split (and scale) the dataset.
# NOTE(review): f.scale_train_test is defined outside this notebook; no seed
# is passed here, so the split may differ between runs -- confirm.
X_train, X_test, y_train, y_test = f.scale_train_test(X, y, test_size=0.2)

# NOTE(review): if the helper standardizes every column, the constant
# intercept column comes back as 0 (or NaN for a hand-rolled (x - mean) / std),
# which silently removes the bias term. Re-assert it on private copies; this
# is a no-op when the helper already leaves column 0 untouched.
X_train = np.array(X_train, dtype=float)
X_test = np.array(X_test, dtype=float)
X_train[:, 0] = 1.0
X_test[:, 0] = 1.0

learning_rates = [0.001, 0.01, 0.1, 1]
lambda_reg = 0.1
n_iter = 1000

# One convergence curve per learning rate, all on the same axes.
fig, ax = plt.subplots()

for lr in learning_rates:
    model = LogisticRegressionGD(learning_rate=lr, n_iter=n_iter, lambda_reg=lambda_reg)
    cost_history = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Learning Rate: {lr}, Accuracy: {accuracy:.4f}")

    ax.plot(cost_history, label=f'LR={lr}')

ax.set_xlabel('Iteration')
ax.set_ylabel('Cost')
ax.set_title('Cost Function Convergence for Different Learning Rates')
ax.legend()
plt.show()


In [None]:
# Regularization sweep: hold the step size fixed and vary the L2 strength.
# Reuses X_train / X_test / y_train / y_test and n_iter from the cell above.
learning_rate = 0.01
lambda_values = [0.0, 0.01, 0.1, 1.0]

fig, ax = plt.subplots()

for lam in lambda_values:
    model = LogisticRegressionGD(
        learning_rate=learning_rate,
        n_iter=n_iter,
        lambda_reg=lam,
    )
    # fit() returns the per-iteration cost, which is what gets plotted.
    cost_history = model.fit(X_train, y_train)

    # Held-out accuracy for this regularization strength.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Lambda: {lam}, Accuracy: {accuracy:.4f}")

    ax.plot(cost_history, label=f'Lambda={lam}')

ax.set(
    xlabel='Iteration',
    ylabel='Cost',
    title='Cost Function Convergence for Different Regularization Parameters',
)
ax.legend()
plt.show()
