In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from scipy.special import expit  # Sigmoid function
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Build a reproducible synthetic binary-classification problem: 1000 rows and
# 20 features, of which 5 are informative and 5 are redundant.
X, y = make_classification(
    n_samples=1000, n_features=20,
    n_informative=5, n_redundant=5,
    n_classes=2, random_state=42,
)

# Wrap the raw arrays in a DataFrame with 1-based column names
# (Feature_1 ... Feature_20) and append the label as the last column.
feature_names = [f"Feature_{i}" for i in range(1, X.shape[1] + 1)]
dataset = pd.DataFrame(X, columns=feature_names).assign(Target=y)

In [None]:
# Scatter-matrix overview of `dataset`, coloured by the class label.
# NOTE(review): the frame holds 20 feature columns, so expect this cell to be
# slow to render and the individual panels to be small.
pair_grid = sns.pairplot(data=dataset, hue="Target")
plt.show()

In [4]:
class LogisticRegression:
    """Binary logistic regression fitted with full-batch gradient ascent.

    The model maximises the Bernoulli log-likelihood of labels in {0, 1},
    optionally minus an L1 or L2 penalty on the feature weights. A column of
    ones is prepended to the inputs internally, so ``coef_[0]`` is the
    intercept and ``coef_[1:]`` are the per-feature weights. The intercept is
    never penalised.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size. The gradient is summed (not averaged) over the samples, so
        the effective step grows with the number of training rows.
    max_iter : int, default=1000
        Number of gradient steps performed by ``fit``; there is no early
        stopping.
    regularization : {None, 'L1', 'L2'}, default=None
        Penalty applied to the feature weights.
    lambda_reg : float, default=0.5
        Penalty strength; unused when ``regularization`` is None.
    """

    # Penalty names understood by fit(); anything else is rejected.
    _PENALTIES = (None, 'L1', 'L2')

    def __init__(self, learning_rate=0.01, max_iter=1000, regularization=None, lambda_reg=0.5):
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.lambda_reg = lambda_reg
        self.max_iter = max_iter
        # Set by fit(); None means "not fitted yet".
        self.coef_ = None

    @staticmethod
    def _design_matrix(X):
        """Return X as a float array with a leading column of ones (intercept)."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={X.ndim}"
            )
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def fit(self, X, y):
        """Estimate ``coef_`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Labels encoded as 0/1.

        Raises
        ------
        ValueError
            If ``regularization`` is not None, 'L1' or 'L2', if X is not
            2-dimensional, or if X and y disagree on the number of samples.
        """
        if self.regularization not in self._PENALTIES:
            # Previously an unknown name silently meant "no penalty".
            raise ValueError(
                f"regularization must be None, 'L1' or 'L2', got {self.regularization!r}"
            )

        X = self._design_matrix(X)
        y = np.asarray(y, dtype=float).ravel()
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent sample counts: {X.shape[0]} != {y.shape[0]}"
            )

        self.coef_ = np.zeros(X.shape[1])

        for _ in range(self.max_iter):
            predictions = expit(X @ self.coef_)
            # Gradient of the log-likelihood (ascent direction).
            gradient = X.T @ (y - predictions)

            if self.regularization == 'L1':
                penalty = self.lambda_reg * np.sign(self.coef_)
            elif self.regularization == 'L2':
                penalty = 2 * self.lambda_reg * self.coef_
            else:
                penalty = None

            if penalty is not None:
                # Leave the intercept unpenalised: shrinking it would bias the
                # predicted base rate toward 0.5 regardless of the data.
                penalty[0] = 0.0
                gradient -= penalty

            self.coef_ += self.learning_rate * gradient

    def predict_proba(self, X):
        """Return P(y = 1 | x) for every row of X as a 1-D array.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError(
                "This LogisticRegression instance is not fitted yet; call fit() first."
            )
        return expit(self._design_matrix(X) @ self.coef_)

    def predict(self, X):
        """Return hard 0/1 labels using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)

In [None]:
# Separate the label column from the predictors.
y = dataset['Target']
X = dataset.drop(columns='Target')

# Hold out 30% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Baseline: unregularised model, scored on the held-out rows.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy:", baseline_accuracy)


In [None]:
# One model per penalty type, both trained on the same training split.
log_reg_l2 = LogisticRegression(regularization='L2')  # L2 Regularization
log_reg_l1 = LogisticRegression(regularization='L1')  # L1 Regularization
for penalised_model in (log_reg_l2, log_reg_l1):
    penalised_model.fit(X_train, y_train)

# Hard label predictions on the held-out rows.
y_pred_l2 = log_reg_l2.predict(X_test)
y_pred_l1 = log_reg_l1.predict(X_test)

accuracy_l2 = accuracy_score(y_test, y_pred_l2)
accuracy_l1 = accuracy_score(y_test, y_pred_l1)
print(f"Accuracy with L2 Regularization:", accuracy_l2)
print(f"Accuracy with L1 Regularization:", accuracy_l1)

In [None]:
# Coefficients from L1 Regularized model
l1_coeffs = log_reg_l1.coef_

# fit() prepends an intercept column, so coef_[0] is the bias and coef_[1:] are
# the feature weights. Drop the bias so that position i below is column i of X
# (i.e. "Feature_{i+1}"); enumerating the full vector shifted every index by one
# and could produce an index past the last column.
l1_feature_coeffs = np.asarray(l1_coeffs)[1:]

# L1 pushes uninformative weights toward zero, so the selected features are the
# ones whose weight stays clearly away from zero. The sign-based update never
# lands exactly on zero, hence a small tolerance instead of `!= 0`.
# NOTE(review): 0.05 is a judgement call - tune it for other data or settings.
L1_COEFF_TOLERANCE = 0.05
selected_features = [
    i for i, coeff in enumerate(l1_feature_coeffs) if abs(coeff) > L1_COEFF_TOLERANCE
]

print(f"Selected features by L1 regularization: {selected_features}")

In [8]:
# Keep only the first two selected columns so the fitted model can be drawn in
# a plane, then refit an L1-penalised model on the full data set for plotting.
plot_columns = selected_features[:2]
X_selected = X.iloc[:, plot_columns]

log_reg_l1_selected = LogisticRegression(regularization="L1")
log_reg_l1_selected.fit(X_selected, y)

In [None]:
# Build a 100 x 100 grid spanning the observed range of the two plotted features.
first_feature = X_selected.iloc[:, 0]
second_feature = X_selected.iloc[:, 1]
xx, yy = np.meshgrid(
    np.linspace(first_feature.min(), first_feature.max(), 100),
    np.linspace(second_feature.min(), second_feature.max(), 100),
)

# Classify every grid point, then fold the labels back into the grid's shape.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = log_reg_l1_selected.predict(grid_points).reshape(xx.shape)

# Filled contours show the predicted class regions; the observations are
# drawn on top, coloured by their true label.
fig, ax = plt.subplots()
ax.contourf(xx, yy, Z, alpha=0.8)
ax.scatter(first_feature, second_feature, c=y, edgecolors='k', marker='o', cmap=plt.cm.coolwarm)
ax.set_title("Decision Boundary with L1 Regularization (selected features)")
ax.set_xlabel(f"Feature {selected_features[0]+1}")
ax.set_ylabel(f"Feature {selected_features[1]+1}")
plt.show()