Question 8: Logistic Regression with L1 Regularization<br>
Task: Implement logistic regression with L1 regularization and observe the effect on feature selection.

In [2]:
# Implement logistic regression with L1 regularization and observe the effect on feature selection

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# --- Synthetic data: many candidate features, only the first three carry signal ---
np.random.seed(42)
n_samples = 100
n_features = 12

# Draw the feature matrix before the label noise so the seeded random stream
# is consumed in a fixed order.
X = np.random.randn(n_samples, n_features)

# Ground-truth weights: non-zero for the first three columns, zero for the rest.
true_coefs = np.zeros(n_features)
true_coefs[:3] = [2, -3, 1.5]

# Binary label = sign of the noisy linear score.
noise = 0.5 * np.random.randn(n_samples)
y = ((X @ true_coefs + noise) > 0).astype(int)

feature_names = [f'X{k}' for k in range(1, n_features + 1)]
df = pd.DataFrame(X, columns=feature_names)

# --- Hold out 20% of the rows (the held-out part is not used further in this cell) ---
X_train, X_test, y_train, y_test = train_test_split(
    df, y, test_size=0.2, random_state=42
)

# --- L1-penalised logistic regression, fitted on the training rows only ---
logreg_l1 = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    C=1.0,
    random_state=42,
).fit(X_train, y_train)

# --- Report one fitted weight per feature ---
coefs = logreg_l1.coef_[0]
for name, coef in zip(feature_names, coefs):
    print(f"{name}: {coef:.3f}")

# A feature counts as "selected" when its fitted weight is not exactly zero.
num_selected = (coefs != 0).sum()
print(f"\nNumber of features selected (non-zero coefficients): {num_selected} out of {n_features}")

X1: 2.136
X2: -3.712
X3: 1.615
X4: 0.165
X5: 0.000
X6: -0.144
X7: 0.000
X8: 0.000
X9: 0.087
X10: 0.092
X11: -0.377
X12: 0.000

Number of features selected (non-zero coefficients): 8 out of 12
