Title: Regularization Techniques

Task 1: Ridge Regression on House Prices<br>
Apply Ridge regularization to the Linear Regression model.

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California housing data and separate the features from the target
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Scale the features first, then fit Ridge (alpha is the regularization strength)
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('ridge', Ridge(alpha=1.0)),
])

# fit() returns the fitted pipeline, so training and prediction can be chained
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")


Mean Squared Error: 0.5305
R^2 Score: 0.5958


Task 2: Lasso Regression for Feature Selection<br>
Use Lasso to select important features in the housing data.

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Fetch the California housing data; keep the column names for the coefficient report
data = fetch_california_housing()
X = data.data
y = data.target
feature_names = data.feature_names

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Scale the features first, then fit Lasso (alpha controls the regularization strength)
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('lasso', Lasso(alpha=0.1)),
])

# fit() returns the fitted pipeline, so training and prediction can be chained
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")

# Pull the fitted coefficients off the 'lasso' step (one per feature, on the scaled inputs)
lasso_coef = pipeline.named_steps.lasso.coef_

# Report every coefficient, including the ones that are zero
print("\nFeature selection using Lasso Regression:")
for name, coef in zip(feature_names, lasso_coef):
    print(f"{name}: {coef:.4f}")

# A feature counts as selected when its coefficient is not exactly zero
print("\nSelected Features:")
selected_features = [
    feature_names[idx]
    for idx, weight in enumerate(lasso_coef)
    if weight != 0
]
print(selected_features)


Mean Squared Error: 0.6648
R^2 Score: 0.4935

Feature selection using Lasso Regression:
MedInc: 0.7083
HouseAge: 0.1066
AveRooms: -0.0000
AveBedrms: 0.0000
Population: -0.0000
AveOccup: -0.0000
Latitude: -0.0104
Longitude: -0.0000

Selected Features:
['MedInc', 'HouseAge', 'Latitude']


Task 3: ElasticNet for Customer Churn<br>
Implement ElasticNet, which blends the Ridge (L2) and Lasso (L1) penalties, on a simulated churn dataset.

In [3]:
import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, classification_report, accuracy_score

# Simulated stand-in for customer churn data (swap in the real dataset here).
# The seed is set first so the two draws below are repeatable.
np.random.seed(42)
X = np.random.rand(1000, 10)  # 1000 samples x 10 features
y = np.random.choice([0, 1], size=1000, p=[0.7, 0.3])  # 0 = no churn, 1 = churn

# NOTE: ElasticNet is a regression model. This demo fits it directly to the 0/1
# churn labels, treating them as a continuous churn score, and therefore reports
# regression metrics. For a real binary classifier, use LogisticRegression with
# the elastic-net penalty instead, e.g.
#   LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, max_iter=1000)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Scale the features first, then fit ElasticNet
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('elasticnet', ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)),
])

# fit() returns the fitted pipeline, so training and prediction can be chained
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Regression metrics on the held-out split (demo only; see the note above)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")


Mean Squared Error: 0.2193
R^2 Score: -0.0022
