Title: Regularization Techniques

Task 1: Ridge Regression on House Prices<br>
Apply Ridge regularization to the Linear Regression model.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Example synthetic dataset: ten houses described by floor area and bedroom count
data = {
    'size_sqft': [1500, 1800, 2400, 3000, 3500, 2000, 2800, 2300, 1700, 2600],
    'bedrooms': [3, 4, 3, 5, 4, 3, 4, 3, 2, 4],
    'price': [400000, 500000, 600000, 650000, 700000, 450000, 620000, 580000, 420000, 610000]
}

df = pd.DataFrame(data)
X = df[['size_sqft', 'bedrooms']]
y = df['price']

# Split dataset into train and test (20% held out; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features before applying the penalty. Ridge penalizes the
# squared size of each coefficient, so features measured on very different
# scales (square feet in the thousands vs. bedroom counts in single digits)
# would otherwise be shrunk unevenly. The scaler is fitted on the training
# split only so that no test-set statistics leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize Ridge Regression model with alpha (regularization strength)
ridge_model = Ridge(alpha=1.0)  # you can tune alpha

# Train the model on the standardized training features
ridge_model.fit(X_train_scaled, y_train)

# Predict on the test set, transformed with the training-set scaler
y_pred = ridge_model.predict(X_test_scaled)

# Evaluate using RMSE (same units as the target, i.e. price)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Test RMSE: {rmse:.2f}")


Task 2: Lasso Regression for Feature Selection<br>
Use Lasso to select important features in the housing data.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Example synthetic dataset with more features
data = {
    'size_sqft': [1500, 1800, 2400, 3000, 3500, 2000, 2800, 2300, 1700, 2600],
    'bedrooms': [3, 4, 3, 5, 4, 3, 4, 3, 2, 4],
    'age': [10, 15, 20, 5, 8, 12, 18, 7, 11, 9],
    'garage': [1, 2, 2, 3, 2, 1, 3, 2, 1, 2],
    'price': [400000, 500000, 600000, 650000, 700000, 450000, 620000, 580000, 420000, 610000]
}

df = pd.DataFrame(data)
X = df.drop('price', axis=1)
y = df['price']

# Split data (20% held out; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features before fitting. The L1 penalty acts on coefficient
# magnitudes, which depend on each feature's units; without scaling, which
# coefficients are driven to zero reflects the units rather than the
# feature's predictive value. Fit the scaler on the training split only to
# avoid leaking test-set statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize Lasso regression with alpha (regularization strength)
lasso = Lasso(alpha=1000)  # You can tune alpha (larger alpha means more features shrink to zero)

# Train model on the standardized training features
lasso.fit(X_train_scaled, y_train)

# Predict on the test set, transformed with the training-set scaler
y_pred = lasso.predict(X_test_scaled)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Test RMSE: {rmse:.2f}")

# Feature selection - coefficients.
# These are expressed per one standard deviation of each feature, so their
# magnitudes are directly comparable across features.
coefficients = pd.Series(lasso.coef_, index=X.columns)
print("\nFeature coefficients:")
print(coefficients)

# Important features selected (non-zero coefficients)
important_features = coefficients[coefficients != 0].index.tolist()
print("\nImportant features selected by Lasso:")
print(important_features)


Task 3: ElasticNet for Customer Churn<br>
Implement ElasticNet, which combines the Ridge (L2) and Lasso (L1) penalties, to balance between the two on customer churn data.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Synthetic customer churn dataset with numeric features
data = {
    'monthly_charges': [70, 90, 80, 75, 60, 85, 95, 77, 66, 73, 68, 72, 88, 92, 65],
    'tenure_months': [12, 24, 18, 36, 8, 15, 40, 22, 10, 13, 20, 14, 26, 38, 16],
    'num_complaints': [0, 1, 0, 2, 0, 1, 3, 1, 0, 2, 1, 0, 2, 3, 0],
    'churn_rate': [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0]  # 0 = no churn, 1 = churn
}

df = pd.DataFrame(data)

X = df.drop('churn_rate', axis=1)
y = df['churn_rate']

# Split dataset into train/test (20% held out; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features before fitting. Both the L1 and L2 parts of the
# ElasticNet penalty act on coefficient magnitudes, which depend on feature
# units (charges, months and complaint counts are on different scales).
# Fit the scaler on the training split only to avoid test-set leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize ElasticNet model
# l1_ratio = 0 => Ridge only, 1 => Lasso only, between 0 and 1 balances both
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)

# Train the model on the standardized training features
elastic_net.fit(X_train_scaled, y_train)

# Predict on the test set, transformed with the training-set scaler.
# NOTE(review): the target is binary (0/1) while ElasticNet is a regressor,
# so predictions are continuous scores rather than class labels - confirm
# that a regression treatment of churn is what this exercise intends.
y_pred = elastic_net.predict(X_test_scaled)

# Evaluate using RMSE between the continuous predictions and the 0/1 labels
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Test RMSE: {rmse:.3f}")

# Inspect the fitted coefficients (per one standard deviation of each feature)
coef = pd.Series(elastic_net.coef_, index=X.columns)
print("\nElasticNet feature coefficients:")
print(coef)
