# In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Reproducible toy dataset: a single feature drawn uniformly from [0, 10) and a
# target that follows y = 3x plus Gaussian noise scaled by 2.
np.random.seed(0)  # pin the global RNG so every run produces the same samples
X = 10 * np.random.random_sample((100, 1))
# The noise is drawn after X, so the order of RNG draws is unchanged.
y = X[:, 0] * 3 + 2 * np.random.standard_normal(100)

# Hold out 30% of the samples for evaluation; random_state pins the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Q1: R-squared in Linear Regression
# Fit an ordinary least-squares model on the training split and predict the
# held-out split. fit() returns the estimator itself, so it can be chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Coefficient of determination on the test set (arguments: truth, prediction).
r2 = r2_score(y_test, y_pred)

# Explanatory text printed for Q1. A plain string is used because nothing is
# interpolated. The last bullet notes that R² is bounded above by 1 but not
# below by 0: scored on held-out data it can be negative.
r2_summary = """
R-squared (R²) measures the proportion of variance in the dependent variable that is predictable from the independent variables.
- Calculated as: 1 - (Sum of Squared Errors / Total Sum of Squares)
- Represents the proportion of variance explained by the model.
- R² is at most 1, and a value closer to 1 indicates a better fit. It usually lies between 0 and 1, but it can be negative (e.g. on held-out data) when the model fits worse than always predicting the mean.
"""

# Q2: Adjusted R-squared
def adjusted_r2(r2, n, k):
    """Return the adjusted R-squared for a model with ``k`` predictors.

    Computes ``1 - (1 - r2) * (n - 1) / (n - k - 1)``, which discounts ``r2``
    according to the number of predictors relative to the sample size.

    Args:
        r2: Ordinary R-squared of the model.
        n: Number of observations the score was computed on.
        k: Number of predictors in the model.

    Returns:
        The adjusted R-squared value.

    Raises:
        ValueError: If ``n - k - 1`` is not positive. The denominator would be
            zero or negative, so the result would be a division error or a
            value with no meaningful interpretation.
    """
    residual_dof = n - k - 1
    if residual_dof <= 0:
        raise ValueError(
            f"adjusted R-squared requires n > k + 1 (got n={n}, k={k})"
        )
    return 1 - (1 - r2) * (n - 1) / residual_dof

# n is the number of test observations that r2 was scored on; k is the number
# of predictor columns in the feature matrix.
n, k = len(y_test), X.shape[1]
adj_r2 = adjusted_r2(r2=r2, n=n, k=k)

# Explanatory text printed for Q2, assembled line by line so each line's
# terminating newline is explicit.
adj_r2_summary = (
    "\n"
    "Adjusted R-squared adjusts the R-squared value for the number of predictors in the model.\n"
    "- It accounts for the number of predictors (k) and sample size (n).\n"
    "- More appropriate than R² when comparing models with different numbers of predictors.\n"
)

# Q3: When to use Adjusted R-squared
# Text only; built from per-line literals with explicit newlines.
adjusted_r2_usage = (
    "\n"
    "Adjusted R-squared is more appropriate when:\n"
    "- Comparing models with different numbers of predictors.\n"
    "- Evaluating model performance in the presence of multiple predictors.\n"
    "- Helps to avoid overfitting by penalizing the addition of non-significant predictors.\n"
)

# Q4: RMSE, MSE, and MAE in Regression Analysis
# All three error measures are computed on the held-out predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

# Explanatory text printed for Q4, one literal per output line.
metrics_summary = (
    "\n"
    "- RMSE (Root Mean Squared Error): √MSE. Represents the standard deviation of the residuals. Sensitive to large errors.\n"
    "- MSE (Mean Squared Error): Average of the squared differences between predicted and actual values. Penalizes larger errors more than MAE.\n"
    "- MAE (Mean Absolute Error): Average of absolute differences between predicted and actual values. Provides a linear score without amplifying the effect of outliers.\n"
)

# Q5: Advantages and Disadvantages of RMSE, MSE, and MAE
# Text only. The bare "\n" entry is the blank line separating the two lists.
metrics_adv_dis = (
    "\n"
    "Advantages:\n"
    "- RMSE: Useful for assessing the variance of residuals. Penalizes large errors more.\n"
    "- MSE: Simple to compute and interpret. Suitable for cases where large errors are particularly undesirable.\n"
    "- MAE: Easy to understand and less sensitive to outliers compared to RMSE and MSE.\n"
    "\n"
    "Disadvantages:\n"
    "- RMSE: Sensitive to outliers; may give a skewed perspective if outliers are present.\n"
    "- MSE: Squaring errors makes it sensitive to large deviations, which might not always be desirable.\n"
    "- MAE: Less sensitive to large errors, which might be a disadvantage if you want to penalize outliers more heavily.\n"
)

# Q6: Lasso vs. Ridge Regularization
# Fit one Lasso and one Ridge model on the training split. fit() returns the
# estimator, so construction and fitting are chained.
lasso_model = Lasso(alpha=0.5).fit(X_train, y_train)
ridge_model = Ridge(alpha=0.1).fit(X_train, y_train)

# Explanatory text printed for Q6 (covers both the L1 and the L2 penalty).
lasso_summary = (
    "\n"
    "Lasso Regularization (L1): Adds absolute value of coefficients to the loss function, which can lead to sparsity (some coefficients become zero). Useful for feature selection.\n"
    "Ridge Regularization (L2): Adds squared value of coefficients to the loss function, which shrinks coefficients but doesn’t force them to zero. Useful for regularizing the model without eliminating features.\n"
)

# Q7: How Regularized Linear Models Help to Prevent Overfitting
# Text only. The first sentence ends with a space before its newline; that
# space is part of the printed output and is kept deliberately.
regularization_summary = (
    "\n"
    "Regularized linear models help prevent overfitting by adding a penalty to the magnitude of coefficients, which discourages overly complex models. \n"
    "For example, Ridge regression can reduce the impact of less significant predictors, and Lasso can perform feature selection by setting some coefficients to zero.\n"
)

# Q8: Limitations of Regularized Linear Models
# Text only; one literal per printed line.
regularization_limitations = (
    "\n"
    "Limitations:\n"
    "- Regularization might not work well for models where interaction terms or polynomial terms are important.\n"
    "- Over-regularization can lead to underfitting and loss of important information.\n"
    "- Choice of regularization type (L1 vs. L2) depends on the problem; no one-size-fits-all solution.\n"
)

# Q9: Comparing RMSE and MAE
# Text only; nothing is interpolated, so ordinary string literals suffice.
comparison_rmse_mae = (
    "\n"
    "Model A (RMSE = 10) vs. Model B (MAE = 8):\n"
    "- RMSE gives more weight to large errors and is sensitive to outliers.\n"
    "- MAE provides a more balanced measure of error magnitude.\n"
    "- The choice depends on whether large errors are more critical or if a more balanced error measure is desired.\n"
)

# Q10: Comparing Ridge and Lasso Regularization
# Text only; the alpha values are written out literally in the heading line.
ridge_lasso_comparison = (
    "\n"
    "Model A (Ridge, alpha=0.1) vs. Model B (Lasso, alpha=0.5):\n"
    "- Ridge regularization shrinks coefficients but keeps all features, good for handling multicollinearity.\n"
    "- Lasso regularization can set some coefficients to zero, performing feature selection.\n"
    "- The choice depends on whether you need feature selection (Lasso) or just regularization (Ridge).\n"
)

# Display results
def _show(heading, body, *details):
    """Print a section heading, its explanatory text, then any metric lines.

    Each item is written on its own line, exactly as consecutive print()
    calls would produce.
    """
    print(heading, body, *details, sep="\n")


_show("Q1: R-squared", r2_summary, f"R-squared value: {r2:.3f}")

_show(
    "\nQ2: Adjusted R-squared",
    adj_r2_summary,
    f"Adjusted R-squared value: {adj_r2:.3f}",
)

_show("\nQ3: When to Use Adjusted R-squared", adjusted_r2_usage)

_show(
    "\nQ4: RMSE, MSE, and MAE",
    metrics_summary,
    f"RMSE: {rmse:.3f}, MSE: {mse:.3f}, MAE: {mae:.3f}",
)

_show("\nQ5: Advantages and Disadvantages of RMSE, MSE, and MAE", metrics_adv_dis)

_show("\nQ6: Lasso vs. Ridge Regularization", lasso_summary)

_show(
    "\nQ7: How Regularized Linear Models Help to Prevent Overfitting",
    regularization_summary,
)

_show("\nQ8: Limitations of Regularized Linear Models", regularization_limitations)

_show("\nQ9: Comparing RMSE and MAE", comparison_rmse_mae)

_show("\nQ10: Comparing Ridge and Lasso Regularization", ridge_lasso_comparison)
