# Regression Methods Comparison

Comprehensive comparison of various regression techniques on different datasets.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import (
    LinearRegression, Lasso, Ridge, ElasticNet, 
    BayesianRidge, HuberRegressor
)
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
def compare_regressors(X, y, test_size=0.3):
    """Compare several regression algorithms by 5-fold cross-validated MSE.

    Every estimator except the random forest is wrapped in a pipeline with
    a ``StandardScaler`` so that the scaling statistics are re-estimated on
    the training portion of each fold. Fitting the scaler once on the full
    data set before cross-validation would let the held-out fold influence
    the preprocessing and bias the reported error.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Regression target.
    test_size : float, default=0.3
        Unused. No hold-out split is performed (evaluation is purely by
        cross-validation); the parameter is retained only so existing
        callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per regressor, indexed by its display name, with columns
        ``CV_MSE`` (mean MSE over the folds) and ``CV_MSE_std`` (standard
        deviation of the per-fold scores), sorted by ascending ``CV_MSE``.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    from sklearn.pipeline import make_pipeline

    # Candidate regressors, keyed by the label used in the result table.
    regressors = {
        'Linear': LinearRegression(),
        'Lasso': Lasso(alpha=0.1),
        'Ridge': Ridge(alpha=1.0),
        'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5),
        'Bayesian Ridge': BayesianRidge(),
        'Huber': HuberRegressor(),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        'SVR': SVR(kernel='rbf'),
    }

    results = {}

    for name, regressor in regressors.items():
        if name == 'Random Forest':
            # The random forest is evaluated on the unscaled features.
            model = regressor
        else:
            # Linear models and SVR get standardized features; the scaler
            # lives inside the pipeline so it is fit per training fold.
            model = make_pipeline(StandardScaler(), regressor)

        # scikit-learn reports negated MSE so that "greater is better".
        cv_scores = cross_val_score(
            model, X, y, cv=5,
            scoring='neg_mean_squared_error',
        )

        results[name] = {
            'CV_MSE': -cv_scores.mean(),  # flip the sign back to a plain MSE
            'CV_MSE_std': cv_scores.std(),  # spread is unaffected by the sign
        }

    return pd.DataFrame(results).T.sort_values('CV_MSE')

# Demo run on a synthetic sparse linear problem.
# The global seed is fixed so the generated data is reproducible.
np.random.seed(202)
n_samples = 200
n_features = 15

# Draw order matters for reproducibility: features, then coefficients,
# then observation noise.
X_test = np.random.randn(n_samples, n_features)
coef_test = 0.5 * np.random.randn(n_features)
coef_test[10:] = 0  # only the first 10 features carry signal
y_test = X_test @ coef_test + np.random.randn(n_samples) * 0.1

# Rank the regressors by cross-validated MSE and show the table.
comparison_results = compare_regressors(X_test, y_test)
print("Regression Comparison Results:", comparison_results.round(4), sep="\n")