# Robust Regression Techniques

This notebook explores robust regression methods, which are less sensitive to outliers than ordinary least squares.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import (
    LinearRegression, HuberRegressor, 
    TheilSenRegressor, RANSACRegressor
)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Synthetic 1-D regression problem: y = 3 + 2*x plus small Gaussian noise,
# with a handful of targets shifted afterwards to act as outliers.
np.random.seed(303)  # fixed seed so every run draws the same data
n_samples = 100
X = np.random.randn(n_samples, 1)
noise = 0.1 * np.random.randn(n_samples)
y = 3 + 2 * X.ravel() + noise

# Corrupt 10 distinct samples (replace=False): each chosen target is moved
# up or down by an amount drawn uniformly between 2 and 4.
outlier_indices = np.random.choice(n_samples, size=10, replace=False)
outlier_signs = np.random.choice([-1, 1], size=10)
outlier_magnitudes = np.random.uniform(2, 4, size=10)
y[outlier_indices] += outlier_signs * outlier_magnitudes

# Hold out 30% of the samples for evaluation; the split is seeded so it is
# reproducible across runs.
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# Candidate estimators keyed by display label. Plain least squares ('OLS')
# is included alongside the robust estimators as a point of comparison.
robust_regressors = {
    'OLS': LinearRegression(),
    # epsilon=1.35 matches scikit-learn's documented default for HuberRegressor.
    'Huber': HuberRegressor(epsilon=1.35),
    # Both estimators below use randomness internally, hence the fixed seeds.
    'Theil-Sen': TheilSenRegressor(random_state=42),
    'RANSAC': RANSACRegressor(random_state=42),
}

# Fit every candidate on the training split and score it on the held-out split.
# NOTE: the outliers are injected into y before the train/test split, so
# y_test can contain them and the MSE partly reflects that injected noise.
results = {}      # estimator label -> test-set MSE
predictions = {}  # estimator label -> test-set predictions

for name, regressor in robust_regressors.items():
    # fit() returns the fitted estimator, so fit and predict can be chained.
    y_pred = regressor.fit(X_train, y_train).predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    predictions[name] = y_pred
    results[name] = mse

    print(f"{name} MSE: {mse:.4f}")

# Lowest test MSE wins; ties go to the first label inserted. The OLS entry is
# among the candidates, so it can be the one reported here.
best_name = min(results.items(), key=lambda item: item[1])[0]
print(f"\nBest robust method: {best_name}")