# Ridge Regression Analysis

Ridge regression uses L2 regularization to prevent overfitting.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [None]:
  # Generate dataset for Ridge regression
np.random.seed(123)  # fixed seed so the synthetic data is reproducible
n_samples, n_features = 150, 15
X = np.random.randn(n_samples, n_features)
true_coef = np.random.randn(n_features) * 0.5
# Linear signal plus Gaussian noise scaled by 0.2.
y = X @ true_coef + 0.2 * np.random.randn(n_samples)

# Split BEFORE standardizing: fitting the scaler on all of X would let the
# held-out rows influence the mean/variance used for preprocessing (leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize features with statistics learned from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so any later cell that reads X_scaled still works; it is the full
# matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# Cross-validation for alpha selection
alphas = np.logspace(-4, 2, 20)  # 20 log-spaced candidates from 1e-4 to 1e2
ridge_cv = Ridge()

scores = []  # mean 5-fold CV MSE per candidate, in the same order as `alphas`
for alpha in alphas:
    ridge_cv.set_params(alpha=alpha)
    cv_scores = cross_val_score(
        ridge_cv, X_train, y_train, cv=5, scoring='neg_mean_squared_error'
    )
    # The scorer yields negated MSE, so flip the sign to store a plain MSE.
    scores.append(-cv_scores.mean())

# Lowest mean CV error wins.
best_alpha = alphas[np.argmin(scores)]
print(f"Best alpha: {best_alpha:.4f}")

# Train final model
ridge_final = Ridge(alpha=best_alpha)
ridge_final.fit(X_train, y_train)
y_pred = ridge_final.predict(X_test)

# Report held-out performance.
print(f"Ridge MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"Ridge R²: {r2_score(y_test, y_pred):.4f}")