# 🧩 Практикум: Понимание L2-регуляризации (Ridge Regression)
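Этот ноутбук поможет тебе глубоко понять, как работает L2-регуляризация в линейной регрессии. Используется датасет [Boston Housing](https://raw.githubusercontent.com/SENATOROVAI/ridge-regression/refs/heads/main/boston.csv). В заголовках заданий коэффициент регуляризации обозначается λ; в scikit-learn ему соответствует параметр `alpha`.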

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error

# Read the housing CSV directly from GitHub and preview its first rows.
url = (
    "https://raw.githubusercontent.com/SENATOROVAI/ridge-regression/"
    "refs/heads/main/boston.csv"
)
df = pd.read_csv(url)
df.head()


## Задание 1: Базовая Ridge-регрессия

In [None]:

# Task 1: fit Ridge for several regularisation strengths and compare test RMSE.

# 'MEDV' is the regression target; every other column is used as a feature.
X = df.drop(columns=['MEDV'])
y = df['MEDV']
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# alpha=0 is kept as the unregularised baseline of the sweep.
# NOTE(review): the scikit-learn docs advise using LinearRegression rather than
# Ridge(alpha=0) for numerical reasons — confirm whether the baseline should change.
alphas = [0, 0.01, 0.1, 1, 10, 100]
results = []

for a in alphas:
    model = Ridge(alpha=a)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # RMSE is taken as sqrt(MSE): the `squared=False` keyword of
    # mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
    # while the explicit square root works on every version.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    results.append((a, rmse))

# One row per alpha; shown as the cell output.
pd.DataFrame(results, columns=['alpha', 'RMSE'])


## Задание 2: График коэффициентов в зависимости от λ

In [None]:

# Task 2: plot how each Ridge coefficient changes as alpha grows.

coefs = []
# 50 alpha values spaced evenly on a log scale between 1e-3 and 1e3.
alphas = np.logspace(-3, 3, 50)

for a in alphas:
    ridge = Ridge(alpha=a)
    ridge.fit(X_train, y_train)
    # One coefficient vector (one entry per feature) per alpha.
    coefs.append(ridge.coef_)

plt.figure(figsize=(8,6))
# Passing the list of coefficient vectors draws one line per feature.
lines = plt.plot(alphas, coefs)
plt.xscale('log')
# The ticks show alpha itself on a logarithmic axis, not log(alpha),
# so the label states that explicitly.
plt.xlabel('alpha (логарифмическая шкала)')
plt.ylabel('коэффициенты')
plt.title('Зависимость коэффициентов от λ')
# Name each curve so individual features can be identified on the plot.
plt.legend(lines, list(X_train.columns), fontsize='small', ncol=2)
plt.show()


## Задание 3: Стандартизация признаков

In [None]:

# Task 3: compare Ridge on raw features against Ridge on standardised features.

# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

ridge_no_scaling = Ridge(alpha=1).fit(X_train, y_train)
ridge_scaled = Ridge(alpha=1).fit(X_train_scaled, y_train)

# RMSE is taken as sqrt(MSE): the `squared=False` keyword of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_no_scaling = np.sqrt(mean_squared_error(y_test, ridge_no_scaling.predict(X_test)))
rmse_scaled = np.sqrt(mean_squared_error(y_test, ridge_scaled.predict(X_test_scaled)))

print("Без стандартизации:", rmse_no_scaling)
print("Со стандартизацией:", rmse_scaled)


## Задание 4: Влияние дисперсии признаков

In [None]:

# Task 4: put each feature's variance next to its Ridge(alpha=1) coefficient.
ridge = Ridge(alpha=1)
ridge.fit(X_train, y_train)

# Variances are computed over all rows of X, while the model sees X_train only.
variances = X.var()
coef_variance = pd.DataFrame(
    {
        'feature': X.columns,
        'variance': variances,
        'coef': ridge.coef_,
    }
)
# Highest-variance features first; shown as the cell output.
coef_variance.sort_values(by='variance', ascending=False)


## Задание 5: Оптимальное λ через кросс-валидацию

In [None]:

# Task 5: pick alpha by 5-fold cross-validation on the training split.
params = {'alpha': np.logspace(-3, 3, 50)}
grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=params,
    scoring='neg_root_mean_squared_error',
    cv=5,
)
grid.fit(X_train, y_train)

print("Лучшее λ:", grid.best_params_)
# The scorer is the negated RMSE, so flip the sign before reporting it.
print("RMSE:", -grid.best_score_)


## Задание 6: Сравнение OLS и Ridge

In [None]:

# Task 6: compare ordinary least squares with Ridge at the cross-validated alpha.

ols = LinearRegression().fit(X_train, y_train)
# Reuse the alpha selected by the grid search cell.
ridge_best = Ridge(alpha=grid.best_params_['alpha']).fit(X_train, y_train)

# RMSE is taken as sqrt(MSE): the `squared=False` keyword of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
ols_rmse = np.sqrt(mean_squared_error(y_test, ols.predict(X_test)))
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_best.predict(X_test)))

print("OLS RMSE:", ols_rmse)
print("Ridge RMSE:", ridge_rmse)


## Задание 7: Мультиколлинеарность

In [None]:

# Task 7: heat map of the pairwise correlations between feature columns.
corr = X.corr()

plt.figure(figsize=(10, 8))
# Pin the colour range to [-1, 1] so the midpoint of the map corresponds to zero.
heatmap = plt.imshow(corr, vmin=-1, vmax=1, cmap='coolwarm')
plt.colorbar(heatmap)
plt.title('Корреляция признаков')
plt.show()


## Задание 8: Интерпретация коэффициентов при разном λ

In [None]:

# Task 8: for three alpha values, show the three largest (most positive) coefficients.
for a in (0.1, 1, 10):
    model = Ridge(alpha=a)
    model.fit(X_train, y_train)
    # Label coefficients with feature names, then order from largest to smallest.
    coefs = pd.Series(model.coef_, index=X.columns).sort_values(ascending=False)
    print(f"λ={a}")
    display(coefs.iloc[:3])


## Задание 9: Кривые обучения

In [None]:

# Task 9: learning curve of Ridge(alpha=1), scored by R² with 5-fold cross-validation.
train_sizes, train_scores, test_scores = learning_curve(
    Ridge(alpha=1),
    X,
    y,
    train_sizes=np.linspace(0.1, 1, 10),
    cv=5,
    scoring='r2',
)

# Average the per-fold scores at every training-set size and draw one line per split.
for fold_scores, curve_label in ((train_scores, 'Train'), (test_scores, 'Validation')):
    plt.plot(train_sizes, fold_scores.mean(axis=1), label=curve_label)

plt.xlabel('Размер обучающей выборки')
plt.ylabel('R²')
plt.legend()
plt.title('Кривая обучения Ridge (λ=1)')
plt.show()


## Задание 10: Модификация регуляризации (feature-specific λ)

In [None]:

# Task 10: Ridge with a separate penalty lambda_j for every feature, i.e.
#     minimise ||y - X b||^2 + sum_j lambda_j * b_j^2.
# Ridge accepts a single alpha for all features, so the per-feature penalty is
# obtained by reparameterisation: divide column j by sqrt(lambda_j), fit with
# alpha=1, then divide the fitted coefficient by sqrt(lambda_j) to express it in
# the original feature units. Multiplying the columns by lambda_j itself (as a
# plain rescaling) would instead correspond to a penalty of 1 / lambda_j^2.

# Variances come from the training rows only, so the test rows do not influence the fit.
var = X_train.var()
# Penalty inversely proportional to the feature variance; 1e-5 guards against division by zero.
lambdas = 1 / (var + 1e-5)
penalty_scale = np.sqrt(lambdas.values)

ridge_custom = Ridge(alpha=1)
# NOTE: ridge_custom is fitted on rescaled features, so ridge_custom.predict
# expects its input divided by penalty_scale as well.
ridge_custom.fit(X_train / penalty_scale, y_train)

# Map the coefficients back to the scale of the original features.
coef_original_units = ridge_custom.coef_ / penalty_scale
print("Коэффициенты с учётом весов:", coef_original_units)
