# Работа с load_diabetes()

In [104]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Загрузка и разделение данных
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(diabetes.data, diabetes.target, test_size=0.2, random_state=42)


### Модель KNeighborsRegressor

In [105]:
# Настройка параметров для KNeighborsRegressor
param_grid_knn = {'n_neighbors': range(1, 11)}
grid_search_knn = GridSearchCV(KNeighborsRegressor(), param_grid_knn, scoring='neg_mean_squared_error', cv=5)
grid_search_knn.fit(X_train, y_train)

# Лучшая модель и её параметры
best_knn = grid_search_knn.best_estimator_
y_pred_knn = best_knn.predict(X_test)
mse_knn = mean_squared_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print("KNeighborsRegressor:")
print(f"Best Params: {grid_search_knn.best_params_}")
print(f"MSE: {mse_knn:.2f}, R2 Score: {r2_knn:.2f}")


KNeighborsRegressor:
Best Params: {'n_neighbors': 9}
MSE: 3082.94, R2 Score: 0.42


### Модель LinearRegression

In [106]:
# Обучение модели LinearRegression
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)

# Оценка модели
y_pred_lr = linear_reg.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("LinearRegression:")
print(f"MSE: {mse_lr:.2f}, R2 Score: {r2_lr:.2f}")


LinearRegression:
MSE: 2900.19, R2 Score: 0.45


### Модель Ridge

In [107]:
# Настройка параметров для Ridge
param_grid_ridge = {'alpha': np.logspace(-3, 3, 7)}
grid_search_ridge = GridSearchCV(Ridge(), param_grid_ridge, scoring='neg_mean_squared_error', cv=5)
grid_search_ridge.fit(X_train, y_train)

# Лучшая модель и её параметры
best_ridge = grid_search_ridge.best_estimator_
y_pred_ridge = best_ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("Ridge:")
print(f"Best Params: {grid_search_ridge.best_params_}")
print(f"MSE: {mse_ridge:.2f}, R2 Score: {r2_ridge:.2f}")


Ridge:
Best Params: {'alpha': np.float64(0.1)}
MSE: 2856.49, R2 Score: 0.46


### Модель Lasso

In [108]:
# Настройка параметров для Lasso
param_grid_lasso = {'alpha': np.logspace(-3, 3, 7)}
grid_search_lasso = GridSearchCV(Lasso(), param_grid_lasso, scoring='neg_mean_squared_error', cv=5)
grid_search_lasso.fit(X_train, y_train)

# Лучшая модель и её параметры
best_lasso = grid_search_lasso.best_estimator_
y_pred_lasso = best_lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print("Lasso:")
print(f"Best Params: {grid_search_lasso.best_params_}")
print(f"MSE: {mse_lasso:.2f}, R2 Score: {r2_lasso:.2f}")


Lasso:
Best Params: {'alpha': np.float64(0.1)}
MSE: 2798.19, R2 Score: 0.47


# Работа с load_linnerud()

In [109]:
import numpy as np
from sklearn.datasets import load_linnerud
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Загрузка данных
linnerud = load_linnerud()
X = linnerud.data
y = linnerud.target[:, 0]  # Выбираем первую целевую переменную (Weight)

# Разделение на тренировочную и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Модель KNeighborsRegressor

In [110]:
# Настройка параметров для KNeighborsRegressor
param_grid_knn = {'n_neighbors': range(1, 11)}
grid_search_knn = GridSearchCV(KNeighborsRegressor(), param_grid_knn, scoring='neg_mean_squared_error', cv=5)
grid_search_knn.fit(X_train, y_train)

# Лучшая модель и её параметры
best_knn = grid_search_knn.best_estimator_
y_pred_knn = best_knn.predict(X_test)
mse_knn = mean_squared_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print("KNeighborsRegressor:")
print(f"Best Params: {grid_search_knn.best_params_}")
print(f"MSE: {mse_knn:.2f}, R2 Score: {r2_knn:.2f}")


KNeighborsRegressor:
Best Params: {'n_neighbors': 7}
MSE: 435.56, R2 Score: -0.55


### Модель LinearRegression

In [111]:
# Обучение модели LinearRegression
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)

# Оценка модели
y_pred_lr = linear_reg.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("LinearRegression:")
print(f"MSE: {mse_lr:.2f}, R2 Score: {r2_lr:.2f}")


LinearRegression:
MSE: 630.78, R2 Score: -1.24


### Модель Ridge

In [112]:
# Настройка параметров для Ridge
param_grid_ridge = {'alpha': np.logspace(-3, 3, 7)}
grid_search_ridge = GridSearchCV(Ridge(), param_grid_ridge, scoring='neg_mean_squared_error', cv=5)
grid_search_ridge.fit(X_train, y_train)

# Лучшая модель и её параметры
best_ridge = grid_search_ridge.best_estimator_
y_pred_ridge = best_ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("Ridge:")
print(f"Best Params: {grid_search_ridge.best_params_}")
print(f"MSE: {mse_ridge:.2f}, R2 Score: {r2_ridge:.2f}")


Ridge:
Best Params: {'alpha': np.float64(1000.0)}
MSE: 553.12, R2 Score: -0.97


### Модель Lasso

In [113]:
# Настройка параметров для Lasso
param_grid_lasso = {'alpha': np.logspace(-3, 3, 7)}
grid_search_lasso = GridSearchCV(Lasso(), param_grid_lasso, scoring='neg_mean_squared_error', cv=5)
grid_search_lasso.fit(X_train, y_train)

# Лучшая модель и её параметры
best_lasso = grid_search_lasso.best_estimator_
y_pred_lasso = best_lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print("Lasso:")
print(f"Best Params: {grid_search_lasso.best_params_}")
print(f"MSE: {mse_lasso:.2f}, R2 Score: {r2_lasso:.2f}")


Lasso:
Best Params: {'alpha': np.float64(100.0)}
MSE: 501.32, R2 Score: -0.78


# Вывод
KNeighborsRegressor показал хорошие результаты при подборе числа соседей, особенно на простом датасете load_linnerud. Однако его эффективность снижается на более сложных данных, как load_diabetes.

LinearRegression обеспечивает базовое качество предсказаний, но ограничен в задачах с нелинейной зависимостью.

Ridge и Lasso справились лучше на сложном датасете load_diabetes благодаря регуляризации, что снижает переобучение и помогает при мультиколлинеарности.

Рекомендации:

Для сложных данных лучше подходят регуляризованные модели (Ridge, Lasso).
Для простых, линейных зависимостей хорошо работает LinearRegression и KNeighborsRegressor при настройке параметров.