In [27]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score


In [28]:
data = load_diabetes()
X, y = data.data, data.target

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y

print(df.head())
print(df.describe())

        age       sex       bmi        bp        s1        s2        s3  \
0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401   
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   
2  0.085299  0.050680  0.044451 -0.005670 -0.045599 -0.034194 -0.032356   
3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   
4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   

         s4        s5        s6  target  
0 -0.002592  0.019907 -0.017646   151.0  
1 -0.039493 -0.068332 -0.092204    75.0  
2 -0.002592  0.002861 -0.025930   141.0  
3  0.034309  0.022688 -0.009362   206.0  
4 -0.002592 -0.031988 -0.046641   135.0  
                age           sex           bmi            bp            s1  \
count  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02   
mean  -2.511817e-19  1.230790e-17 -2.245564e-16 -4.797570e-17 -1.381499e-17   
std    4.761905e-02  4.761905e-02  4.761905e-02  4.761905e-0

Модель KNeighborsRegressor

In [31]:
knn_params = {'n_neighbors': [3, 5, 7, 9]}
knn = GridSearchCV(KNeighborsRegressor(), knn_params, cv=5)
knn.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)
mse_knn = mean_squared_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print("KNeighborsRegressor")
print("Лучшие параметры:", knn.best_params_)
print("MSE:", mse_knn)
print("R2:", r2_knn)

KNeighborsRegressor
Лучшие параметры: {'n_neighbors': 9}
MSE: 3082.937300596477
R2: 0.41811079656983197


In [32]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

y_pred_lin = lin_reg.predict(X_test)
mse_lin = mean_squared_error(y_test, y_pred_lin)
r2_lin = r2_score(y_test, y_pred_lin)

print("LinearRegression")
print("MSE:", mse_lin)
print("R2:", r2_lin)

LinearRegression
MSE: 2900.193628493482
R2: 0.4526027629719195


Модель Ridge

In [33]:
ridge_params = {'alpha': [0.1, 1, 10, 100]}
ridge = GridSearchCV(Ridge(), ridge_params, cv=5)
ridge.fit(X_train, y_train)

y_pred_ridge = ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("Ridge")
print("Лучшие параметры:", ridge.best_params_)
print("MSE:", mse_ridge)
print("R2:", r2_ridge)


Ridge
Лучшие параметры: {'alpha': 0.1}
MSE: 2856.4868876706537
R2: 0.46085219464119265


In [34]:
lasso_params = {'alpha': [0.01, 0.1, 1, 10]}
lasso = GridSearchCV(Lasso(), lasso_params, cv=5)
lasso.fit(X_train, y_train)

y_pred_lasso = lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print("Lasso")
print("Лучшие параметры:", lasso.best_params_)
print("MSE:", mse_lasso)
print("R2:", r2_lasso)


Lasso
Лучшие параметры: {'alpha': 0.1}
MSE: 2798.193485169719
R2: 0.4718547867276227


Модель LogisticRegression 

In [35]:
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train > np.median(y_train))  

y_pred_log = log_reg.predict(X_test)
y_true_binary = (y_test > np.median(y_train)).astype(int)
y_pred_binary = y_pred_log.astype(int)


mse_log = mean_squared_error(y_true_binary, y_pred_binary)
r2_log = r2_score(y_true_binary, y_pred_binary)

print("LogisticRegression")
print("MSE:", mse_log)
print("R2:", r2_log)


LogisticRegression
MSE: 0.24719101123595505
R2: 0.0010204081632650963


In [36]:
results = pd.DataFrame({
    "Model": ["KNeighborsRegressor", "LinearRegression", "Ridge", "Lasso", "LogisticRegression"],
    "MSE": [mse_knn, mse_lin, mse_ridge, mse_lasso, mse_log],
    "R2 Score": [r2_knn, r2_lin, r2_ridge, r2_lasso, r2_log]
})

print(results)


                 Model          MSE  R2 Score
0  KNeighborsRegressor  3082.937301  0.418111
1     LinearRegression  2900.193628  0.452603
2                Ridge  2856.486888  0.460852
3                Lasso  2798.193485  0.471855
4   LogisticRegression     0.247191  0.001020


Вывод:

Модели с более низким значением MSE обладают лучшей предсказательной способностью.