In [11]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_linnerud
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score


In [12]:
data = load_linnerud()
X, y = data.data, data.target

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y[:, 0] 

In [15]:
print(df.head())
print(df.describe())

   Chins  Situps  Jumps  target
0    5.0   162.0   60.0   191.0
1    2.0   110.0   60.0   189.0
2   12.0   101.0  101.0   193.0
3   12.0   105.0   37.0   162.0
4   13.0   155.0   58.0   189.0
           Chins      Situps      Jumps      target
count  20.000000   20.000000   20.00000   20.000000
mean    9.450000  145.550000   70.30000  178.600000
std     5.286278   62.566575   51.27747   24.690505
min     1.000000   50.000000   25.00000  138.000000
25%     4.750000  101.000000   39.50000  160.750000
50%    11.500000  122.500000   54.00000  176.000000
75%    13.250000  210.000000   85.25000  191.500000
max    17.000000  251.000000  250.00000  247.000000


In [16]:
knn_params = {'n_neighbors': [3, 5, 7, 9]}
knn = GridSearchCV(KNeighborsRegressor(), knn_params, cv=5)
knn.fit(X_train, y_train[:, 0]) 

y_pred_knn = knn.predict(X_test)
mse_knn = mean_squared_error(y_test[:, 0], y_pred_knn)
r2_knn = r2_score(y_test[:, 0], y_pred_knn)

print("KNeighborsRegressor")
print("Лучшие параметры:", knn.best_params_)
print("MSE:", mse_knn)
print("R2:", r2_knn)

KNeighborsRegressor
Лучшие параметры: {'n_neighbors': 7}
MSE: 435.5612244897957
R2: -0.5490063551537521


Модель LinearRegression

In [17]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train[:, 0])

y_pred_lin = lin_reg.predict(X_test)
mse_lin = mean_squared_error(y_test[:, 0], y_pred_lin)
r2_lin = r2_score(y_test[:, 0], y_pred_lin)

print("LinearRegression")
print("MSE:", mse_lin)
print("R2:", r2_lin)

LinearRegression
MSE: 630.7757097870915
R2: -1.243256580705371


Модель Ridge

In [18]:
ridge_params = {'alpha': [0.1, 1, 10, 100]}
ridge = GridSearchCV(Ridge(), ridge_params, cv=5)
ridge.fit(X_train, y_train[:, 0])

y_pred_ridge = ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test[:, 0], y_pred_ridge)
r2_ridge = r2_score(y_test[:, 0], y_pred_ridge)

print("Ridge")
print("Лучшие параметры:", ridge.best_params_)
print("MSE:", mse_ridge)
print("R2:", r2_ridge)

Ridge
Лучшие параметры: {'alpha': 100}
MSE: 600.4859253658086
R2: -1.1355356314409732


Модель Lasso

In [19]:
lasso_params = {'alpha': [0.01, 0.1, 1, 10]}
lasso = GridSearchCV(Lasso(), lasso_params, cv=5)
lasso.fit(X_train, y_train[:, 0])

y_pred_lasso = lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test[:, 0], y_pred_lasso)
r2_lasso = r2_score(y_test[:, 0], y_pred_lasso)

print("Lasso")
print("Лучшие параметры:", lasso.best_params_)
print("MSE:", mse_lasso)
print("R2:", r2_lasso)

Lasso
Лучшие параметры: {'alpha': 10}
MSE: 548.5429686983891
R2: -0.9508085128193435


Модель LogisticRegression

In [20]:
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train[:, 0] > np.median(y_train[:, 0]))

y_pred_log = log_reg.predict(X_test)
y_true_binary = (y_test[:, 0] > np.median(y_train[:, 0])).astype(int)
y_pred_binary = y_pred_log.astype(int)

mse_log = mean_squared_error(y_true_binary, y_pred_binary)
r2_log = r2_score(y_true_binary, y_pred_binary)

print("LogisticRegression")
print("MSE:", mse_log)
print("R2:", r2_log)

LogisticRegression
MSE: 0.5
R2: -1.6666666666666665


In [21]:
results = pd.DataFrame({
    "Model": ["KNeighborsRegressor", "LinearRegression", "Ridge", "Lasso", "LogisticRegression"],
    "MSE": [mse_knn, mse_lin, mse_ridge, mse_lasso, mse_log],
    "R2 Score": [r2_knn, r2_lin, r2_ridge, r2_lasso, r2_log]
})

print(results)

                 Model         MSE  R2 Score
0  KNeighborsRegressor  435.561224 -0.549006
1     LinearRegression  630.775710 -1.243257
2                Ridge  600.485925 -1.135536
3                Lasso  548.542969 -0.950809
4   LogisticRegression    0.500000 -1.666667


Вывод:

Модели с более низким значением MSE обладают лучшей предсказательной способностью.