In [2]:
from sklearn import datasets
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# Diabetes regression data shipped with scikit-learn: feature matrix and target
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# One (degree, R-squared, MAE, MAPE) tuple is appended per polynomial degree
results = []

for degree in range(9):
    # Expand the raw features into every polynomial term up to `degree`
    X_poly = PolynomialFeatures(degree=degree).fit_transform(X)

    # Out-of-fold predictions from 10-fold cross-validation of a plain
    # least-squares fit on the expanded features
    y_pred = cross_val_predict(LinearRegression(), X_poly, y, cv=10)

    # Score the out-of-fold predictions against the true targets.
    # MAPE is expressed in percent; the division assumes no target is zero.
    results.append((
        degree,
        r2_score(y, y_pred),
        mean_absolute_error(y, y_pred),
        np.mean(np.abs((y - y_pred) / y)) * 100,
    ))


In [3]:
import pandas as pd

# Tabulate the collected metrics: one row per tuple in `results`
df = pd.DataFrame(
    data=results,
    columns=['Degree', 'R-Squared', 'MAE', 'MAPE'],
)
print(df)



   Degree  R-Squared         MAE        MAPE
0       0  -0.005690   65.921696   62.275447
1       1   0.494250   44.214469   39.464995
2       2   0.417632   45.865449   39.232201
3       3 -26.723490  192.860137  156.617763
4       4 -68.671594  328.856742  270.366725
5       5 -61.533344  312.237970  256.653278
6       6 -61.412675  312.045035  256.498316
7       7 -61.410800  312.041500  256.496771
8       8 -61.413403  312.044204  256.501218


In [4]:
# Selection criterion: the degree with the highest cross-validated R-squared
best_idx = df['R-Squared'].idxmax()

# idxmax() returns an index *label*, so look the row up with .loc; .iloc is
# positional and only agrees with the label on a default RangeIndex.
# Scalar (row, column) access also reads the value straight from the integer
# 'Degree' column instead of from a mixed-dtype row Series that is upcast to
# float, so the degree is reported as 1 rather than 1.0.
best_degree = int(df.loc[best_idx, 'Degree'])
print(f"The best model is of degree {best_degree}.")



The best model is of degree 1.0.
