In [76]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_validate

In [77]:
# Load the diabetes regression dataset that ships with scikit-learn.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Polynomial degrees to evaluate: 0 through 8 inclusive.
degrees = np.arange(9)

# Scorers handed to cross_validate; the two error metrics use the "neg_*"
# variants, so their sign is flipped back below before being stored.
scoring = ('r2', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error')

# One entry per degree (same order as `degrees`): the mean score over the 5 folds.
r_squared, mae, mape = [], [], []

for degree in degrees:
    # Expand the raw features into polynomial terms of the current degree.
    poly_features = PolynomialFeatures(degree=degree)
    X_poly = poly_features.fit_transform(X)

    # Plain least-squares fit, evaluated with 5-fold cross-validation.
    model = LinearRegression()
    scores = cross_validate(model, X_poly, y, cv=5, scoring=scoring)

    # Average every returned array across folds, then pick out the test metrics.
    fold_means = {key: values.mean() for key, values in scores.items()}
    r_squared.append(fold_means['test_r2'])
    mae.append(-fold_means['test_neg_mean_absolute_error'])
    mape.append(-fold_means['test_neg_mean_absolute_percentage_error'])



## Table Summarizing Cross-Validation Results per Polynomial Degree

In [78]:
# Aggregate each metric across all evaluated degrees.
# np.std is called with its defaults here.
mean_r_squared, std_r_squared = np.mean(r_squared), np.std(r_squared)
mean_mae, std_mae = np.mean(mae), np.std(mae)
mean_mape, std_mape = np.mean(mape), np.std(mape)

# One row per polynomial degree, one column per cross-validated metric.
column_names = ('Degree', 'R-Squared', 'MAE', 'MAPE')
column_values = (degrees, r_squared, mae, mape)
table = dict(zip(column_names, column_values))
table_df = pd.DataFrame(table)
table_df


Unnamed: 0,Degree,R-Squared,MAE,MAPE
0,0,-0.027506,66.045624,0.623622
1,1,0.482316,44.276499,0.39486
2,2,0.391502,46.612882,0.402669
3,3,-182.293189,342.397076,2.331786
4,4,-70.667516,303.158461,2.453685
5,5,-67.387407,295.686026,2.405233
6,6,-67.447482,295.631865,2.404954
7,7,-67.448529,295.630403,2.404952
8,8,-67.442355,295.583604,2.404613


## Table Summarizing Cross-Validation Results Including Mean and Std Values

In [79]:
# Per-degree results, followed by two summary rows ('mean' and 'std').
table2 = {'Degree': degrees, 'R-Squared': r_squared, 'MAE': mae, 'MAPE': mape}
table2_df = pd.DataFrame(table2)

# Compute BOTH statistics from the per-degree rows only, before anything is
# appended. Appending 'mean' first and then calling .std() on the grown frame
# would fold the mean row into the standard deviation.
metric_columns = ['R-Squared', 'MAE', 'MAPE']
per_degree_metrics = table2_df[metric_columns]
summary_rows = pd.DataFrame({
    'mean': per_degree_metrics.mean(),
    'std': per_degree_metrics.std(),  # pandas default: sample std (ddof=1)
}).T

# A degree is meaningless for the summary rows, so leave that cell blank.
summary_rows.insert(0, 'Degree', '')

# Cast Degree to object up front so the integer degrees and the blank strings
# can share one column without a dtype-incompatible assignment (and without
# the degrees being displayed as floats).
table2_df = pd.concat([table2_df.astype({'Degree': object}), summary_rows])
table2_df

Unnamed: 0,Degree,R-Squared,MAE,MAPE
0,0.0,-0.027506,66.045624,0.623622
1,1.0,0.482316,44.276499,0.39486
2,2.0,0.391502,46.612882,0.402669
3,3.0,-182.293189,342.397076,2.331786
4,4.0,-70.667516,303.158461,2.453685
5,5.0,-67.387407,295.686026,2.405233
6,6.0,-67.447482,295.631865,2.404954
7,7.0,-67.448529,295.630403,2.404952
8,8.0,-67.442355,295.583604,2.404613
mean,,-57.98224,220.558049,1.758486


## Finding which polynomial degree exhibits the highest performance

In [86]:
# Row index of the best degree according to each metric on its own:
# highest R-squared, lowest MAE, lowest MAPE.
best_r_squared_index = table_df['R-Squared'].idxmax()
best_mae_index = table_df['MAE'].idxmin()
best_mape_index = table_df['MAPE'].idxmin()

# Majority vote across the three metrics. max() returns the first maximal
# item, so when all three metrics disagree the R-squared pick (listed first)
# wins. This replaces set(...).pop(), which returns an arbitrary element.
best_votes = [best_r_squared_index, best_mae_index, best_mape_index]
best_model_index = max(best_votes, key=best_votes.count)

best_model_degree = table_df.loc[best_model_index, 'Degree']
best_model_degree

1

### Here, degree 1 has the highest performance because it has the highest R-squared value and the lowest MAE and MAPE

## Finding which polynomial degree has the lowest performance

In [85]:
# Row index of the worst degree according to each metric on its own:
# lowest R-squared, highest MAE, highest MAPE.
worst_r_squared_index = table_df['R-Squared'].idxmin()
worst_mae_index = table_df['MAE'].idxmax()
worst_mape_index = table_df['MAPE'].idxmax()

# Majority vote across the three metrics. max() returns the first maximal
# item, so when all three metrics disagree the R-squared pick (listed first)
# wins. This replaces set(...).pop(), which returns an arbitrary element
# whenever the metrics do not all point at the same row.
worst_votes = [worst_r_squared_index, worst_mae_index, worst_mape_index]
worst_model_index = max(worst_votes, key=worst_votes.count)

worst_model_degree = table_df.loc[worst_model_index, 'Degree']
worst_model_degree

3

### Here, degree 3 has the lowest performance: it has the lowest R-squared and the highest MAE (MAPE alone is slightly worse at degree 4)