In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Fetch the scikit-learn Diabetes data as pandas objects (as_frame=True),
# requesting the scaled version of the feature columns (scaled=True).
diabetes = load_diabetes(as_frame=True, scaled=True)
X = diabetes.data
y = diabetes.target

# Two-stage split: hold out 40% of the rows first, then cut that hold-out in
# half to obtain the validation and test sets (roughly 60/20/20 overall).
# Both stages use the same fixed seed so the partition is reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)


In [2]:
from sklearn.linear_model import LinearRegression

# Baseline model: a linear regression fitted on every feature column of the
# training split.
linear_model = LinearRegression()
linear_model.fit(X=X_train, y=y_train)


In [4]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression




In [7]:
# Degree-2 polynomial regression that uses only the 'bmi' column.
# The transformer is fitted on the training split and then applied to it;
# include_bias=False leaves the constant column out of the expansion.
poly_features = PolynomialFeatures(degree=2, include_bias=False).fit(X_train[['bmi']])
x_poly_bmi = poly_features.transform(X_train[['bmi']])

# Linear regression on the expanded 'bmi' features.
poly_model_bmi = LinearRegression().fit(x_poly_bmi, y_train)

# Expand the validation 'bmi' column with the transformer fitted above
# (transform only, so nothing is refitted on validation data).
x_bmi_valid = poly_features.transform(X_valid[['bmi']])


In [8]:
# Degree-2 polynomial regression that uses every feature column.
# NOTE: this rebinds `poly_features`; from here on the name refers to the
# all-feature transformer, not the one fitted on 'bmi' alone.
poly_features = PolynomialFeatures(degree=2, include_bias=False).fit(X_train)
x_poly_all = poly_features.transform(X_train)

# Linear regression on the expanded full feature set.
poly_model_all = LinearRegression().fit(x_poly_all, y_train)

# Expand the validation features with the transformer fitted above
# (transform only, so nothing is refitted on validation data).
x_all_valid = poly_features.transform(X_valid)


In [9]:
from sklearn.metrics import r2_score, mean_absolute_error
import numpy as np

# Define a function to calculate MAPE
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), expressed in percent.

    Both inputs are converted to float NumPy arrays before any arithmetic, so
    plain Python lists are accepted and pandas objects are compared by
    position rather than being aligned on their index labels.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.
        Denominators smaller than machine epsilon (in particular exact
        zeros) are replaced by machine epsilon, so a zero target yields a
        finite value instead of ``inf``/``nan``.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Floor the denominator at machine epsilon to avoid dividing by zero.
    denominator = np.maximum(np.abs(actual), np.finfo(float).eps)

    return np.mean(np.abs(actual - predicted) / denominator) * 100

# Score each fitted model on the validation split, feeding every model the
# validation features in the representation it was trained on.
y_valid_pred_linear = linear_model.predict(X_valid)
y_valid_pred_poly_bmi = poly_model_bmi.predict(x_bmi_valid)
y_valid_pred_poly_all = poly_model_all.predict(x_all_valid)

# R-squared, in the order: linear, polynomial (bmi), polynomial (all features)
r2_linear, r2_poly_bmi, r2_poly_all = (
    r2_score(y_valid, preds)
    for preds in (y_valid_pred_linear, y_valid_pred_poly_bmi, y_valid_pred_poly_all)
)

# Mean absolute error, same model order
mae_linear, mae_poly_bmi, mae_poly_all = (
    mean_absolute_error(y_valid, preds)
    for preds in (y_valid_pred_linear, y_valid_pred_poly_bmi, y_valid_pred_poly_all)
)

# Mean absolute percentage error, same model order
mape_linear, mape_poly_bmi, mape_poly_all = (
    calculate_mape(y_valid, preds)
    for preds in (y_valid_pred_linear, y_valid_pred_poly_bmi, y_valid_pred_poly_all)
)

# Report all metrics, one per line, grouped by metric
print(
    f"R-squared (Linear): {r2_linear:.4f}",
    f"R-squared (Poly BMI): {r2_poly_bmi:.4f}",
    f"R-squared (Poly All): {r2_poly_all:.4f}",
    f"MAE (Linear): {mae_linear:.4f}",
    f"MAE (Poly BMI): {mae_poly_bmi:.4f}",
    f"MAE (Poly All): {mae_poly_all:.4f}",
    f"MAPE (Linear): {mape_linear:.4f}%",
    f"MAPE (Poly BMI): {mape_poly_bmi:.4f}%",
    f"MAPE (Poly All): {mape_poly_all:.4f}%",
    sep="\n",
)


R-squared (Linear): 0.5810
R-squared (Poly BMI): 0.3623
R-squared (Poly All): 0.4176
MAE (Linear): 38.2213
MAE (Poly BMI): 48.9093
MAE (Poly All): 47.2770
MAPE (Linear): 34.8018%
MAPE (Poly BMI): 44.2695%
MAPE (Poly All): 44.3607%
