In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge, Lasso

In [18]:
# Load the diabetes dataset and pull out the feature matrix, the target
# vector and the feature names from the returned object.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

# Quick dimensional sanity check before any splitting or modelling.
print("Feature shape", X.shape)
print("Target shape", y.shape)

Feature shape (442, 10)
Target shape (442,)


In [14]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Expand the inputs with polynomial terms up to `degree`, without a constant column.
degree = 2
poly = PolynomialFeatures(degree=degree, include_bias=False)

# Fit the expansion on the training split only, then apply the same fitted
# transformer to both splits.
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# Report how many columns the expansion produced.
print("Original features: ", X_train.shape[1])
print("Transformed Features: ", X_train_poly.shape[1])

Original features:  10
Transformed Features:  65


In [26]:
# Plain least-squares regression on the polynomial-expanded training features.
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Predict on the training split first, then on the test split.
y_pred_train, y_pred_test = (
    model.predict(features) for features in (X_train_poly, X_test_poly)
)


In [27]:
# Compute the error (MSE) and fit (R2) on each split, then print train and
# test side by side so any gap between them is easy to see.
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

print("Train MSE", train_mse)
print("Test MSE", test_mse)
print("Train R2", train_r2)
print("Test R2", test_r2)

Train MSE 2393.138618059786
Test MSE 3096.0283073442843
Train R2 0.6061583502354678
Test R2 0.41563993364079777


In [36]:
def _degree_test_r2(degree):
    """Return the test-set R2 of a LinearRegression fit on polynomial features.

    The feature expansion, the transformed splits and the fitted model are
    all local to this function, so sweeping over degrees does not overwrite
    the notebook-level `poly`, `X_train_poly`, `X_test_poly` or `model` with
    whatever the last degree in the sweep happened to be.

    Parameters
    ----------
    degree : int
        Polynomial degree passed to PolynomialFeatures.

    Returns
    -------
    The value of r2_score(y_test, predictions) for that degree.
    """
    expansion = PolynomialFeatures(degree=degree, include_bias=False)
    # Fit the expansion on the training split only; reuse it for the test split.
    train_features = expansion.fit_transform(X_train)
    test_features = expansion.transform(X_test)

    fitted = LinearRegression().fit(train_features, y_train)
    return r2_score(y_test, fitted.predict(test_features))


# Sweep the polynomial degree. In the recorded run the test R2 drops as the
# degree grows, and degree 3 is by far the worst (strongly negative) --
# presumably overfitting, given how many more columns the expansion creates.
for d in [1, 2, 3]:
    r2 = _degree_test_r2(d)
    print(f"Degree {d} | Test R2 Score: {r2:.3f}")

Degree 1 | Test R2 Score: 0.453
Degree 2 | Test R2 Score: 0.416
Degree 3 | Test R2 Score: -14.561


In [37]:
# Rebuild the degree-2 polynomial features so every model below is trained on
# the same expanded inputs.
degree = 2
poly = PolynomialFeatures(degree=degree, include_bias=False).fit(X_train)
X_train_poly, X_test_poly = poly.transform(X_train), poly.transform(X_test)

# Three models on identical features: unregularised least squares,
# Ridge (alpha=1.0) and Lasso (alpha=0.01, up to 10000 iterations).
lin_model = LinearRegression()
ridge_model = Ridge(alpha=1.0)
lasso_model = Lasso(alpha=0.01, max_iter=10000)
for estimator in (lin_model, ridge_model, lasso_model):
    estimator.fit(X_train_poly, y_train)

In [40]:
def evaluate_model(name, model, X_train_poly, y_train, X_test_poly, y_test, degree):
    """Print train/test MSE and R2 for an already-fitted model.

    Parameters
    ----------
    name : label used in the report header (printed as "<name> Regression").
    model : fitted estimator; only its `predict` method is called.
    X_train_poly, X_test_poly : feature matrices passed to `model.predict`.
    y_train, y_test : true targets compared against the predictions.
    degree : value shown in the report header; not used in any computation.

    Returns
    -------
    None. The report is written to stdout.
    """
    # Predict once per split (train first, then test) and keep the true
    # targets next to the predictions they belong to.
    splits = (("Train", X_train_poly, y_train), ("Test", X_test_poly, y_test))
    scored = {
        split_label: (y_true, model.predict(features))
        for split_label, features, y_true in splits
    }

    print(f"\n{name} Regression (degree={degree})")
    print("-" * 40)

    # Metric-major order: both MSE lines first, then both R2 lines.
    for metric_label, metric_fn in (("MSE", mean_squared_error), ("R2", r2_score)):
        for split_label, (y_true, y_hat) in scored.items():
            print(f"{split_label} {metric_label}:", metric_fn(y_true, y_hat))

In [41]:
# Degree = 2 example: rebuild the features and refit all three models inside
# this cell so the evaluation uses features and models that belong together.
degree = 2
poly = PolynomialFeatures(degree=degree, include_bias=False)
X_train_poly, X_test_poly = poly.fit_transform(X_train), poly.transform(X_test)

lin_model = LinearRegression().fit(X_train_poly, y_train)
ridge_model = Ridge(alpha=1.0).fit(X_train_poly, y_train)
lasso_model = Lasso(alpha=0.01, max_iter=10000).fit(X_train_poly, y_train)

# Evaluate all three models (with matching data), in a fixed order:
# Linear, then Ridge, then Lasso.
fitted_models = {"Linear": lin_model, "Ridge": ridge_model, "Lasso": lasso_model}
for model_name, fitted_model in fitted_models.items():
    evaluate_model(
        model_name, fitted_model, X_train_poly, y_train, X_test_poly, y_test, degree
    )



Linear Regression (degree=2)
----------------------------------------
Train MSE: 2393.138618059786
Test MSE: 3096.0283073442843
Train R2: 0.6061583502354678
Test R2: 0.41563993364079777

Ridge Regression (degree=2)
----------------------------------------
Train MSE: 3384.7281675307836
Test MSE: 3075.1322263562033
Train R2: 0.4429712865585862
Test R2: 0.41958396582030866

Lasso Regression (degree=2)
----------------------------------------
Train MSE: 2783.525427182105
Test MSE: 2698.0266419350723
Train R2: 0.5419119318329695
Test R2: 0.49076078413751045
