# 3_Polynomial_Regression.ipynb

In [1]:
# --- Imports ---
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# --- Fetch the California housing data: features as a DataFrame, target as returned by the loader ---
data = fetch_california_housing()
X, y = pd.DataFrame(data=data.data, columns=data.feature_names), data.target

In [3]:
# --- Keep a single predictor so the polynomial fit is univariate ---
# Selecting with a one-element list keeps X as a DataFrame (2-D), not a Series.
X = X.loc[:, ["AveRooms"]]

In [4]:
# --- Hold out 20% of the rows for testing; the seed is fixed so the split repeats ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# --- Build Pipeline: Scaling + Polynomial + Regression ---
# Steps run in order: standardize the input, expand it into polynomial terms
# up to degree 3, then fit ordinary least squares on the expanded features.
model = Pipeline([
    ('scaler', StandardScaler()),
    # include_bias=False: LinearRegression fits its own intercept by default,
    # so the constant column PolynomialFeatures would otherwise add is
    # redundant (perfectly collinear with that intercept).
    ('poly', PolynomialFeatures(degree=3, include_bias=False)),
    ('regressor', LinearRegression())
])

In [6]:
# --- Train the pipeline on the training split ---
# The fitted pipeline is the cell's last expression, so the notebook displays it.
model.fit(X=X_train, y=y_train)

In [7]:
# --- Generate predictions for the held-out rows with the fitted pipeline ---
y_pred = model.predict(X=X_test)

In [8]:
# --- Hold-out metrics: compare the test targets with the predictions ---
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics as plain text.
print("Mean Squared Error (MSE):", mse)
print("R2 Score:", r2)

Mean Squared Error (MSE): 1.211719462374095
R2 Score: 0.07531199610619721


In [9]:
# --- 5-fold cross-validation of the same pipeline, scored by R2 ---
# NOTE(review): this runs on the full X / y rather than the training split,
# so the rows held out as X_test take part in the folds — confirm that is intended.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(estimator=model, X=X, y=y, scoring='r2', cv=kfold)

# Show the per-fold scores followed by their mean.
print("\nCross-validation R2 scores:", cv_scores)
print("Average R2 CV Score:", cv_scores.mean())


Cross-validation R2 scores: [0.075312   0.08113173 0.06813913 0.0642831  0.05902476]
Average R2 CV Score: 0.06957814321799241
