# Q26: Linear and Polynomial Regression on Housing Prices

- Use a housing prices dataset.
- Build regression models (Linear and Polynomial).
- Evaluate models using R², RMSE, and MAE.
- Discuss overfitting and apply regularization techniques if needed.

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Fetch the California housing data and wrap the feature matrix in a DataFrame
# so columns can be selected by name.
housing = fetch_california_housing()
X = pd.DataFrame(data=housing.data, columns=housing.feature_names)
y = housing.target

# Keep just the 'MedInc' column (as a one-column frame) so the polynomial
# expansion below stays small and easy to interpret.
X_feat = X[['MedInc']]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_feat,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the feature. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: ordinary least squares on the scaled feature.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lin = lin_reg.predict(X_test_scaled)

# Degree-2 polynomial expansion of the scaled feature, followed by OLS.
# NOTE(review): PolynomialFeatures is left at its defaults apart from `degree`;
# if the default output includes a constant column it duplicates the intercept
# the regressors already fit -- confirm whether include_bias=False is wanted.
poly = PolynomialFeatures(degree=2).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)
poly_reg = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = poly_reg.predict(X_test_poly)

# Same polynomial features, but fitted with an L2-penalised model (alpha=1.0).
ridge_reg = Ridge(alpha=1.0).fit(X_train_poly, y_train)
y_pred_ridge = ridge_reg.predict(X_test_poly)

# Evaluation function
def eval_model(y_true, y_pred):
    """Score a set of predictions against the observed targets.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values to compare against ``y_true``.

    Returns:
        A ``(r2, rmse, mae)`` tuple: the R² score, the square root of the
        mean squared error, and the mean absolute error.
    """
    # Tuple elements are evaluated left to right, matching the order in which
    # the three metrics are computed and returned.
    return (
        r2_score(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
    )

# Report (R², RMSE, MAE) on the held-out test set for each fitted model.
for label, preds in (
    ('Linear Regression:', y_pred_lin),
    ('Polynomial Regression:', y_pred_poly),
    ('Ridge Regression (Poly):', y_pred_ridge),
):
    print(label, eval_model(y_test, preds))

# Closing remark on overfitting and regularization.
print('Polynomial regression may overfit if degree is too high. Ridge regularization helps control overfitting.')