In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

In [None]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

## Load Boston Housing Dataset
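- We load the Boston Housing dataset directly from scikit-learn via `load_boston`. Note that `load_boston` is deprecated (and was removed in scikit-learn 1.2), so this notebook requires an older scikit-learn release.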

In [None]:
# NOTE: load_boston is deprecated (and, per the scikit-learn release notes,
# removed from version 1.2 onward). This cell still calls it directly, so an
# older scikit-learn release is required to run the notebook.
data = load_boston()

# One frame holding the named feature columns plus the regression target.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
df.head()

In [None]:
# Print pandas' summary report for df (DataFrame.info) as a quick sanity check of the loaded data.
df.info()

In [None]:
# Prepare features and target
X = df.drop('target', axis=1)
y = df['target']

# Standardize features.
# The scaler's statistics are learned from the training rows only, so the
# held-out rows do not influence the preprocessing (no train/test leakage).
# train_test_split chooses rows from the sample count and random_state alone,
# so using the same test_size/random_state as the "Split data" cell selects
# exactly the rows that later become X_train.
X_train_raw = train_test_split(X, test_size=0.2, random_state=1)[0]
scaler = StandardScaler()
scaler.fit(X_train_raw)

# Apply the train-fitted scaling to every row; later cells split X_scaled.
X_scaled = scaler.transform(X)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=1,
)

# Report the shape of every piece of the split
for split_name, split_part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name} shape: {split_part.shape}")

## Linear Regression without Regularization

In [None]:
# Fit the plain (unregularized) linear model on the training split
lr = LinearRegression().fit(X_train, y_train)

# Predictions for both splits
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

# Mean squared error and R² on each split
train_mse, test_mse = (
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred),
)
train_r2, test_r2 = (
    r2_score(y_train, y_train_pred),
    r2_score(y_test, y_test_pred),
)

# Emit the report one metric per line
print(
    "Linear Regression without Regularization:",
    f"Train MSE: {train_mse:.4f}",
    f"Test MSE: {test_mse:.4f}",
    f"Train R²: {train_r2:.4f}",
    f"Test R²: {test_r2:.4f}",
    sep="\n",
)

## Linear Regression with Polynomial Features (Inducing Overfitting)

In [None]:
# Create polynomial features (degree-3 expansion of the standardized inputs).
# X_poly, the expansion of every row, is kept available for inspection.
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit_transform(X_scaled)

# Expand the existing train/test partition instead of splitting a second time.
# Each expanded row is computed from its own input row, so this yields the
# same train/test rows as re-splitting X_poly with the same seed, but it no
# longer depends on two cells repeating identical split arguments and it does
# not rebind y_train / y_test.
X_poly_train = poly.transform(X_train)
X_poly_test = poly.transform(X_test)

# Train Linear Regression on polynomial features
lr_poly = LinearRegression()
lr_poly.fit(X_poly_train, y_train)

# Predict
y_poly_train_pred = lr_poly.predict(X_poly_train)
y_poly_test_pred = lr_poly.predict(X_poly_test)

# Evaluate: a large gap between train and test scores indicates overfitting
poly_train_mse = mean_squared_error(y_train, y_poly_train_pred)
poly_test_mse = mean_squared_error(y_test, y_poly_test_pred)
poly_train_r2 = r2_score(y_train, y_poly_train_pred)
poly_test_r2 = r2_score(y_test, y_poly_test_pred)

print("\nLinear Regression with Polynomial Features (Degree=3):")
print(f"Train MSE: {poly_train_mse:.4f}")
print(f"Test MSE: {poly_test_mse:.4f}")
print(f"Train R²: {poly_train_r2:.4f}")
print(f"Test R²: {poly_test_r2:.4f}")

## Linear Regression with Regularization (Ridge)

In [None]:
# Fit an L2-regularized model (alpha=1.0) on the same polynomial features
ridge = Ridge(alpha=1.0).fit(X_poly_train, y_train)

# Predictions for both splits
y_ridge_train_pred, y_ridge_test_pred = (
    ridge.predict(X_poly_train),
    ridge.predict(X_poly_test),
)

# Mean squared error and R² on each split
ridge_train_mse, ridge_test_mse = (
    mean_squared_error(y_train, y_ridge_train_pred),
    mean_squared_error(y_test, y_ridge_test_pred),
)
ridge_train_r2, ridge_test_r2 = (
    r2_score(y_train, y_ridge_train_pred),
    r2_score(y_test, y_ridge_test_pred),
)

# Emit the report one metric per line
print(
    "\nRidge Regression with Polynomial Features (Degree=3):",
    f"Train MSE: {ridge_train_mse:.4f}",
    f"Test MSE: {ridge_test_mse:.4f}",
    f"Train R²: {ridge_train_r2:.4f}",
    f"Test R²: {ridge_test_r2:.4f}",
    sep="\n",
)

In [None]:
# Compare each model's test-set predictions against the true target values
fig, ax = plt.subplots(figsize=(10, 6))
for preds, model_label in (
    (y_test_pred, 'Linear Regression'),
    (y_poly_test_pred, 'Polynomial Regression'),
    (y_ridge_test_pred, 'Ridge Regression'),
):
    ax.scatter(y_test, preds, alpha=0.5, label=model_label)

# Dashed red diagonal across the target range: points on it are exact predictions
ymin = y.min()
ymax = y.max()
ax.plot([ymin, ymax], [ymin, ymax], 'r--', lw=2)

ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('Predictions vs Actual Values')
ax.legend()

# Write the figure to disk before showing it
fig.savefig('boston_predictions_comparison.png')
plt.show()