# MSCS 634 Lab 4: Regression Analysis with Regularization Techniques
**Name**: Your Name  
**Course**: MSCS 634  
**Lab**: Regression Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Apply seaborn's white-grid theme to every figure drawn below.
# set_theme() is the preferred spelling; sns.set() is only a legacy alias for it.
sns.set_theme(style='whitegrid')


In [None]:
# Load the scikit-learn Diabetes dataset and wrap it in pandas containers:
# X holds the features (one named column per feature), y holds the target.
diabetes = load_diabetes()
X = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
y = pd.Series(data=diabetes.target, name='target')

# Preview the first rows of the feature table
X.head()


In [None]:
# Simple Linear Regression: predict the target from the single 'bmi' feature
X_bmi = X[['bmi']]  # double brackets keep a 2-D DataFrame, as the estimator expects
X_train_bmi, X_test_bmi, y_train_bmi, y_test_bmi = train_test_split(
    X_bmi, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained
lr = LinearRegression().fit(X_train_bmi, y_train_bmi)
y_pred_bmi = lr.predict(X_test_bmi)

# Score the held-out predictions; RMSE is derived from MSE
mae, mse, r2 = (
    metric(y_test_bmi, y_pred_bmi)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

print(f"Simple Linear Regression with BMI\nMAE: {mae:.2f}  MSE: {mse:.2f}  RMSE: {rmse:.2f}  R²: {r2:.2f}")

# Visualization: actual test points with the fitted regression line on top
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X_test_bmi, y_test_bmi, color='blue', label='Actual')
ax.plot(X_test_bmi, y_pred_bmi, color='red', linewidth=2, label='Predicted')
ax.set_title('Simple Linear Regression (BMI vs Target)')
ax.set_xlabel('BMI')
ax.set_ylabel('Target')
ax.legend()
plt.show()


In [None]:
# Multiple Linear Regression: use every feature column of X
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained
lr_multi = LinearRegression().fit(X_train_multi, y_train_multi)
y_pred_multi = lr_multi.predict(X_test_multi)

# Score the held-out predictions; RMSE is derived from MSE
mae, mse, r2 = (
    metric(y_test_multi, y_pred_multi)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

print(f"Multiple Linear Regression\nMAE: {mae:.2f}  MSE: {mse:.2f}  RMSE: {rmse:.2f}  R²: {r2:.2f}")

# Visualization: predicted vs actual targets on the test split
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(y_test_multi, y_pred_multi, alpha=0.6)
# Dashed red diagonal marks perfect prediction across the full target range
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, 'r--')
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Multiple Linear Regression: Actual vs Predicted')
plt.show()


In [None]:
# Polynomial Regression: expand 'bmi' into degree-2 polynomial features,
# then fit an ordinary linear model on the expanded matrix
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_bmi)

X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(
    X_poly, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained
lr_poly = LinearRegression().fit(X_train_poly, y_train_poly)
y_pred_poly = lr_poly.predict(X_test_poly)

# Score the held-out predictions; RMSE is derived from MSE
mae, mse, r2 = (
    metric(y_test_poly, y_pred_poly)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

print(f"Polynomial Regression (degree=2)\nMAE: {mae:.2f}  MSE: {mse:.2f}  RMSE: {rmse:.2f}  R²: {r2:.2f}")


In [None]:
# Ridge and Lasso Regression, trained and evaluated on the same
# all-feature split used for the multiple linear model
ridge = Ridge(alpha=1.0).fit(X_train_multi, y_train_multi)
y_pred_ridge = ridge.predict(X_test_multi)

lasso = Lasso(alpha=0.1).fit(X_train_multi, y_train_multi)
y_pred_lasso = lasso.predict(X_test_multi)

def evaluate(name, y_true, y_pred):
    """Print MAE, MSE, RMSE and R² for one model's predictions.

    Args:
        name: Heading printed on the first line of the report.
        y_true: Ground-truth target values.
        y_pred: Predicted target values to compare against ``y_true``.

    Returns:
        None. The report is written to standard output, followed by a blank line.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    rmse = np.sqrt(mse)  # RMSE is derived from MSE rather than recomputed
    print(f"{name}\nMAE: {mae:.2f}  MSE: {mse:.2f}  RMSE: {rmse:.2f}  R²: {r2:.2f}\n")

# Report both regularized models against the shared test targets
evaluate(name="Ridge Regression", y_true=y_test_multi, y_pred=y_pred_ridge)
evaluate(name="Lasso Regression", y_true=y_test_multi, y_pred=y_pred_lasso)


In [None]:
# Summary Table
# Each entry pairs a model label with the test targets and predictions it is scored on.
scored = [
    ('Simple Linear', y_test_bmi, y_pred_bmi),
    ('Multiple Linear', y_test_multi, y_pred_multi),
    ('Polynomial', y_test_poly, y_pred_poly),
    ('Ridge', y_test_multi, y_pred_ridge),
    ('Lasso', y_test_multi, y_pred_lasso),
]

# One list per table column, all in the same model order
models = [label for label, _, _ in scored]
maes = [mean_absolute_error(truth, pred) for _, truth, pred in scored]
mses = [mean_squared_error(truth, pred) for _, truth, pred in scored]
rmses = [np.sqrt(value) for value in mses]  # derived from the MSE column
r2s = [r2_score(truth, pred) for _, truth, pred in scored]

summary_df = pd.DataFrame(
    {'Model': models, 'MAE': maes, 'MSE': mses, 'RMSE': rmses, 'R²': r2s}
)
summary_df
