Assignment 3

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.metrics import make_scorer, mean_absolute_error

# Neural network imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# --- Data loading and evaluation setup ---
# The Boston housing CSV is read directly from GitHub (network access needed).
url = "https://raw.githubusercontent.com/Patrick0481/Intro-to-modeling/refs/heads/main/1654308boston.csv"
BostonData = pd.read_csv(url)

# MEDV is the regression target; all remaining columns are predictors.
# Non-numeric predictors (if any) are one-hot encoded with the first level
# of each dropped.
y = BostonData['MEDV']
X = pd.get_dummies(BostonData.drop('MEDV', axis=1), drop_first=True)

# One shuffled 5-fold splitter with a fixed seed, reused for every model.
kf = KFold(shuffle=True, random_state=42, n_splits=5)

# scikit-learn scorers follow "higher is better", so mean absolute error is
# wrapped with greater_is_better=False (i.e. negated); the sign is flipped
# back wherever the scores are reported.
mad_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# ======================================
# Linear Regression (baseline)
# ======================================
# The scaler is a pipeline step, so cross_val_score re-fits it on the
# training portion of every fold rather than on the full data set.
linear_steps = [
    ('scaler', StandardScaler()),
    ('lr', LinearRegression()),
]
linear_pipeline = Pipeline(linear_steps)

# The scorer yields negated MAE; negate once more to get positive errors.
linear_scores = cross_val_score(linear_pipeline, X, y, cv=kf, scoring=mad_scorer)
linear_mad = -linear_scores

print("Linear Regression MAD per fold:", np.round(linear_mad, 3))
print("Average Linear Regression MAD:", np.round(linear_mad.mean(), 3))

# ======================================
# Ridge and Lasso Regression
# ======================================
# Candidate penalty strengths: 100 log-spaced values from 1e-3 to 1e3.
alphas = np.logspace(-3, 3, 100)

# RidgeCV selects its own alpha from `alphas` using the `kf` folds.
ridge = RidgeCV(alphas=alphas, cv=kf)
ridge_steps = [
    ('scaler', StandardScaler()),
    ('ridge', ridge),
]
ridge_pipeline = Pipeline(ridge_steps)

# Fit on the full data only to read off the selected penalty.
ridge_pipeline.fit(X, y)
ridge_alpha = ridge_pipeline.named_steps.ridge.alpha_

# Error estimate: cross_val_score clones the pipeline and re-fits it
# (alpha search included) on each training fold.
ridge_scores = cross_val_score(ridge_pipeline, X, y, cv=kf, scoring=mad_scorer)
ridge_mad = -ridge_scores

print("\nOptimal Ridge alpha:", ridge_alpha)
print("Average Ridge MAD:", np.round(ridge_mad.mean(), 3))

# LassoCV searches the same `alphas` grid on the same `kf` folds; max_iter is
# set to 10000 to give the solver more iterations to converge.
lasso = LassoCV(alphas=alphas, cv=kf, max_iter=10000, random_state=42)
lasso_steps = [
    ('scaler', StandardScaler()),
    ('lasso', lasso),
]
lasso_pipeline = Pipeline(lasso_steps)

# Fit on the full data only to read off the selected penalty.
lasso_pipeline.fit(X, y)
lasso_alpha = lasso_pipeline.named_steps.lasso.alpha_

# Error estimate: the pipeline is cloned and re-fit on each training fold.
lasso_scores = cross_val_score(lasso_pipeline, X, y, cv=kf, scoring=mad_scorer)
lasso_mad = -lasso_scores

print("\nOptimal Lasso alpha:", lasso_alpha)
print("Average Lasso MAD:", np.round(lasso_mad.mean(), 3))

# ======================================
# 3️⃣ Neural Network
# ======================================
def build_nn(seed=42):
    """Build and compile the feed-forward regression network.

    The network has four fully connected ReLU layers of 512 units each,
    followed by a single linear output unit. The input width is taken from
    the module-level feature matrix ``X`` at call time.

    Args:
        seed: TensorFlow global random seed set before the layers are
            created, so weight initialisation is repeatable (in line with
            the fixed ``random_state=42`` used for the other models).
            Pass ``None`` to leave TensorFlow's random state untouched.
            Bit-for-bit identical training may additionally depend on the
            hardware/backend in use.

    Returns:
        A compiled Keras ``Sequential`` model trained with the Adam
        optimizer (default settings) on mean absolute error.
    """
    if seed is not None:
        # Without a fixed seed every run (and every CV fold) starts from
        # different random weights, so the reported MAD cannot be reproduced.
        tf.random.set_seed(seed)

    model = Sequential([
        Dense(512, activation='relu', input_shape=(X.shape[1],)),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(1)  # Single linear unit: unbounded output for regression
    ])
    # Loss is MAE so the training objective matches the MAD metric used for
    # model comparison; 'mae' is also tracked as a metric.
    model.compile(optimizer=Adam(), loss='mean_absolute_error', metrics=['mae'])
    return model

# Wrap the Keras builder so it can be used as a scikit-learn estimator.
# NOTE(review): tensorflow.keras.wrappers.scikit_learn was removed in
# TensorFlow 2.12; on newer versions this wrapper has to be ported to
# SciKeras. Confirm which TF version this is expected to run on.
nn_settings = dict(epochs=100, batch_size=16, verbose=0)
nn = KerasRegressor(build_fn=build_nn, **nn_settings)

# Scaling is a pipeline step, so it is re-fit on each training fold.
nn_steps = [
    ('scaler', StandardScaler()),
    ('nn', nn),
]
nn_pipeline = Pipeline(nn_steps)

# The scorer yields negated MAE; negate once more to get positive errors.
nn_scores = cross_val_score(nn_pipeline, X, y, cv=kf, scoring=mad_scorer)
nn_mad = -nn_scores

print("\nNeural Network MAD per fold:", np.round(nn_mad, 3))
print("Average Neural Network MAD:", np.round(nn_mad.mean(), 3))