Assignment 3 

In [1]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, make_scorer
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Load data
url = "https://raw.githubusercontent.com/Patrick0481/Intro-to-modeling/refs/heads/main/1654308boston.csv"
BostonData = pd.read_csv(url)

In [2]:
#Linear Regression
# Define features and target
X = BostonData.drop(columns=['MEDV'])
y = BostonData['MEDV']

# Convert categorical variables to dummy variables
X = pd.get_dummies(X, drop_first=True)

# Define 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define MAD scorer
mad_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Linear Regression with scaling
linear_pipeline = Pipeline([
    ('scaler', StandardScaler()),   
    ('linear', LinearRegression())])

# Perform 5-fold CV
mad_scores = -cross_val_score(linear_pipeline, X, y, scoring=mad_scorer, cv=kf)

print("MAD for each fold:", np.round(mad_scores, 3))
print("Average MAD across 5 folds:", np.round(np.mean(mad_scores), 3))

MAD for each fold: [50.178 56.615 49.909 49.486 41.446]
Average MAD across 5 folds: 49.527


In [3]:
from sklearn.linear_model import RidgeCV, LassoCV

X = BostonData.drop(columns=['MEDV'])
y = BostonData['MEDV']

# One-hot encode categorical variables
X = pd.get_dummies(X, drop_first=True)

alphas = np.logspace(-3, 3, 100)  # from 0.001 to 1000

# Define 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mad_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

#Lasso Regression
lasso_pipeline = Pipeline([
    ('scaler', StandardScaler()),                # normalize features
    ('lasso', LassoCV(alphas=alphas, cv=kf, random_state=42, max_iter=10000))])
lasso_pipeline.fit(X, y)
lasso_alpha = lasso_pipeline.named_steps['lasso'].alpha_

lasso_mad = -cross_val_score(LassoCV(alphas=[lasso_alpha], cv=kf, random_state=42),
                             X, y, scoring=mad_scorer, cv=kf)
print("\nOptimal Lasso alpha:", lasso_alpha)
print("Lasso MAD for each fold:", lasso_mad)
print("Average Lasso MAD:", np.mean(lasso_mad))


Optimal Lasso alpha: 0.6135907273413176
Lasso MAD for each fold: [51.23015929 59.58740563 48.49890866 50.32796533 43.01358695]
Average Lasso MAD: 50.531605169325914


In [4]:
#Ridge Regression
alphas = np.logspace(-3, 3, 50)
mad_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Ridge Regression with scaling inside a Pipeline
ridge_pipeline = Pipeline([
    ('scaler', StandardScaler()),            # normalize features
    ('ridge', RidgeCV(alphas=alphas, cv=5)) # alpha selection
])

# Fit Ridge pipeline on full data to get best alpha
ridge_pipeline.fit(X, y)
ridge_alpha = ridge_pipeline.named_steps['ridge'].alpha_

# Evaluate Ridge with 5-fold CV
ridge_mad = -cross_val_score(ridge_pipeline, X, y, scoring=mad_scorer, cv=5)

print("Optimal Ridge alpha:", ridge_alpha)
print("Ridge MAD for each fold:", np.round(ridge_mad, 3))
print("Average Ridge MAD:", np.round(np.mean(ridge_mad), 3))

Optimal Ridge alpha: 0.001
Ridge MAD for each fold: [52.383 59.525 42.376 47.873 45.486]
Average Ridge MAD: 49.529


In [5]:
#%pip install tensorflow

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load and prepare data
url = "https://raw.githubusercontent.com/Patrick0481/Intro-to-modeling/refs/heads/main/1654308boston.csv"
BostonData = pd.read_csv(url)

X = BostonData.drop(columns=['MEDV'])
y = BostonData['MEDV']

# One-hot encode categorical variables
X = pd.get_dummies(X, drop_first=True)

# Standardize numeric features (important for neural networks)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mad_scores = []

# Function to build the model
def build_model(input_dim):
    model = Sequential([
        Dense(512, activation='relu', input_dim=input_dim),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(1)  # output layer (regression -> no activation)
    ])
    model.compile(optimizer=Adam(),
                  loss='mean_absolute_error')  # MAD loss
    return model

# 5-fold CV training
for train_idx, test_idx in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    
    model = build_model(X_train.shape[1])
    model.fit(X_train, y_train,
              epochs=100,
              batch_size=16,
              verbose=0)  # silent training
    
    y_pred = model.predict(X_test).flatten()
    mad = mean_absolute_error(y_test, y_pred)
    mad_scores.append(mad)

print("MAD for each fold:", mad_scores)
print("Average MAD across 5 folds:", np.mean(mad_scores))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
MAD for each fold: [36.97802734375, 47.80286407470703, 39.39806365966797, 44.63582229614258, 47.475582122802734]
Average MAD across 5 folds: 43.25807189941406
