# Linear Regression with Regularization (GridSearchCV)
This notebook follows the exact lab specification.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## Load Dataset

In [None]:
# Rebind `df` to a deep copy of itself so this notebook works on its own object.
# NOTE(review): `df` is not created anywhere in the visible cells — it must
# already exist in the session before this cell runs; confirm the loading step.
df = df.copy(deep=True)

# Preview the first five rows.
df.head(n=5)

## Train-Test Split

In [None]:
# Name of the column the models are trained to predict.
target_col = 'Loan Sanction Amount (USD)'

# The target is selected on its own; the features are every other column.
y = df[target_col]
X = df.drop(columns=target_col)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

## Baseline Linear Regression

In [None]:
# Baseline: a StandardScaler step followed by an unregularised LinearRegression.
lin_pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])
lin_pipe.fit(X_train, y_train)

# Evaluate the baseline on the held-out test split.
y_pred_lin = lin_pipe.predict(X_test)

# MSE is computed once and reused, since RMSE is simply its square root.
lin_mse = mean_squared_error(y_test, y_pred_lin)
lin_results = {
    'MAE': mean_absolute_error(y_test, y_pred_lin),
    'MSE': lin_mse,
    'RMSE': np.sqrt(lin_mse),
    'R2': r2_score(y_test, y_pred_lin),
}

lin_results

## GridSearchCV – Ridge, Lasso, ElasticNet

In [None]:
# Candidate regularised regressors, each paired with the hyper-parameter grid
# to search. Grid keys carry the `model__` prefix because the estimator is
# placed in the pipeline step named 'model'.
models = {
    'Ridge': (
        Ridge(),
        {'model__alpha': [0.01, 0.1, 1, 10, 100]},
    ),
    'Lasso': (
        Lasso(max_iter=100000),
        {'model__alpha': [0.001, 0.01, 0.1, 1, 10]},
    ),
    'ElasticNet': (
        ElasticNet(max_iter=100000),
        {
            'model__alpha': [0.01, 0.1, 1, 10],
            'model__l1_ratio': [0.2, 0.5, 0.8],
        },
    ),
}

# Per-model search summary and the best fitted pipeline, both keyed by model name.
grid_results, best_models = {}, {}

for name, (estimator, param_grid) in models.items():
    # The scaler is a pipeline step, so the grid search fits it together
    # with the model rather than on the full training set up front.
    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', estimator)])

    # 5-fold search over the grid, selecting by R^2.
    grid = GridSearchCV(estimator=pipe, param_grid=param_grid, scoring='r2', cv=5)
    grid.fit(X_train, y_train)

    best_models[name] = grid.best_estimator_
    grid_results[name] = {
        'Best Params': grid.best_params_,
        'Best CV R2': grid.best_score_,
    }

# One row per model, one column per summary field.
pd.DataFrame(grid_results).transpose()

## Cross-Validation Performance (5-Fold)

In [None]:
# Scorers evaluated on every fold; the two error scorers are reported negated,
# which is why their means are sign-flipped below.
cv_scoring = (
    'neg_mean_absolute_error',
    'neg_mean_squared_error',
    'r2',
)

# Fold-averaged metrics for each tuned pipeline, keyed by model name.
cv_metrics = {}

for name, model in best_models.items():
    # 5-fold cross-validation of the tuned pipeline on the training split.
    scores = cross_validate(model, X_train, y_train, scoring=cv_scoring, cv=5)

    mean_mae = -scores['test_neg_mean_absolute_error'].mean()
    mean_mse = -scores['test_neg_mean_squared_error'].mean()

    cv_metrics[name] = {
        'MAE': mean_mae,
        'MSE': mean_mse,
        # Square root of the fold-averaged MSE, not an average of per-fold RMSEs.
        'RMSE': np.sqrt(mean_mse),
        'R2': scores['test_r2'].mean(),
    }

# One row per model, one column per metric.
pd.DataFrame(cv_metrics).transpose()

## Test Set Performance

In [None]:
# Held-out test-set metrics for each tuned pipeline, keyed by model name.
test_metrics = {}

for name, model in best_models.items():
    preds = model.predict(X_test)

    # MSE is computed once and reused, since RMSE is simply its square root.
    test_mse = mean_squared_error(y_test, preds)

    test_metrics[name] = {
        'MAE': mean_absolute_error(y_test, preds),
        'MSE': test_mse,
        'RMSE': np.sqrt(test_mse),
        'R2': r2_score(y_test, preds),
    }

# One row per model, one column per metric.
pd.DataFrame(test_metrics).transpose()

## Effect of Regularization on Coefficients

In [None]:
# Fitted pipelines in the column order of the comparison table: the baseline
# first, then the best pipeline found for each regularised model.
fitted_pipes = {'Linear': lin_pipe}
for label in ('Ridge', 'Lasso', 'ElasticNet'):
    fitted_pipes[label] = best_models[label]

# One column of learned coefficients per model, indexed by feature column name.
coef_df = pd.DataFrame(
    {
        label: fitted.named_steps['model'].coef_
        for label, fitted in fitted_pipes.items()
    },
    index=X.columns,
)

coef_df