In [1]:
################### standard ###################
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

################### allow import from src ###################
import os
import sys

# Put the notebook's parent directory on sys.path so that the `import src...`
# lines that follow can resolve (assumes `src` lives one directory above the
# notebook's working directory -- TODO confirm).
# The membership check makes this cell idempotent: re-running it no longer
# stacks duplicate entries onto sys.path.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

################### pipeline step ###################
import src.data_prep as data
import src.models.poly_reg as poly
import src.config.feature_def as feature_def
import src.models.utils as utils

In [2]:
# Build the dataset from the raw CSV via the project's data-prep step.
# NOTE(review): the name suggests a monthly aggregation -- confirm in src.data_prep.
raw_csv_path = "../data/Major_Crime_Indicators_Open_Data.csv"
monthly = data.prepare_monthly_dataset(raw_csv_path)

################ define features ################
# Feature and target column names come from the shared config module, so this
# notebook uses whatever definitions src.config.feature_def exposes.
feature_cols, target_col = feature_def.FEATURE_COLS, feature_def.TARGET_COL

In [10]:
################### Train Polynomial Regression ###################

# Single source of truth for the polynomial degree: it is passed to the
# trainer AND interpolated into the printed label, so the two cannot drift
# apart (previously the label said degree=2 while the call used degree=4).
poly_degree = 4

# train_poly_model returns five values, unpacked here in order.
# NOTE(review): presumably the fitted model, the feature transformer, the
# held-out targets, the predictions for them, and a dict of evaluation
# metrics -- confirm against src.models.poly_reg.
model, transformer, y_test, y_pred, metrics = poly.train_poly_model(
    monthly,
    feature_cols,
    target_col,
    scale=True,
    degree=poly_degree,
    interaction_only=False,
)

print(f"Polynomial Regression (degree={poly_degree}) Metrics:")
metrics  # bare last expression so the notebook renders the metrics object

Polynomial Regression (degree=2) Metrics:


{'R2': 0.6301756730528936,
 'MAE': 0.06085891687962718,
 'MSE': 0.00619210396531278,
 'RMSE': 0.0786899228955829}

In [5]:
################### Hyperparameter Exploration ###################

# Search grid handed to the explorer: polynomial degrees 1 through 6, each
# tried with interaction_only switched on and off.
degree_grid = range(1, 7)
interaction_grid = [True, False]

results = poly.explore_poly_params(
    monthly, feature_cols, target_col,
    degrees=degree_grid,
    interaction_flags=interaction_grid,
    scale=True,
)

n_tested = len(results)
print("Total combinations tested:", n_tested)
results[:5]  # preview only the first five entries

Total combinations tested: 12


[{'degree': 1,
  'interaction_only': True,
  'R2': 0.7572051650847141,
  'RMSE': 0.06375894227921874,
  'MAE': 0.046994748992940684},
 {'degree': 1,
  'interaction_only': False,
  'R2': 0.7572051650847141,
  'RMSE': 0.06375894227921874,
  'MAE': 0.046994748992940684},
 {'degree': 2,
  'interaction_only': True,
  'R2': 0.7606812945836701,
  'RMSE': 0.06330087371440504,
  'MAE': 0.0466618346627931},
 {'degree': 2,
  'interaction_only': False,
  'R2': 0.7596887983838344,
  'RMSE': 0.06343199776040187,
  'MAE': 0.04696775874979588},
 {'degree': 3,
  'interaction_only': True,
  'R2': 0.7597954579523106,
  'RMSE': 0.06341791938964136,
  'MAE': 0.04670353407595823}]

In [6]:
def _r2_of(entry):
    """Return the value stored under the "R2" key of one exploration result."""
    return entry["R2"]


# Select the result with the largest R2; on ties max() keeps the earliest one.
best = max(results, key=_r2_of)
print("Best configuration found:")
best

Best configuration found:


{'degree': 2,
 'interaction_only': True,
 'R2': 0.7606812945836701,
 'RMSE': 0.06330087371440504,
 'MAE': 0.0466618346627931}