In [2]:
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import joblib
import os

# Output location: create the models directory up front
# (a no-op when it is already there).
os.makedirs("../models", exist_ok=True)

# Step 1 - read the input table.
df = pd.read_csv("../data/processed/feature_engineered_holdout.csv")

print("Training Forecasting Model...")

# Step 2 - derive a 'date' column from the 'year' and 'month' columns.
# Every row is pinned to day 1 so pandas can assemble a complete date.
date_parts = {"year": df["year"], "month": df["month"], "day": 1}
df["date"] = pd.to_datetime(date_parts)

# Step 3 - collapse to one value per month: the mean of 'price'.
# The target is the monthly average price, not individual rows.
price_by_date = df.set_index("date")["price"]
monthly_data = price_by_date.resample("M").mean()

# Months that contain no rows come out of the resample as NaN;
# fill them by linear interpolation.
monthly_data = monthly_data.interpolate(method="linear")

# Step 4 - Holt-Winters exponential smoothing with an additive trend and
# an additive seasonal component; 12 periods = one year of monthly data.
smoother = ExponentialSmoothing(
    monthly_data,
    trend="add",
    seasonal="add",
    seasonal_periods=12,
)
model = smoother.fit()

# Step 5 - persist the fitted model to disk.
joblib.dump(model, "../models/forecasting.joblib")
print("✅ Forecasting Model Saved")

Training Forecasting Model...
✅ Forecasting Model Saved
