In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import matplotlib.pyplot as plt

# Load call data
# `call_date` is parsed to datetimes on read so the `.dt` accessor works below.
calls = pd.read_csv('../data/raw/calls.csv', parse_dates=['call_date'])

# Aggregate daily call volume
# Strip the time-of-day so every row collapses onto its calendar date.
calls['date'] = pd.to_datetime(calls['call_date']).dt.date

# Count rows per date (presumably one CSV row per call -- confirm against the
# data dictionary), convert the date keys back to timestamps so the result
# carries a DatetimeIndex, then order chronologically.
daily_calls = (
    calls.groupby('date')
    .size()
    .reset_index(name='call_volume')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .sort_index()
)

# Create time features
# Calendar attributes are read straight off the DatetimeIndex.
daily_calls['day_of_week'] = daily_calls.index.dayofweek
daily_calls['day_of_month'] = daily_calls.index.day
daily_calls['month'] = daily_calls.index.month
# pandas numbers Monday as 0, so values 5 and 6 are Saturday and Sunday.
daily_calls['is_weekend'] = (daily_calls['day_of_week'] >= 5).astype(int)

# Create lag features
# NOTE(review): shift() moves by rows, not by calendar days. The lags only
# mean "N days ago" if the index has one row per consecutive day -- confirm
# the data has no missing dates.
for lag in [1, 7, 14]:
    daily_calls[f'lag_{lag}'] = daily_calls['call_volume'].shift(lag)

# Create rolling mean features
# Shift by one row before rolling so each window ends on the PREVIOUS row.
# Rolling directly over `call_volume` would fold the current row's value --
# the prediction target -- into its own feature and leak the label.
for window in [7, 14, 30]:
    daily_calls[f'rolling_mean_{window}'] = (
        daily_calls['call_volume'].shift(1).rolling(window=window).mean()
    )

# Drop NaN
# The leading rows lack enough history for the longest lag/window.
daily_calls = daily_calls.dropna()

# Train-test split (80-20)
# Positional cut: the first 80% of rows train the model, the remainder is
# held out for evaluation. No shuffling is applied.
split_idx = int(len(daily_calls) * 0.8)
train, test = daily_calls.iloc[:split_idx], daily_calls.iloc[split_idx:]

# Model inputs: calendar features, then lagged volumes, then rolling means.
feature_cols = [
    'day_of_week', 'day_of_month', 'month', 'is_weekend',
    'lag_1', 'lag_7', 'lag_14',
    'rolling_mean_7', 'rolling_mean_14', 'rolling_mean_30',
]

# Feature matrix / target pairs for each partition.
X_train, y_train = train[feature_cols], train['call_volume']
X_test, y_test = test[feature_cols], test['call_volume']

# XGBoost Forecasting Model
import xgboost as xgb

# Gradient-boosted regressor with a squared-error objective; the seed is
# fixed via random_state.
xgb_forecast = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)

# fit() returns the estimator itself, so training and held-out prediction
# can be chained.
y_pred = xgb_forecast.fit(X_train, y_train).predict(X_test)

# Evaluate
# RMSE is the square root of the mean squared error, in the target's units.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE comes back as a fraction; the `%` format spec below scales it by 100.
mape = mean_absolute_percentage_error(y_test, y_pred)

# Report both metrics, one per line.
print(f"MAPE: {mape:.2%}", f"RMSE: {rmse:.2f}", sep="\n")

# Plot predictions
# Actual vs. predicted volume over the held-out period, drawn on a single
# wide Axes and written to the reports folder.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(test.index, y_test, label='Actual', marker='o')
ax.plot(test.index, y_pred, label='Predicted', marker='x')
ax.set(
    xlabel='Date',
    ylabel='Call Volume',
    title=f'Call Volume Forecast (MAPE: {mape:.2%})',
)
ax.legend()
# `ax` is the current Axes, so this rotates its x tick labels.
plt.xticks(rotation=45)
fig.tight_layout()
fig.savefig('../reports/figures/call_volume_forecast.png')

# Save model
# joblib is imported here because nothing earlier in the visible file brings
# it into scope; without this the dump call raises NameError.
import joblib

# Serialize the fitted regressor so it can be reloaded without retraining.
joblib.dump(xgb_forecast, '../models/saved_models/call_volume_forecast.pkl')
# The check mark was stored as mis-decoded UTF-8 bytes; restored to the
# intended character.
print("\n✅ Forecasting model saved!")