In [17]:
# %pip install --force-reinstall --no-deps torch==2.2.2 torchvision==0.17.2

In [18]:
# %pip install statsforecast mlforecast pandas polars pyarrow holidays lightgbm neuralforecast

In [19]:
import pandas as pd
from utils.constants import FREQ_DAILY, FREQ_MONTHLY, FAMILY_BASELINE, FAMILY_STATISTICAL, FAMILY_MACHINE_LEARNING, FAMILY_DEEP_LEARNING, SPLIT_VAL
from utils.preprocessing import load_daily_data, load_monthly_data
from utils.baseline import run_baseline_forecast_daily, run_baseline_forecast_monthly
from utils.statistical import run_statistical_forecast_daily, run_statistical_forecast_monthly
from utils.machine_learning import run_machine_learning_forecast_daily, run_machine_learning_forecast_monthly
from utils.deep_learning import run_deep_learning_forecast_daily, run_deep_learning_forecast_monthly
from utils.metrics import calculate_metrics
from utils.metric_utils import load_overall_metrics, merge_prediction_dfs, find_n_best_models

### Loading Data:

In [20]:
# Load the daily and monthly series; each loader's result is unpacked into
# three splits (train, validation, test).
# NOTE(review): use_existing=True presumably reuses previously prepared data
# instead of rebuilding it — confirm in utils.preprocessing.
daily_train, daily_val, daily_test = load_daily_data(use_existing=True)
monthly_train, monthly_val, monthly_test = load_monthly_data(use_existing=True)

### Forecasts:

##### Baseline Forecast:

In [21]:
# Baseline family: one call per frequency, each returning a pair that is bound
# to the corresponding *_val and *_test names.
# NOTE(review): use_existing=True presumably loads stored forecasts rather than
# refitting — confirm in utils.baseline.
base_daily_val, base_daily_test = run_baseline_forecast_daily(
    daily_train,
    daily_val,
    daily_test,
    use_existing=True,
)
base_monthly_val, base_monthly_test = run_baseline_forecast_monthly(
    monthly_train,
    monthly_val,
    monthly_test,
    use_existing=True,
)

##### Statistical Forecast:

In [22]:
# Statistical family: one call per frequency, each returning a pair that is
# bound to the corresponding *_val and *_test names.
# NOTE(review): use_existing=True presumably loads stored forecasts rather than
# refitting — confirm in utils.statistical.
stat_daily_val, stat_daily_test = run_statistical_forecast_daily(
    daily_train,
    daily_val,
    daily_test,
    use_existing=True,
)
stat_monthly_val, stat_monthly_test = run_statistical_forecast_monthly(
    monthly_train,
    monthly_val,
    monthly_test,
    use_existing=True,
)

##### Machine Learning Forecast:

In [23]:
# Machine-learning family: one call per frequency, each returning a pair that
# is bound to the corresponding *_val and *_test names.
# NOTE(review): use_existing=True presumably loads stored forecasts rather than
# retraining — confirm in utils.machine_learning.
ml_daily_val, ml_daily_test = run_machine_learning_forecast_daily(
    daily_train,
    daily_val,
    daily_test,
    use_existing=True,
)
ml_monthly_val, ml_monthly_test = run_machine_learning_forecast_monthly(
    monthly_train,
    monthly_val,
    monthly_test,
    use_existing=True,
)

##### Deep Learning Forecast:

In [24]:
# Deep-learning family: one call per frequency, each returning a pair that is
# bound to the corresponding *_val and *_test names.
# NOTE(review): use_existing=True presumably loads stored forecasts rather than
# retraining — confirm in utils.deep_learning.
dl_daily_val, dl_daily_test = run_deep_learning_forecast_daily(
    daily_train,
    daily_val,
    daily_test,
    use_existing=True,
)
dl_monthly_val, dl_monthly_test = run_deep_learning_forecast_monthly(
    monthly_train,
    monthly_val,
    monthly_test,
    use_existing=True,
)

### Metrics:

##### Baseline Metrics:

In [25]:
# Validation-split metrics for the baseline family: the daily and monthly
# results are stacked into one frame, ordered by the "MAPE" column (ascending
# is the sort_values default), and given a fresh 0..n-1 index.
base_metrics = (
    pd.concat(
        [
            calculate_metrics(predictions, FAMILY_BASELINE, frequency, SPLIT_VAL, use_existing=True)
            for predictions, frequency in (
                (base_daily_val, FREQ_DAILY),
                (base_monthly_val, FREQ_MONTHLY),
            )
        ],
        ignore_index=True,
    )
    .sort_values(by=["MAPE"])
    .reset_index(drop=True)
)

##### Statistical Metrics:

In [26]:
# Validation-split metrics for the statistical family: the daily and monthly
# results are stacked into one frame, ordered by the "MAPE" column (ascending
# is the sort_values default), and given a fresh 0..n-1 index.
stat_metrics = (
    pd.concat(
        [
            calculate_metrics(predictions, FAMILY_STATISTICAL, frequency, SPLIT_VAL, use_existing=True)
            for predictions, frequency in (
                (stat_daily_val, FREQ_DAILY),
                (stat_monthly_val, FREQ_MONTHLY),
            )
        ],
        ignore_index=True,
    )
    .sort_values(by=["MAPE"])
    .reset_index(drop=True)
)

##### Machine Learning Metrics:

In [27]:
# Validation-split metrics for the machine-learning family: the daily and
# monthly results are stacked into one frame, ordered by the "MAPE" column
# (ascending is the sort_values default), and given a fresh 0..n-1 index.
ml_metrics = (
    pd.concat(
        [
            calculate_metrics(predictions, FAMILY_MACHINE_LEARNING, frequency, SPLIT_VAL, use_existing=True)
            for predictions, frequency in (
                (ml_daily_val, FREQ_DAILY),
                (ml_monthly_val, FREQ_MONTHLY),
            )
        ],
        ignore_index=True,
    )
    .sort_values(by=["MAPE"])
    .reset_index(drop=True)
)

##### Deep Learning Metrics:

In [28]:
# Validation-split metrics for the deep-learning family: the daily and monthly
# results are stacked into one frame, ordered by the "MAPE" column (ascending
# is the sort_values default), and given a fresh 0..n-1 index.
dl_metrics = (
    pd.concat(
        [
            calculate_metrics(predictions, FAMILY_DEEP_LEARNING, frequency, SPLIT_VAL, use_existing=True)
            for predictions, frequency in (
                (dl_daily_val, FREQ_DAILY),
                (dl_monthly_val, FREQ_MONTHLY),
            )
        ],
        ignore_index=True,
    )
    .sort_values(by=["MAPE"])
    .reset_index(drop=True)
)

##### Overall Metrics:

In [29]:
# Fetch the combined metrics table used for model ranking further down.
# The loader is called with no arguments, so it does not receive the per-family
# metric frames held in this notebook's namespace.
# NOTE(review): presumably it reads metrics persisted by earlier
# calculate_metrics runs — confirm in utils.metric_utils.
overall_metrics = load_overall_metrics()

In [30]:
# Combine every family's validation and test predictions into one object per
# frequency. Arguments are positional and grouped here as (val, test) pairs in
# the order baseline, statistical, machine learning, deep learning.
merged_daily = merge_prediction_dfs(
    base_daily_val, base_daily_test,
    stat_daily_val, stat_daily_test,
    ml_daily_val, ml_daily_test,
    dl_daily_val, dl_daily_test,
)
merged_monthly = merge_prediction_dfs(
    base_monthly_val, base_monthly_test,
    stat_monthly_val, stat_monthly_test,
    ml_monthly_val, ml_monthly_test,
    dl_monthly_val, dl_monthly_test,
)

  base_df = pd.concat([base_df_val, base_df_test], ignore_index=True)
  stat_df = pd.concat([stat_df_val, stat_df_test], ignore_index=True)
  ml_df = pd.concat([ml_df_val, ml_df_test], ignore_index=True)
  dl_df = pd.concat([dl_df_val, dl_df_test], ignore_index=True)
  base_df = pd.concat([base_df_val, base_df_test], ignore_index=True)
  stat_df = pd.concat([stat_df_val, stat_df_test], ignore_index=True)
  ml_df = pd.concat([ml_df_val, ml_df_test], ignore_index=True)
  dl_df = pd.concat([dl_df_val, dl_df_test], ignore_index=True)


In [31]:
# How many top-ranked models to retrieve (second positional argument of
# find_n_best_models). Named so the choice is visible and easy to tune.
N_BEST_MODELS = 3

# Bind the result to a name so it can be reused without relying on IPython's
# `_` / Out[...] output cache, which does not survive a fresh kernel run.
best_models = find_n_best_models(overall_metrics, N_BEST_MODELS, merged_daily, merged_monthly)

# Trailing bare expression keeps the cell's displayed output unchanged.
best_models

{0: {'name': 'LGBMRegressor_Lag',
  'frequency': 'D',
  'data':     unique_id         ds  LGBMRegressor_Lag
  0     Austria 2025-01-01         101.688448
  1     Austria 2025-01-02         101.802452
  2     Austria 2025-01-03          96.010350
  3     Austria 2025-01-04          89.030283
  4     Austria 2025-01-05          89.496594
  ..        ...        ...                ...
  725   Austria 2026-12-18         104.596315
  726   Austria 2026-12-19          78.149141
  727   Austria 2026-12-20          63.008524
  728   Austria 2026-12-21          95.061265
  729   Austria 2026-12-22          98.135814
  
  [730 rows x 3 columns]},
 1: {'name': 'RandomForestRegressor_Lag',
  'frequency': 'MS',
  'data':    unique_id         ds  RandomForestRegressor_Lag
  0    Austria 2025-01-01                 129.622953
  1    Austria 2025-02-01                  99.973828
  2    Austria 2025-03-01                  91.462505
  3    Austria 2025-04-01                  94.605130
  4    Austria 2025-