In [14]:
import logging
import os
import random
import time
import warnings
warnings.filterwarnings("ignore")
from itertools import product
from multiprocessing import cpu_count, Pool # for prophet

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from prophet import Prophet
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, _TS
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.model_selection import ParameterGrid
from utilsforecast.plotting import plot_series

from importlib import reload
import utils
reload(utils)
from utils import prepare_data, TimeMoEPredictor, chronosPredictor, LLM, wape


from utilsforecast.losses import *

from functools import partial


from utilsforecast.evaluation import evaluate

In [2]:
train, test = prepare_data(sample_size=1000,series_cutoff=48)

In [3]:
train["unique_id"].nunique()

969

In [4]:
def fit(index, ts):
    df = ts.drop(columns='unique_id', axis=1)
    
    model = Prophet(
                    weekly_seasonality=False,
                    daily_seasonality=False,
                    yearly_seasonality=True,
                    )
    
    model = model.fit(df)
    
    return model


In [5]:
init = time.time()
with Pool(cpu_count()) as pool:
    forecast_prophet = pool.starmap(fit, train.groupby('unique_id'))
end = time.time()
time_prophet = end - init
time_prophet


DEBUG:cmdstanpy:input tempfile: /tmp/tmpz1pz5ygr/2wdnbzfm.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp9_34mno3/ppgymnc6.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpz1pz5ygr/d2x_uru6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:input tempfile: /tmp/tmpsc7_003v/x2u9adg1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4vro7tuy/dcd_gc4f.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqp8pp7jl/_4754dmc.json
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4g17ohql/opdpvvx4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpsc7_003v/bxx107im.json
DEBUG:cmdstanpy:CmdStan args: ['/home/jan/m4_tests/.venv/lib/python3.12/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=3161', 'data', 'file=/tmp/tmpz1pz5ygr/2wdnbzfm.json', 'init=/tmp/tmpz1pz5ygr/d2x_uru6.json', 'output', 'file=/tmp/tmpz1pz5ygr/prophet_modelb71cknhk/prophet_model-20241207162357.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
DEBUG:cmdstanpy:input tempfile: /tmp/tmpetir1y

23.729613542556763

In [6]:
def predict_prophet(model):
    fc = (model.make_future_dataframe(periods=3, 
                                            include_history=False, 
                                            freq='M'))
    
    return model.predict(fc)

with Pool(cpu_count()) as pool:
    forecasts = pool.map(predict_prophet, forecast_prophet)


In [7]:
t = pd.concat([test[["unique_id","y"]].reset_index(),pd.concat(forecasts).reset_index()],axis=1).drop(columns="index").rename(columns = {"yhat":"prophet"})[["unique_id", "y", "ds", "prophet"]]

In [15]:
metrics = [
    mape,
    wape,
    rmse,
    partial(mase, seasonality=12),
    partial(rmsse, seasonality=12),
]

In [16]:
eval = evaluate(t, metrics=metrics, train_df=train)

In [17]:
metrics = eval.set_index(["unique_id","metric"]).stack().rename_axis(index={None: 'Model'}).rename("value")

In [18]:
metrics.groupby(level=[1,2]).median().unstack()

Model,prophet
metric,Unnamed: 1_level_1
mape,0.064713
mase,0.799822
rmse,262.093248
rmsse,0.714715
wape,0.062349


In [19]:
weights = (
    train
        .groupby("unique_id",)["y"]
        .sum()
        .rename("weights")
)

weights = weights/weights.sum()

In [20]:
(
    metrics[metrics.index.get_level_values(level=1).isin(["rmsse","rmse"])]
        .reset_index().merge(weights, on = "unique_id")
        .assign(metric = lambda x : "w"+x["metric"],
                value = lambda x : x.value*x.weights
                )
        .groupby(["metric","Model"])["value"]
        .sum()
        .sort_values()
)

metric  Model  
wrmsse  prophet      0.885253
wrmse   prophet    924.081849
Name: value, dtype: float64