In [13]:
import pandas as pd 
import numpy as np 

from statsforecast import StatsForecast
from statsforecast.models import (
    AutoETS,
    DynamicOptimizedTheta,
    AutoCES,
    SeasonalWindowAverage,
    MSTL,
    AutoARIMA
    )

# Day columns d_1 .. d_700; only referenced by the disabled `.drop(skip, axis=1)` below.
skip = [f"d_{day}" for day in range(1, 700 + 1)]
HORIZON = 28  # forecast horizon

# Load the data in wide (one column per day) format.
train = pd.read_csv("/home/jupyter/mawa/data/sales_train_evaluation.csv") # .drop(skip, axis=1)

# Mapping from the day labels "d_1", "d_2", ... to calendar dates.
# One date is generated per column of the wide frame, starting on 2011-01-29.
date_range = pd.DatetimeIndex(pd.date_range("2011-01-29", periods=train.shape[1], freq="D"))
date_mapper = dict(zip((f"d_{k}" for k in range(1, len(date_range) + 1)), date_range))

# Reshape to long format (one row per series and day) ...
# (a `.loc[:ntimeseries]` row filter was previously applied here and is disabled)
train = train.melt(
    id_vars=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"],
    var_name='d',
    value_name="sold",
)
# ... attach the calendar date of each day label ...
train["ds"] = train["d"].map(date_mapper)
# ... and keep only the columns the forecasting library expects, under its naming conventions.
train = train.rename(columns={'id': 'unique_id', 'sold': 'y'})[["unique_id", "ds", "y"]]

# Preview the first row of the long-format frame.
train.head(1)

Unnamed: 0,unique_id,ds,y
0,HOBBIES_1_001_CA_1_evaluation,2011-01-29,0


In [None]:
# A weekly effect is expected.
EXPECTED_SEASONAL_VALUE = 7
HORIZON = 28

# Models that will be fitted: three single-seasonality models ...
models = [
    model_cls(season_length=EXPECTED_SEASONAL_VALUE)
    for model_cls in (AutoETS, DynamicOptimizedTheta, AutoCES)
]
# ... plus MSTL with two seasonal periods and AutoARIMA forecasting the trend component.
models.append(
    MSTL(
        season_length=[EXPECTED_SEASONAL_VALUE, 90],  # seasonalities of the time series
        trend_forecaster=AutoARIMA(),  # model used to forecast trend
    )
)

# The wrapper that runs every model over the series in `train`.
wrapper_models = StatsForecast(
    models=models,
    freq='D',
    fallback_model=SeasonalWindowAverage(
        season_length=EXPECTED_SEASONAL_VALUE,
        window_size=HORIZON,
    ),
    n_jobs=-1,
)

fcst_df = wrapper_models.forecast(h=HORIZON, df=train)
fcst_df.head()

In [19]:
# Preview the first five forecast rows (one column per fitted model).
fcst_df.head(n=5)

Unnamed: 0_level_0,ds,AutoETS,DynamicOptimizedTheta,CES,MSTL
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FOODS_1_001_CA_1_evaluation,2016-05-23,0.716969,0.489668,0.711507,-0.472595
FOODS_1_001_CA_1_evaluation,2016-05-24,0.684697,0.452394,0.684231,2.292164
FOODS_1_001_CA_1_evaluation,2016-05-25,0.742144,0.511703,0.68918,-0.167892
FOODS_1_001_CA_1_evaluation,2016-05-26,0.670335,0.4395,0.626392,0.697744
FOODS_1_001_CA_1_evaluation,2016-05-27,0.955799,0.725693,0.849806,0.890653


In [None]:
# Persist the forecasts to disk so they can be reloaded later.
fcst_df.to_pickle(path='fcst.pkl')

In [None]:
def make_submission(test, fcst_col="", filename="",
                    sample_submission_path="/home/jupyter/mawa/data/sample_submission.csv"):
    """Write a gzipped submission CSV from long-format forecasts.

    Parameters
    ----------
    test : pandas.DataFrame
        Long-format forecasts with columns ``id``, ``ds`` and ``fcst_col``
        (one row per series and forecast date).
    fcst_col : str
        Name of the column in ``test`` holding the forecast values.
    filename : str
        Suffix of the output file, written as ``submission_{filename}.csv.gz``
        in the current working directory.
    sample_submission_path : str
        Path of the sample submission CSV. It provides the list of expected
        ids and the names of the forecast columns (every column except ``id``).

    Raises
    ------
    ValueError
        If the number of distinct forecast dates in ``test`` differs from the
        number of forecast columns in the sample submission.
    """
    submission = pd.read_csv(sample_submission_path)
    forecast_cols = [col for col in submission.columns if col != 'id']

    # private leaderboard: one row per series, one column per forecast date
    # (pivot sorts the date columns chronologically).
    predictions = (test[['id', 'ds', fcst_col]]
                   .pivot(index='id', columns='ds', values=fcst_col)
                   .reset_index())
    n_dates = predictions.shape[1] - 1
    if n_dates != len(forecast_cols):
        raise ValueError(
            f"expected {len(forecast_cols)} forecast dates per series, got {n_dates}"
        )
    predictions.columns = ['id'] + forecast_cols

    # public leaderboard set to 0: reuse the untouched sample rows whose id
    # contains "evaluation" and relabel them as "validation" rows.
    # `.assign` returns a new frame, so no write happens on a slice of `submission`;
    # `na=False` keeps a missing id from breaking the mask.
    is_evaluation = submission['id'].str.contains('evaluation', regex=False, na=False)
    zero_rows = submission.loc[is_evaluation].assign(
        id=lambda frame: frame['id'].str.replace('evaluation', 'validation', regex=False)
    )

    # Keep only the ids listed in the sample submission, in its order.
    forecast_rows = submission[['id']].merge(predictions, on='id')
    final = pd.concat([forecast_rows, zero_rows])
    # Explicit per-column cast: assigning through `.iloc` does not reliably change dtypes.
    final = final.astype({col: np.float32 for col in forecast_cols})
    final.to_csv(f'submission_{filename}.csv.gz', index=False, compression="gzip")


# Write one submission file per fitted model.
for fcst_model in ("AutoETS", "DynamicOptimizedTheta", "CES", "MSTL"):
    make_submission(
        fcst_df.reset_index().assign(
            # Forecasts are rounded to two decimals and floored at zero.
            yhat=lambda frame: pd.to_numeric(frame[fcst_model].round(2).clip(lower=0)),
            # make_submission expects the series identifier under the name "id".
            id=lambda frame: frame["unique_id"],
        ),
        fcst_col="yhat",
        filename=f"statsforecast_{fcst_model}",
    )
               

In [None]:
# `train` was reshaped earlier to long format (unique_id, ds, y), so it no longer has
# an "id" column nor one column per day. The validation window (the last HORIZON days
# of every series) is therefore selected directly on the date column.
valid = (train
         .loc[lambda frame: frame["ds"] > frame["ds"].max() - pd.Timedelta(days=HORIZON)]
         [["unique_id", "ds", "y"]]
         .reset_index(drop=True)
         )

In [None]:

# Plot the first five unique_ids for a selection of models.
# Only names that are columns of `fcst_df` can be requested: AutoARIMA is used solely as
# MSTL's trend forecaster and has no forecast column of its own.
wrapper_models.plot(valid, fcst_df, 
        models=["AutoETS", "DynamicOptimizedTheta", "MSTL"], 
        unique_ids=valid['unique_id'].unique()[:5], 
        # level=[90], 
        engine='plotly'
        )