# Forecasting at Scale using ETS and Ray

> Forecast the M5 dataset

In [None]:
%%capture
!pip install "statsforecast[ray]" neuralforecast s3fs pyarrow

In [None]:
%%capture
from time import time

import pandas as pd
from neuralforecast.data.datasets.m5 import M5, M5Evaluation
from statsforecast import StatsForecast
from statsforecast.models import ETS

In [None]:
# Load the M5 training target from S3 and bring it into the
# (unique_id, ds, y) column layout used by the rest of the notebook.
column_map = {'item_id': 'unique_id', 'timestamp': 'ds', 'demand': 'y'}
Y_df = (
    pd.read_parquet('s3://m5-benchmarks/data/train/target.parquet')
    .rename(columns=column_map)
    # Parse the date column so `ds` holds datetimes.
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
# Preview the first five rows of the renamed frame.
Y_df.head(5)

Unnamed: 0,unique_id,ds,y
0,FOODS_1_001_CA_1,2011-01-29,3.0
1,FOODS_1_001_CA_1,2011-01-30,0.0
2,FOODS_1_001_CA_1,2011-01-31,0.0
3,FOODS_1_001_CA_1,2011-02-01,1.0
4,FOODS_1_001_CA_1,2011-02-02,4.0


In [None]:
# Shift every target value up by a fixed offset before fitting; the same
# offset is subtracted from the 'ETS' forecasts after forecasting.
# NOTE(review): presumably the shift keeps the series strictly positive for
# the ETS fit -- confirm the motivation.
# NOTE: this cell is not idempotent -- each re-run adds the offset to
# Y_df['y'] again, so run it exactly once per freshly loaded Y_df.
constant = 10
Y_df['y'] = Y_df['y'] + constant

In [None]:
# One ETS model with a season length of 7 and the 'ZNA' model specification.
ets_models = [ETS(season_length=7, model='ZNA')]

# Build the forecaster on the shifted data at daily frequency.
# `ADDRESS` in the Ray URL looks like a placeholder for the cluster's head
# node host -- replace it before running.
# NOTE(review): the original cell carried a commented-out `n_jobs=-1`,
# presumably the local (non-Ray) alternative -- confirm before relying on it.
fcst = StatsForecast(
    df=Y_df,
    models=ets_models,
    freq='D',
    ray_address='ray://ADDRESS:10001',
)

In [None]:
# Time a 28-step-ahead forecast with wall-clock timestamps taken
# immediately before and after the call.
init = time()
Y_hat = fcst.forecast(28)
end = time()

# Report the elapsed time in minutes (seconds / 60).
elapsed_minutes = (end - init) / 60
print(f'Minutes taken by StatsForecast using: {elapsed_minutes}')



Minutes taken by StatsForecast using: 5.4817593971888225


In [None]:
# Undo the offset that was added to the target before fitting, so the
# forecasts are back on the original scale.
# NOTE: not idempotent -- re-running this cell subtracts the offset again.
Y_hat['ETS'] = Y_hat['ETS'] - constant

In [None]:
# Reshape the forecasts from long to wide: one row per `unique_id`, with the
# `ds` values spread across the columns.
Y_hat = (
    Y_hat
    .reset_index()
    .set_index(['unique_id', 'ds'])
    .unstack()
    # unstack leaves a two-level column index; drop the outer level so only
    # the `ds` values remain as column labels.
    .droplevel(level=0, axis=1)
    .reset_index()
)

In [None]:
# Keep only the last element of what M5.load returns (bound to S_df); the
# leading elements are discarded.
*_, S_df = M5.load('./data')

# Left join: every row of S_df is kept and the wide forecasts are attached
# by `unique_id`.
Y_hat = pd.merge(S_df, Y_hat, how='left', on=['unique_id'])

100%|███████████████████████████████████████████████████████████| 50.2M/50.2M [00:00<00:00, 77.1MiB/s]


In [None]:
# Score the wide forecast frame using the M5 data stored under ./data; the
# returned table is displayed as the cell's output.
M5Evaluation.evaluate(directory='./data', y_hat=Y_hat)

Unnamed: 0,wrmsse
Total,0.677233
Level1,0.435558
Level2,0.522863
Level3,0.582109
Level4,0.488484
Level5,0.567825
Level6,0.587605
Level7,0.662774
Level8,0.647712
Level9,0.732107
