# Modelando e Realizando o Forecasting

In [None]:
import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, SeasonalWindowAverage, AutoARIMA

### Informações adiconais:

Para essa aula utilizamos o arquivo **train.csv** dessa base aqui: https://www.kaggle.com/competitions/store-sales-time-series-forecasting/data?select=train.csv

In [116]:
url= 'data/train.csv'

df_competicao = pd.read_csv(url, index_col='id', parse_dates=['date'])

df_competicao

Unnamed: 0_level_0,date,store_nbr,family,sales,onpromotion
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2013-01-01,1,AUTOMOTIVE,0.000,0
1,2013-01-01,1,BABY CARE,0.000,0
2,2013-01-01,1,BEAUTY,0.000,0
3,2013-01-01,1,BEVERAGES,0.000,0
4,2013-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...
3000883,2017-08-15,9,POULTRY,438.133,0
3000884,2017-08-15,9,PREPARED FOODS,154.553,1
3000885,2017-08-15,9,PRODUCE,2419.729,148
3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


Para não precisarmos trabalhar com todas as lojas presentes na colune **store_nbr**, iremos escolher apenas uma.

In [117]:
df_competicao['store_nbr'].nunique()

54

In [118]:
df_loja_um = df_competicao.loc[df_competicao['store_nbr'] == 1, ['date', 'family', 'sales']]

Quando estamos trabalhando com times series, precisamos que a nossa base de dados siga um padrão de nomeclatura.

In [119]:
df_loja_um = df_loja_um.rename(columns={'date': 'ds', 'sales': 'y', 'family': 'unique_id'})

In [120]:
df_loja_um

Unnamed: 0_level_0,ds,unique_id,y
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2013-01-01,AUTOMOTIVE,0.000000
1,2013-01-01,BABY CARE,0.000000
2,2013-01-01,BEAUTY,0.000000
3,2013-01-01,BEVERAGES,0.000000
4,2013-01-01,BOOKS,0.000000
...,...,...,...
2999134,2017-08-15,POULTRY,234.892000
2999135,2017-08-15,PREPARED FOODS,42.822998
2999136,2017-08-15,PRODUCE,2240.230000
2999137,2017-08-15,SCHOOL AND OFFICE SUPPLIES,0.000000


## Tipos de Validação

Iremos pegar 2013 completo para treino e os 3 primeiros meses de 2014 para validar.

In [121]:
treino = df_loja_um.loc[df_loja_um['ds'] < '2014-01-01']

valid = df_loja_um[(df_loja_um['ds'] >= '2014-01-01') & (df_loja_um['ds'] <= '2014-04-01')]

h = valid['ds'].nunique()

In [122]:
h

91

In [123]:
def wmape(y_true, y_pred):
    return np.abs(y_true - y_pred).sum() / np.abs(y_true).sum()

## Criando o modelo

In [124]:
model = StatsForecast(models=[Naive()], freq='D', n_jobs=-1)

model.fit(treino)

forecast_df = model.predict(h=h, level=[90])

forecast_df = forecast_df.reset_index().merge(valid, on=['ds', 'unique_id'], how='left')

wmape1 = wmape(forecast_df['y'].values, forecast_df['Naive'].values)

print(f'WMAPE: {wmape1: .2%}')

WMAPE:  57.36%


## Modelo de sazonalidade

Agora iremos levar em consideração no nosso modelo de base a nossa sazonalidade.

In [125]:
modelS = StatsForecast(models=[SeasonalNaive(season_length=7)], freq='D', n_jobs=-1)

modelS.fit(treino)

forecast_dfs = modelS.predict(h=h, level=[90])

forecast_dfs = forecast_dfs.reset_index().merge(valid, on=['ds', 'unique_id'], how='left')

wmape2 = wmape(forecast_dfs['y'].values, forecast_dfs['SeasonalNaive'].values)

print(f'WMAPE: {wmape2: .2%}')

WMAPE:  50.84%


## Modelo de sazonalidade junto com a média model

Agora iremos levar em consideração a sazonalidade e a média model.

In [126]:
modelSM = StatsForecast(models=[SeasonalWindowAverage(season_length=7, window_size=2)], freq='D', n_jobs=-1)

modelSM.fit(treino)

forecast_dfsm = modelSM.predict(h=h)

forecast_dfsm = forecast_dfsm.reset_index().merge(valid, on=['ds', 'unique_id'], how='left')

wmape3 = wmape(forecast_dfsm['y'].values, forecast_dfsm['SeasWA'].values)

print(f'WMAPE: {wmape3: .2%}')

WMAPE:  52.74%


## Modelo ARIMA

In [127]:
modelA = StatsForecast(models=[AutoARIMA(season_length=7)], freq='D', n_jobs=-1)

modelA.fit(treino)

forecast_dfA = modelA.predict(h=h, level=[90])

forecast_dfA = forecast_dfA.reset_index().merge(valid, on=['ds', 'unique_id'], how='left')

wmape4 = wmape(forecast_dfA['y'].values, forecast_dfA['AutoARIMA'].values)

print(f'WMAPE: {wmape4: .2%}')

WMAPE:  53.20%
