In [1]:
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import HoltWinters,AutoARIMA
from utils import preprocess
from tqdm import tqdm

df = pd.read_csv('data/01_input_history.csv')
df.head()
df_train_null, df_train_inactive, df_train_active, df_validation = preprocess.preprocess_ex1(df)

In [None]:
# # Scaler
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# df_train_active_scaled = df_train_active.copy()
# df_train_active_scaled['Quantity'] = scaler.fit_transform(df_train_active_scaled[['Quantity']])

In [2]:
# Forecasting horizon
H = 12  # 12 months ahead

# Set up the model
sf = StatsForecast(
    models=[
        HoltWinters(season_length=12, error_type="A")
        #GARCH(p=1, q=1),
        #AutoARIMA(season_length=12)
    ],
    freq='MS',
    n_jobs=-1
)

# Fit the model
sf.fit(
    df_train_active[['unique_id', 'ds', 'Quantity']],
    target_col='Quantity',
    )

StatsForecast(models=[HoltWinters])

In [3]:
y_hat = sf.predict(h=H)
y_hat

Unnamed: 0,unique_id,ds,HoltWinters
0,Australia_BrightBreeze Insect Repellent,2023-01-01,846.067078
1,Australia_BrightBreeze Insect Repellent,2023-02-01,842.904175
2,Australia_BrightBreeze Insect Repellent,2023-03-01,857.736328
3,Australia_BrightBreeze Insect Repellent,2023-04-01,879.504761
4,Australia_BrightBreeze Insect Repellent,2023-05-01,896.190125
...,...,...,...
4855,United Kingdom_SunShield SPF 50 Lotion,2023-08-01,1880.781982
4856,United Kingdom_SunShield SPF 50 Lotion,2023-09-01,1998.426636
4857,United Kingdom_SunShield SPF 50 Lotion,2023-10-01,1931.850586
4858,United Kingdom_SunShield SPF 50 Lotion,2023-11-01,1937.193604


In [4]:
# Inverse transform the scaled predictions
# y_hat['HoltWinters'] = scaler.inverse_transform(y_hat[['HoltWinters']]).astype(int)
# y_hat['MSTL'] = scaler.inverse_transform(y_hat[['MSTL']]).astype(int)
y_hat['HoltWinters'] = y_hat[['HoltWinters']].astype(int)
#y_hat['AutoARIMA'] = y_hat[['AutoARIMA']].astype(int)
#y_hat['MSTL'] = y_hat[['MSTL']].astype(int)

In [5]:
df_validation['Forecast'] = 0

In [6]:
for id in tqdm(df_train_active['unique_id'].unique()):
    df_validation.loc[df_validation['unique_id'] == id, 'Forecast'] = y_hat.loc[y_hat['unique_id'] == id, 'HoltWinters'].values

100%|██████████| 405/405 [00:00<00:00, 1278.97it/s]


In [7]:
def restore_original_format(date_column):
    return date_column.dt.strftime('%b%Y')
df_validation['Month'] = restore_original_format(df_validation['ds'])

In [8]:
df_validation[['Country','Product','Month','Quantity']].to_csv('submissions/validation_classical.csv', index=False)

df_forecastHW = df_validation[['Country','Product','Month','Forecast']].copy()
df_forecastHW.rename(columns={'Forecast': 'Quantity'}, inplace=True)
df_forecastHW[['Country','Product','Month','Quantity']].to_csv('submissions/submission_HW.csv', index=False)

# df_forecastMSTL = df_validation[['ForecastMSTL','Country','Product','Month']].copy()
# df_forecastMSTL.rename(columns={'ForecastMSTL': 'Quantity'}, inplace=True)
# df_forecastMSTL[['Quantity','Country','Product','Month']].to_csv('submissions/submission_MSTL.csv', index=False)

# df_forecastGARCH = df_validation[['ForecastGARCH(1,1)','Country','Product','Month']].copy()
# df_forecastGARCH.rename(columns={'ForecastGARCH(1,1)': 'Quantity'}, inplace=True)
# df_forecastGARCH[['Quantity','Country','Product','Month']].to_csv('submissions/submission_GARCH.csv', index=False)

In [9]:
df_forecastHW

Unnamed: 0,Country,Product,Month,Quantity
0,Japan,MorningMint,Jan2023,0
1,Japan,MorningMint,Feb2023,0
2,Japan,MorningMint,Mar2023,0
3,Japan,MorningMint,Apr2023,0
4,Japan,MorningMint,May2023,0
...,...,...,...,...
11995,Russia,HydratingHoney Pet Wipes,Aug2023,0
11996,Russia,HydratingHoney Pet Wipes,Sep2023,0
11997,Russia,HydratingHoney Pet Wipes,Oct2023,0
11998,Russia,HydratingHoney Pet Wipes,Nov2023,0
