In [1]:
import numpy as np
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import HoltWinters
from tqdm import tqdm

from utils import preprocess

# Load the raw history and hand it to the project preprocessing helper, which
# returns three frames. Downstream cells fit the model on `df_train_active`
# and emit constant-zero forecasts for the ids found in `df_train_null` and
# `df_train_inactive`.
# NOTE(review): the exact split criteria live in utils.preprocess — confirm there.
df = pd.read_csv('data/01_input_history.csv')
df_train_null, df_train_inactive, df_train_active = preprocess.preprocess_ex1_final_sub(df)

In [2]:
# Number of future periods requested from the model.
H = 12

# A single Holt-Winters model (season_length=12, error_type="A") applied to
# every series; the data is declared as month-start ('MS') frequency.
holt_winters = HoltWinters(season_length=12, error_type="A")
sf = StatsForecast(models=[holt_winters], freq='MS', n_jobs=-1)

# Fit on the active series only, using 'Quantity' as the target column.
train_cols = ['unique_id', 'ds', 'Quantity']
sf.fit(df_train_active[train_cols], target_col='Quantity')

StatsForecast(models=[HoltWinters])

In [3]:
# Forecast H periods ahead for every fitted series.
y_hat = sf.predict(h=H)

# Convert the raw forecast to an integer quantity:
#   * round first — a bare astype(int) truncates toward zero, which
#     systematically under-forecasts positive values;
#   * floor at 0 — the model output is unconstrained and can dip below zero,
#     which is not a meaningful quantity.
y_hat['Quantity'] = y_hat['HoltWinters'].round().clip(lower=0).astype(int)

In [4]:
# Series that are not modelled (ids from the null and inactive frames) get a
# constant zero forecast.
zeros_ids = list(df_train_null['unique_id'].unique()) + list(df_train_inactive['unique_id'].unique())

# Month-start timestamps for the forecast window. The length follows the
# forecast horizon H so it cannot drift from what the model predicts.
# NOTE(review): the start date is hard-coded; it must equal the first month
# produced by sf.predict — confirm against the training data's last month.
date_range = pd.date_range(start='2024-01-01', periods=H, freq='MS')

# Cartesian product id x month: each id is repeated for every month, in the
# order the ids appear in zeros_ids, with Quantity fixed at 0.
df_zeros = (
    pd.MultiIndex.from_product([zeros_ids, date_range], names=['unique_id', 'ds'])
    .to_frame(index=False)
    .assign(Quantity=0)
)

In [5]:
# Stack the model forecasts (minus the raw 'HoltWinters' column) on top of the
# zero-forecast rows, renumbering the index from 0.
active_forecast = y_hat.drop(columns='HoltWinters')
final_df = pd.concat([active_forecast, df_zeros], ignore_index=True)
final_df.shape

(12000, 3)

In [6]:
def restore_original_format(date_column: pd.Series) -> pd.Series:
    """Format a datetime Series as abbreviated month name followed by the year.

    Args:
        date_column: Series with a datetime dtype (must support the ``.dt`` accessor).

    Returns:
        Series of strings rendered with the ``'%b%Y'`` pattern, e.g. ``'Jan2024'``.
    """
    month_year_pattern = '%b%Y'
    formatted = date_column.dt.strftime(month_year_pattern)
    return formatted
# Add the formatted 'Month' label derived from each row's 'ds' timestamp.
final_df['Month'] = final_df['ds'].pipe(restore_original_format)

In [7]:
# Split each 'unique_id' on '_' and keep the first token as Country and the
# second as Product. Done in one vectorized pass instead of re-scanning the
# whole frame with a boolean mask for every id (and without shadowing the
# builtin `id`).
# NOTE(review): as before, only the second token is kept for Product, so an id
# containing more than one '_' loses everything after the second token —
# confirm how unique_id is built in preprocessing.
id_parts = final_df['unique_id'].str.split('_')
final_df['Country'] = id_parts.str[0]
final_df['Product'] = id_parts.str[1]

In [8]:
# Write the submission file: four columns in this order, without the index.
submission_columns = ['Country', 'Product', 'Month', 'Quantity']
final_df[submission_columns].to_csv('submissions/01_output_prediction_1239.csv', index=False)