In [89]:
from pathlib import Path
from typing import Union

import matplotlib.pyplot as plt
import pandas as pd
import torch

# Modeling
from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import MSE
from neuralforecast.losses.pytorch import BasePointLoss
from neuralforecast.models import LSTM  # DeepAR, NHITS, RNN
from neuralforecast.utils import augment_calendar_df

from utils import preprocess
from utils.losses import customLoss

In [90]:
# Load the raw history file and preview its first rows.
df = pd.read_csv('data/01_input_history.csv')
df.head()

Unnamed: 0,Country,Product,Month,Quantity
0,Japan,MorningMint,Jan2004,0
1,Japan,MorningMint,Feb2004,0
2,Japan,MorningMint,Mar2004,0
3,Japan,MorningMint,Apr2004,0
4,Japan,MorningMint,May2004,0


In [91]:
# Pre-process the raw history with the project-local helper (`preprocess` is
# imported in the notebook's top import cell). It returns four frames: the
# null, inactive and active training series plus the validation frame.
# NOTE(review): the meaning of "null"/"inactive"/"active" is defined inside
# utils.preprocess and is not visible from this notebook.
df_train_null, df_train_inactive, df_train_active, df_validation = preprocess.preprocess_ex1(df)

In [4]:
# Combine the active and inactive training series into a single frame. The
# listed columns are the join keys and how='outer' keeps keys that appear in
# either frame.
df_train_merged = df_train_active.merge(
    df_train_inactive,
    how='outer',
    on=['unique_id', 'ds', 'Quantity', 'Country', 'Product'],
)

# Static features: one row per series, with Country and Product one-hot
# encoded (drop_first=True drops the first level of each).
df_train_static = (
    df_train_merged[['unique_id', 'Country', 'Product']]
    .drop_duplicates()
    .reset_index(drop=True)
    .pipe(pd.get_dummies, columns=['Country', 'Product'], drop_first=True)
)

# Sanity check: the static frame must hold exactly one row per series.
assert len(df_train_static) == df_train_merged['unique_id'].nunique(), 'The number of unique_id in static and merged dataframes do not match!'

In [5]:
# Add calendar features to the training and validation frames.
# augment_calendar_df returns a tuple; only its first element (the augmented
# frame) is kept.
df_train_merged, df_validation = (
    augment_calendar_df(frame, freq='M')[0]
    for frame in (df_train_merged, df_validation)
)

In [6]:
# Number of future steps the model predicts; also used to size the LSTM input window below.
FORECASTING_HORIZON = 12 # one year ahead forecast

In [8]:
# Training loss from the project-local utils.losses module.
custom_loss = customLoss()

# Forecasting wrapper holding a single LSTM model on a month-start ('MS') frequency.
# NOTE(review): the calendar features are built with freq='M' elsewhere in this
# notebook while the model uses 'MS' - confirm the two are meant to differ.
nf = NeuralForecast(
    models=[
        # Model 1: Long-short term memory
        LSTM(
            h=FORECASTING_HORIZON,
            input_size=FORECASTING_HORIZON * 2,  # look back two horizons
            loss=custom_loss,
            encoder_n_layers=2,
            encoder_hidden_size=128,
            decoder_hidden_size=128,
            decoder_layers=2,
            futr_exog_list=['month'],
            # Every static column except the series key. Selecting by name
            # (rather than slicing off the first column) stays correct even
            # if the column order of df_train_static changes.
            stat_exog_list=[col for col in df_train_static.columns if col != 'unique_id'],
            batch_size=128,
            learning_rate=1e-3,
            max_steps=2000,
            scaler_type=None,
            random_seed=42,
        ),
    ],
    freq='MS',
)

Seed set to 42


In [10]:
# Train once and cache the fitted model on disk. Without this step `nf` is
# never fitted on a fresh kernel, so the forecasting cells below cannot run
# under "Restart Kernel -> Run All".
# Delete the MODEL_DIR folder to force a re-train.
MODEL_DIR = Path('models/lstm_model')

if MODEL_DIR.exists():
    # Reuse the previously trained model instead of re-training.
    nf = NeuralForecast.load(str(MODEL_DIR))
else:
    nf.fit(
        df=df_train_merged[['unique_id', 'ds', 'Quantity', 'month']],
        static_df=df_train_static,
        target_col='Quantity',
    )
    nf.save(str(MODEL_DIR))

In [11]:
# save the model 
# nf.save('models/lstm_model')

In [13]:
# nf = NeuralForecast.load('models/lstm_model')

In [17]:
# Build the frame of future rows to forecast. Requires `nf` to hold a fitted
# (or loaded) model at this point.
future_df = nf.make_future_dataframe()

In [None]:
# Add the calendar features to the future rows; the model is configured with
# 'month' as a future exogenous variable. Only the first element of the
# returned tuple (the augmented frame) is kept.
future_df = augment_calendar_df(future_df, freq='M')[0]

In [20]:
# Display the future frame to check its unique_id / ds / month columns.
future_df

Unnamed: 0,unique_id,ds,month
0,Australia_BrightBreeze Insect Repellent,2023-01-01,-0.500000
1,Australia_BrightBreeze Insect Repellent,2023-02-01,-0.409091
2,Australia_BrightBreeze Insect Repellent,2023-03-01,-0.318182
3,Australia_BrightBreeze Insect Repellent,2023-04-01,-0.227273
4,Australia_BrightBreeze Insect Repellent,2023-05-01,-0.136364
...,...,...,...
6595,United Kingdom_SunShield SPF 50 Lotion,2023-08-01,0.136364
6596,United Kingdom_SunShield SPF 50 Lotion,2023-09-01,0.227273
6597,United Kingdom_SunShield SPF 50 Lotion,2023-10-01,0.318182
6598,United Kingdom_SunShield SPF 50 Lotion,2023-11-01,0.409091


In [92]:
# Forecast the rows in future_df, which carries the future 'month' feature.
y_hat = nf.predict(futr_df=future_df)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [93]:
# Keep predictions only for series that appear in the active training frame.
y_hat = y_hat[y_hat['unique_id'].isin(df_train_active['unique_id'].unique())]

In [94]:
# Start from the validation keys with a default forecast of 0, then overwrite
# the rows of every series for which a prediction is available.
df_forecast = df_validation[['unique_id', 'ds']].copy()
df_forecast['Quantity'] = 0

# The loop variable is deliberately not called `id`: at notebook top level
# that would overwrite the builtin `id` for the rest of the kernel session.
for series_id in y_hat['unique_id'].unique():
    series_pred = y_hat.loc[y_hat['unique_id'] == series_id, 'LSTM']
    # Values are assigned positionally, so the rows of a series must be in the
    # same order (and number) in y_hat and df_validation.
    # NOTE(review): astype(int) truncates toward zero rather than rounding -
    # confirm this is the intended conversion.
    df_forecast.loc[df_forecast['unique_id'] == series_id, 'Quantity'] = series_pred.values.astype(int)
    

In [95]:
def restore_original_format(date_column: pd.Series) -> pd.Series:
    """Format a datetime column as abbreviated month plus four-digit year.

    Args:
        date_column: Series exposing the ``.dt`` accessor.

    Returns:
        Series of strings such as ``'Jan2023'``, on the same index as the input.
    """
    month_year_pattern = '%b%Y'
    formatted = date_column.dt.strftime(month_year_pattern)
    return formatted

In [96]:
def submission_formatter(df):
    """Convert a frame keyed by ``unique_id``/``ds`` to the Country/Product/Month layout.

    ``unique_id`` is split at its first underscore into ``Country`` and
    ``Product``, so a product name that itself contains an underscore is kept
    whole. ``ds`` is rendered through ``restore_original_format`` into
    ``Month``.

    Args:
        df: DataFrame with a string ``unique_id`` column and a datetime ``ds``
            column; any other columns are passed through unchanged.

    Returns:
        A new DataFrame without ``unique_id`` and ``ds`` and with ``Country``,
        ``Product`` and ``Month`` appended. The input frame is not modified.
    """
    # n=1 limits the split to the first underscore only.
    id_parts = df['unique_id'].str.split('_', n=1)

    restored_df = df.copy()
    restored_df['Country'] = id_parts.str[0]
    restored_df['Product'] = id_parts.str[1]
    restored_df['Month'] = restore_original_format(restored_df['ds'])
    return restored_df.drop(columns=['unique_id', 'ds'])

    
# Convert both frames to the submission layout.
# Not idempotent: the formatter drops 'unique_id' and 'ds', so running this
# cell a second time on the already-formatted frames raises a KeyError.
df_forecast, df_validation = (
    submission_formatter(frame) for frame in (df_forecast, df_validation)
)

In [None]:
# Save to CSV. The output folder is created first so the export also works
# when 'submissions/' does not exist yet.
SUBMISSION_DIR = Path('submissions')
SUBMISSION_DIR.mkdir(parents=True, exist_ok=True)

df_forecast.to_csv(SUBMISSION_DIR / 'submission_lstm.csv', index=False)
df_validation.to_csv(SUBMISSION_DIR / 'validation_lstm.csv', index=False)

Unnamed: 0,Quantity,Country,Product,Month
0,0,Japan,MorningMint,Jan2023
1,0,Japan,MorningMint,Feb2023
2,0,Japan,MorningMint,Mar2023
3,0,Japan,MorningMint,Apr2023
4,0,Japan,MorningMint,May2023
...,...,...,...,...
11995,0,Russia,HydratingHoney Pet Wipes,Aug2023
11996,0,Russia,HydratingHoney Pet Wipes,Sep2023
11997,0,Russia,HydratingHoney Pet Wipes,Oct2023
11998,0,Russia,HydratingHoney Pet Wipes,Nov2023
