In [1]:
# notebook configuration
# if '/sf/' in pwd:
#     COLAB, SAGE = False, False
# elif 'google.colab' in str(get_ipython()):
#     COLAB, SAGE = True, False # do colab-specific installs later
# else:
#     COLAB, SAGE = False, True
    
CONTEXT = 'local' # or 'colab', 'sage', 'kaggle'
USE_GPU = True 
%config Completer.use_jedi = False

In [2]:
# basic imports
from pathlib import Path
import os
import math
from datetime import datetime
import random

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


import requests # for telegram notifications
from tqdm.notebook import tqdm

from joblib import dump, load

In [3]:
# time series
import tsfresh

import darts
from darts import TimeSeries
# from darts.models import ExponentialSmoothing, AutoARIMA, ARIMA, Prophet, RandomForest, RegressionEnsembleModel, RegressionModel, TFTModel, TCNModel, TransformerModel, NBEATSModel
from darts.metrics import smape

import holidays


import torch

# tracking 
import wandb
from wandb.xgboost import wandb_callback
from wandb.lightgbm import wandb_callback
os.environ['WANDB_NOTEBOOK_NAME'] = f"nb_{datetime.now().strftime('%Y%m%d')}.ipynb"

In [4]:
from darts.models import (
    NaiveSeasonal,
    NaiveDrift,
#     Prophet, # on 20220108 postponing this due to df vs ts object wrinkles
    ExponentialSmoothing,
    ARIMA,
    AutoARIMA,
    RegressionEnsembleModel,
    RegressionModel,
    Theta,
    FFT
)

from prophet import Prophet # for now, just importing the native API
from neuralprophet import NeuralProphet

In [5]:
# deep learning
import torch
# from torch.optim import Adam, AdamW, Adagrad, SGD, RMSprop, LBFGS
# from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts, CyclicLR, OneCycleLR, StepLR, CosineAnnealingLR

# widedeep
# from pytorch_widedeep import Trainer
# from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
# from pytorch_widedeep.models import Wide, TabMlp, WideDeep, SAINT#, TabTransformer, TabNet, TabFastFormer, TabResnet
# from pytorch_widedeep.metrics import Accuracy
# from pytorch_widedeep.callbacks import EarlyStopping, LRHistory, ModelCheckpoint

In [6]:
# Resolve the project root for the current execution context. Only the root
# differs between contexts; the working directories are derived from it below.
if CONTEXT == 'colab':
    # mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    
    # handling datapath
    # datapath = Path('/content/drive/MyDrive/kaggle/tabular_playgrounds/dec2021/')
    root = Path('') # TODO

elif CONTEXT == 'sage':
    root = Path('') # TODO
    
elif CONTEXT == 'kaggle':
    root = Path('') # TODO
    
else: # if on local machine
    root = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/jan2022/')

# Derive the working directories for every context. Previously these names were
# only assigned in the local branch, so later cells that use `datapath` raised
# NameError whenever CONTEXT was 'colab', 'sage' or 'kaggle'.
datapath = root/'datasets'
# edapath = root/'EDA'
# modelpath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/oct2021/models/')
predpath = root/'preds'
subpath = root/'submissions'
studypath = root/'studies'

# `parents` is deliberately left off: if `root` itself is missing (e.g. the
# external drive is not mounted) this should fail loudly rather than create it.
for pth in [datapath, predpath, subpath, studypath]:
    pth.mkdir(exist_ok=True)

In [7]:
SEED = 42

# Function to seed everything but the models
def seed_everything(seed, pytorch=True, reproducible=True):
    """
    Seed the Python, NumPy and (optionally) PyTorch random number generators.

    Parameters
    ----------
    seed : int
        Value handed to every generator and written to PYTHONHASHSEED.
    pytorch : bool
        When True, also seed torch on the CPU and on every visible CUDA device.
    reproducible : bool
        When True (and cuDNN is available), switch cuDNN to deterministic mode
        and turn its benchmark autotuner off.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # nothing torch-related to do
    if not pytorch:
        return

    torch.manual_seed(seed)  # CPU generator
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # every GPU generator

    if reproducible and torch.backends.cudnn.is_available():
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

seed_everything(seed=SEED)

In [8]:
def reduce_memory_usage(df, verbose=True):
    """
    Downcast the numeric columns of a DataFrame to the narrowest dtype whose
    range covers the column's observed min and max.

    The frame is modified in place and also returned. Only columns whose dtype
    is one of the signed-int / float dtypes listed in `numerics` are touched.

    Dtype bounds are treated as inclusive, so a column whose max is exactly 127
    becomes int8 rather than being pushed up to int16.

    NOTE: float columns may be narrowed to float16, which keeps the range but
    only about three significant decimal digits. Skip this helper for columns
    where precision matters.

    h/t to Bryan Arnold (https://www.kaggle.com/puremath86/label-correction-experiments-tps-nov-21)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink (mutated in place).
    verbose : bool
        When True, print the final memory footprint and the percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, with downcast columns.
    """

    numerics = ["int8", "int16", "int32", "int64", "float16", "float32", "float64"]
    # candidate dtypes, narrowest first
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == "int":
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # out of float32 range, or min/max is NaN/inf: keep full width
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        # guard the percentage against a zero-byte starting footprint
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print(
            "Mem. usage decreased to {:.2f} Mb ({:.1f}% reduction)".format(
                end_mem, reduction
            )
        )
    return df

In [9]:
tg_api_token = 'your_api_token' # for Galileo (jupyter_watcher_bot) on Telegram
tg_chat_id = 'your_chat_id'

import requests

def send_tg_message(text='Cell execution completed.', timeout=10):
    """
    Post `text` to a Telegram chat through the Bot API `sendMessage` endpoint.

    Uses the module-level `tg_api_token` and `tg_chat_id` values.

    h/t Ivan Dembicki Jr. for the base version 
    (https://medium.com/@ivan.dembicki.jr/notifications-in-jupyter-notebook-with-telegram-f2e892c55173)

    Parameters
    ----------
    text : str
        Message body to send.
    timeout : float
        Seconds to wait for the Telegram API before giving up. Without a
        timeout, requests waits indefinitely, so a stalled connection would
        hang the notebook cell that called this helper.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from requests on connection errors or when `timeout` expires.
    """
    requests.post('https://api.telegram.org/' +  'bot{}/sendMessage'.format(tg_api_token),
                  params=dict(chat_id=tg_chat_id, text=text),
                  timeout=timeout)

In [10]:
def SMAPE(y_true, y_pred):
    '''
    Symmetric mean absolute percentage error, expressed in percent (0 to 200).

    Each element contributes |y_true - y_pred| / ((|y_true| + |y_pred|) / 2) * 100;
    elements where both values are zero contribute 0.

    Inputs are converted to float arrays, so lists and integer arrays are
    accepted and values are compared by position.

    h/t Jean-François Puget (@CPMP) -- see https://www.kaggle.com/c/web-traffic-time-series-forecasting/discussion/36414

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, broadcastable against `y_true`.

    Returns
    -------
    float
        Mean of the per-element scores.
    '''
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # take the magnitude of BOTH terms; using raw y_true let negative actuals
    # shrink (or zero out) the denominator
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 200.0
    abs_error = np.abs(y_true - y_pred)

    # divide only where the denominator is non-zero; the remaining slots keep
    # the 0.0 they were initialised with, and no divide-by-zero warning is raised
    diff = np.divide(
        abs_error,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    return np.mean(diff)

In [11]:
# dataset_params will initially include either trivial class instances or loaded, precomputed artifacts
dataset_params = dict(
    train_source=str(datapath / 'train.csv'),
    # target_source=str(datapath / 'train.csv'),
    test_source=str(datapath / 'test.csv'),
    # Optional precomputed preprocessing artifacts (currently disabled):
    # scaler=str(RobustScaler()),
    # pca=str(load(datapath/'pca_mle-RobustScaled_orig_trainset.joblib')),
    # umap=str(load(datapath/'umap_reducer-20211107-n_comp10-n_neighbors15-rs42-pca_mle-RobustScaled_orig_trainset.joblib')),
)

# If the artifacts above are re-enabled, also record the order they were applied in.
# Spelling out the objects is unwieldy; a plain label is more fragile but more readable:
# dataset_params['preprocessing_pipeline'] = str([dataset_params['scaler'], dataset_params['pca'], dataset_params['umap']])
# dataset_params['preprocessing_pipeline'] = '[scaler, pca, umap]'

# Load both raw tables from the same directory that dataset_params records.
df_train, df_test = (
    pd.read_csv(datapath / csv_name) for csv_name in ('train.csv', 'test.csv')
)

In [12]:
df_train['date'] = pd.to_datetime(df_train['date'])
df_test['date'] = pd.to_datetime(df_test['date'])

countries = ['Sweden', 'Finland', 'Norway']
stores = ['KaggleMart', 'KaggleRama']
products = ['Kaggle Mug', 'Kaggle Hat', 'Kaggle Sticker']

In [13]:
# Check if date is a holiday    
def isHoliday(country, date):
    """
    Return 1 when `date` appears in the holiday calendar built for `country`
    and the date's year, otherwise 0.

    h/t @sumeetbohra (https://www.kaggle.com/sumeetbohra/eda-dataviz-fe-lightgbm)

    Parameters
    ----------
    country : str
        Country identifier passed straight to `holidays.CountryHoliday`.
    date : object with a `.year` attribute
        The day to look up; only its own year is loaded into the calendar.
    """
    calendar = holidays.CountryHoliday(country, years=date.year)
    if date in calendar:
        return 1
    return 0

In [14]:
# df_train['isHoliday'] = df_train.apply(lambda x: isHoliday(x['country'], x['date'].date()), axis = 1)
# df_test['isHoliday'] = df_test.apply(lambda x: isHoliday(x['country'], x['date'].date()), axis = 1)

In [15]:
# norway_2018 = holidays.CountryHoliday('Norway', years=2018)

In [16]:
# norway_2018.keys()

In [17]:
# # in native Prophet API, holidays must be a DataFrame with "ds" and "holiday" columns.
# holidays_train = pd.DataFrame({
#     'ds': df_train['date'],
#     'holiday': df_train['isHoliday']
# })

In [18]:
df_train.tail(20)

       row_id       date  country       store         product  num_sold
26278   26278 2018-12-30   Sweden  KaggleRama      Kaggle Hat      2138
26279   26279 2018-12-30   Sweden  KaggleRama  Kaggle Sticker       587
26280   26280 2018-12-31  Finland  KaggleMart      Kaggle Mug       469
26281   26281 2018-12-31  Finland  KaggleMart      Kaggle Hat       822
26282   26282 2018-12-31  Finland  KaggleMart  Kaggle Sticker       238
26283   26283 2018-12-31  Finland  KaggleRama      Kaggle Mug       831
26284   26284 2018-12-31  Finland  KaggleRama      Kaggle Hat      1231
26285   26285 2018-12-31  Finland  KaggleRama  Kaggle Sticker       360
26286   26286 2018-12-31   Norway  KaggleMart      Kaggle Mug       728
26287   26287 2018-12-31   Norway  KaggleMart      Kaggle Hat      1124
26288   26288 2018-12-31   Norway  KaggleMart  Kaggle Sticker       351
26289   26289 2018-12-31   Norway  KaggleRama      Kaggle Mug      1383
26290   26290 2018-12-31   Norway  KaggleRama      Kaggle Hat   

In [19]:
training_params = {
    'general_random_state': SEED,
    'model_goal': 'forecasting', # or 'residual', in boosted hybrids
#     'cross_validation_type': 'holdout',
#     'validation_set_size': 0.25,
}

# following are only applicable for residual models in time series context
# folds = 5
# training_params['cross_val_strategy'] = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)

In [20]:
# will add kwargs later
model_params = {
    'architecture': 'NeuralProphet',
    'library': 'neuralprophet',
    'hyperparams': {}
}

In [21]:
# wandb config:
wandb_config = {
    'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
    'tags': ['baseline', 'forecasting', 'neuralprophet'],
    'notes': "Baseline run for Neural Prophet via native API as forecaster, following @gunesevitan's notebook."
}

In [22]:
folds = [
    ('2015-01-01', '2018-01-01'),
    ('2018-01-01', '2019-01-01'),
]

In [23]:
prophet_kwargs = {
    'growth':'linear',
#     'holidays':holidays_train, # will add this in-function
    'n_changepoints':10,
    'changepoint_range':0.4,
    'yearly_seasonality':True,
    'weekly_seasonality':True,
    'daily_seasonality':False,
    'seasonality_mode':'additive',
    'seasonality_prior_scale':25,
    'holidays_prior_scale':100,
    'changepoint_prior_scale':0.01,
    'interval_width':0.5,
    'uncertainty_samples':False
}

neuralprophet_kwargs = {
    'growth':'linear',
    'n_changepoints':10,
    'changepoints_range':0.4,
    'trend_reg':1,
    'trend_reg_threshold':False,
    'yearly_seasonality':True,
    'weekly_seasonality':True,
    'daily_seasonality':False,
    'seasonality_mode':'additive',
    'seasonality_reg':1,
    'n_forecasts':365,
    'normalize':'off'
}

model_params['hyperparams'] = str(neuralprophet_kwargs)
model_params['holiday_source'] = 'Prophet builtin for each country'

In [24]:
# baseline -- alter as needed later
exmodel_config = {
    **dataset_params,
    **training_params,
    **model_params 
}

In [25]:
df_train

       row_id       date  country       store         product  num_sold
0           0 2015-01-01  Finland  KaggleMart      Kaggle Mug       329
1           1 2015-01-01  Finland  KaggleMart      Kaggle Hat       520
2           2 2015-01-01  Finland  KaggleMart  Kaggle Sticker       146
3           3 2015-01-01  Finland  KaggleRama      Kaggle Mug       572
4           4 2015-01-01  Finland  KaggleRama      Kaggle Hat       911
...       ...        ...      ...         ...             ...       ...
26293   26293 2018-12-31   Sweden  KaggleMart      Kaggle Hat       823
26294   26294 2018-12-31   Sweden  KaggleMart  Kaggle Sticker       250
26295   26295 2018-12-31   Sweden  KaggleRama      Kaggle Mug      1004
26296   26296 2018-12-31   Sweden  KaggleRama      Kaggle Hat      1441
26297   26297 2018-12-31   Sweden  KaggleRama  Kaggle Sticker       388

[26298 rows x 6 columns]

In [26]:
# Use an explicit country name here: the `country` variable is not assigned
# until the single-series selection cell further down, so referencing it made
# this cell raise NameError on a fresh kernel (Restart & Run All).
model = NeuralProphet(**neuralprophet_kwargs)
model.add_country_holidays(country_name='Finland')

In [27]:
model = NeuralProphet(**neuralprophet_kwargs)
model.add_country_holidays(country_name='finland')

In [28]:
model = NeuralProphet(**neuralprophet_kwargs)
model = model.add_country_holidays(country_name='finland')

In [29]:
model = NeuralProphet(**neuralprophet_kwargs)

In [30]:
model = model.add_country_holidays(country_name='finland')

In [31]:
model = model.add_country_holidays(country_name='Finland')

In [32]:
model

<neuralprophet.forecaster.NeuralProphet at 0x7f008beee430>

In [33]:
# NOTE(review): `train` is first assigned in a later cell (the rename of
# df_train), so on a fresh kernel this call raises NameError. It only ran
# because of out-of-order execution.
model.fit(train, freq='D')

In [34]:
# NOTE(review): df_train still carries its original column names here
# (date / num_sold; the ds / y rename happens in a later cell) and interleaves
# every country/store/product series in one frame. Presumably an exploratory
# attempt that was superseded by the per-series fit below; confirm before reuse.
model.fit(df_train, freq='D')

In [35]:
train = df_train.rename(columns={'date': 'ds', 'num_sold': 'y'})

In [36]:
train

       row_id         ds  country       store         product     y
0           0 2015-01-01  Finland  KaggleMart      Kaggle Mug   329
1           1 2015-01-01  Finland  KaggleMart      Kaggle Hat   520
2           2 2015-01-01  Finland  KaggleMart  Kaggle Sticker   146
3           3 2015-01-01  Finland  KaggleRama      Kaggle Mug   572
4           4 2015-01-01  Finland  KaggleRama      Kaggle Hat   911
...       ...        ...      ...         ...             ...   ...
26293   26293 2018-12-31   Sweden  KaggleMart      Kaggle Hat   823
26294   26294 2018-12-31   Sweden  KaggleMart  Kaggle Sticker   250
26295   26295 2018-12-31   Sweden  KaggleRama      Kaggle Mug  1004
26296   26296 2018-12-31   Sweden  KaggleRama      Kaggle Hat  1441
26297   26297 2018-12-31   Sweden  KaggleRama  Kaggle Sticker   388

[26298 rows x 6 columns]

In [37]:
model.fit(train, freq='D')

In [38]:
# Pick one series (a single country/store/product combination) to experiment on.
country, store, product = 'Finland', 'KaggleRama', 'Kaggle Mug'
train_idx = (
    df_train['country'].eq(country)
    & df_train['store'].eq(store)
    & df_train['product'].eq(product)
)

# Keep only the date and target, renumber the rows, and switch to ds / y column names.
train = (
    df_train.loc[train_idx, ['date', 'num_sold']]
    .reset_index(drop=True)
    .rename(columns={'date': 'ds', 'num_sold': 'y'})
)

In [39]:
train

             ds     y
0    2015-01-01   572
1    2015-01-02   544
2    2015-01-03   579
3    2015-01-04   582
4    2015-01-05   423
...         ...   ...
1456 2018-12-27   652
1457 2018-12-28   895
1458 2018-12-29  1398
1459 2018-12-30  1241
1460 2018-12-31   831

[1461 rows x 2 columns]

In [40]:
model.fit(train, freq='D')

     SmoothL1Loss         MAE        RMSE   RegLoss
0      351.387625  351.887628  363.100771  0.000000
1      343.869949  344.369951  355.944993  0.000000
2      334.414336  334.914334  346.301075  0.000000
3      321.596716  322.096714  333.332584  0.000000
4      304.170013  304.670015  316.462346  0.000000
..            ...         ...         ...       ...
159     25.333783   25.825522   50.915792  0.533805
160     25.329484   25.820796   50.420712  0.535413
161     25.328154   25.819373   51.387434  0.536585
162     25.325885   25.817041   49.360127  0.537391
163     25.325257   25.816413   50.275099  0.537793

[164 rows x 4 columns]

In [41]:
# country, store, product = 'Finland', 'KaggleRama', 'Kaggle Mug'
# Fit a fresh model for the same store/product pair in each listed country.
# `train` and `model` are overwritten on every pass, so only the last country's
# objects remain afterwards.
store, product = 'KaggleRama', 'Kaggle Mug'
for country in ('Sweden', 'Finland'):
    train_idx = (
        df_train['country'].eq(country)
        & df_train['store'].eq(store)
        & df_train['product'].eq(product)
    )
    train = (
        df_train.loc[train_idx, ['date', 'num_sold']]
        .reset_index(drop=True)
        .rename(columns={'date': 'ds', 'num_sold': 'y'})
    )

    model = NeuralProphet(**neuralprophet_kwargs).add_country_holidays(country_name=country)
    model.fit(train, freq='D')

In [42]:
def trainer(model_kwargs=neuralprophet_kwargs, countries=countries, stores=stores, products=products, folds=folds, 
            df_train=df_train, df_test=df_test, wandb_tracked=False):
    """
    Fit one NeuralProphet model per (country, store, product) series, score it
    on a holdout window, and write the forecasts back into the frames.

    `folds` is a list of (start, end) date strings. Every entry except the last
    is used as a training window and the entry after it as the validation
    window; both windows are half-open, [start, end).

    Parameters
    ----------
    model_kwargs : dict
        Keyword arguments passed to `NeuralProphet(...)`.
    countries, stores, products : list of str
        Values used to select each individual series.
    folds : list of (str, str)
        Date windows as described above.
    df_train : pandas.DataFrame
        Must contain `date`, `country`, `store`, `product` and `num_sold`.
    df_test : pandas.DataFrame
        Must contain `date`, `country`, `store` and `product`.
    wandb_tracked : bool
        When True, open a Weights & Biases run, log every validation score
        plus the overall averages, and close the run at the end.

    Returns
    -------
    tuple
        (df_train, df_test, train_smape, val_smape), where the two scores are
        averaged over every model that was fitted.

    Notes
    -----
    * `df_train` and `df_test` are modified in place: forecasts are written to
      `df_train['prophet_forecast']` and `df_test['neuralprophet_forecast']`.
      The train-side column keeps its historical `prophet_forecast` name so
      downstream code that reads it is unaffected.
    * With more than two folds, the test forecast of a later fold overwrites
      the one from an earlier fold for the same series.
    """
    train_smape = 0
    val_smape = 0
    n_fits = 0  # number of (series, fold) models actually fitted
    
    if wandb_tracked:
        wandb.init(
            project="202201_Kaggle_tabular_playground",
            save_code=True,
            tags=wandb_config['tags'],
            name=wandb_config['name'],
            notes=wandb_config['notes'],
            config=exmodel_config
        )
    
    for country in countries:
        for store in stores:
            for product in products:
                # rows of each frame that belong to this one series (fold-independent)
                series_rows = (df_train['country'] == country) &\
                              (df_train['store'] == store) &\
                              (df_train['product'] == product)
                test_idx = (df_test['country'] == country) &\
                           (df_test['store'] == store) &\
                           (df_test['product'] == product)

                # the last fold only ever serves as a validation window
                for fold, (start, end) in enumerate(folds[:-1]):
                    val_start, val_end = folds[fold + 1]

                    train_idx = series_rows &\
                                (df_train['date'] >= start) &\
                                (df_train['date'] < end)
                    val_idx = series_rows &\
                              (df_train['date'] >= val_start) &\
                              (df_train['date'] < val_end)

                    # local (holdout) train / validation frames, using the ds / y column names
                    train = df_train.loc[train_idx, ['date', 'num_sold']].reset_index(drop=True)
                    train = train.rename(columns={'date': 'ds', 'num_sold': 'y'})
                    val = df_train.loc[val_idx, ['date', 'num_sold']].reset_index(drop=True)
                    val = val.rename(columns={'date': 'ds', 'num_sold': 'y'})

                    model = NeuralProphet(**model_kwargs)
                    model = model.add_country_holidays(country_name=country) # library-provided holidays for this country
                    model.fit(train, freq='D')

                    # NeuralProphet names its forecast column `yhat1`
                    # (native Prophet would be model.predict(df[['ds']])['yhat'])
                    train_predictions = model.predict(train)['yhat1']
                    val_predictions = model.predict(val)['yhat1']
                    df_train.loc[train_idx, 'prophet_forecast'] = train_predictions.values
                    df_train.loc[val_idx, 'prophet_forecast'] =  val_predictions.values

                    train_score = SMAPE(train['y'].values, train_predictions.values)
                    val_score = SMAPE(val['y'].values, val_predictions.values)
            
                    if wandb_tracked:
                        wandb.log({f"{(country,store,product)}_valid_smape": val_score})
            
                    train_smape += train_score
                    val_smape += val_score
                    n_fits += 1
            
                    print(f'\nTraining Range [{start}, {end}) - {country} - {store} - {product} - Train SMAPE: {train_score:4f}')
                    print(f'Validation Range [{folds[fold + 1][0]}, {folds[fold + 1][1]}) - {country} - {store} - {product} - Validation SMAPE: {val_score:4f}\n')

                    test = df_test.loc[test_idx, ['date']].reset_index(drop=True)
                    # The test rows have no target. Give them a placeholder `y` of NaN,
                    # the same shape NeuralProphet's make_future_dataframe produces
                    # for future dates, so predict() receives both ds and y.
                    test = test.rename(columns={'date': 'ds'}).assign(y=float('nan'))
                    # read `yhat1`, consistent with the train / validation predictions above
                    test_predictions = model.predict(test)['yhat1']
                    
                    df_test.loc[test_idx, 'neuralprophet_forecast'] = test_predictions.values
    
    # average over the models actually fitted instead of a hard-coded 3*2*3
    if n_fits:
        train_smape /= n_fits
        val_smape /= n_fits
    
    if wandb_tracked:
        wandb.log({'overall_train_smape': train_smape, 'overall_valid_smape': val_smape})
        wandb.finish()
    return df_train, df_test, train_smape, val_smape

In [43]:
# df_train_prophet, df_test_prophet = exdarts_trainer(model)
# df_train_prophet, df_test_prophet, train_smape_prophet, val_smape_prophet = prophet_trainer(wandb_tracked=True)
df_train_preds, df_test_preds, train_smape, val_smape = trainer(model_kwargs=neuralprophet_kwargs, wandb_tracked=True)

  0%|          | 0/252 [00:00<?, ?it/s]

  0%|          | 0/252 [00:00<?, ?it/s]

  0%|          | 0/252 [00:00<?, ?it/s]

In [44]:
def trainer(model_kwargs=neuralprophet_kwargs, countries=countries, stores=stores, products=products, folds=folds, 
            df_train=df_train, df_test=df_test, wandb_tracked=False):
    """
    Fit one NeuralProphet model per (country, store, product) series, score it
    on a holdout window, and write the forecasts back into the frames.

    `folds` is a list of (start, end) date strings. Every entry except the last
    is used as a training window and the entry after it as the validation
    window; both windows are half-open, [start, end).

    Parameters
    ----------
    model_kwargs : dict
        Keyword arguments passed to `NeuralProphet(...)`.
    countries, stores, products : list of str
        Values used to select each individual series.
    folds : list of (str, str)
        Date windows as described above.
    df_train : pandas.DataFrame
        Must contain `date`, `country`, `store`, `product` and `num_sold`.
    df_test : pandas.DataFrame
        Must contain `date`, `country`, `store` and `product`.
    wandb_tracked : bool
        When True, open a Weights & Biases run, log every validation score
        plus the overall averages, and close the run at the end.

    Returns
    -------
    tuple
        (df_train, df_test, train_smape, val_smape), where the two scores are
        averaged over every model that was fitted.

    Notes
    -----
    * `df_train` and `df_test` are modified in place: forecasts are written to
      `df_train['prophet_forecast']` and `df_test['neuralprophet_forecast']`.
      The train-side column keeps its historical `prophet_forecast` name so
      downstream code that reads it is unaffected.
    * With more than two folds, the test forecast of a later fold overwrites
      the one from an earlier fold for the same series.
    """
    train_smape = 0
    val_smape = 0
    n_fits = 0  # number of (series, fold) models actually fitted
    
    if wandb_tracked:
        wandb.init(
            project="202201_Kaggle_tabular_playground",
            save_code=True,
            tags=wandb_config['tags'],
            name=wandb_config['name'],
            notes=wandb_config['notes'],
            config=exmodel_config
        )
    
    for country in countries:
        for store in stores:
            for product in products:
                # rows of each frame that belong to this one series (fold-independent)
                series_rows = (df_train['country'] == country) &\
                              (df_train['store'] == store) &\
                              (df_train['product'] == product)
                test_idx = (df_test['country'] == country) &\
                           (df_test['store'] == store) &\
                           (df_test['product'] == product)

                # the last fold only ever serves as a validation window
                for fold, (start, end) in enumerate(folds[:-1]):
                    val_start, val_end = folds[fold + 1]

                    train_idx = series_rows &\
                                (df_train['date'] >= start) &\
                                (df_train['date'] < end)
                    val_idx = series_rows &\
                              (df_train['date'] >= val_start) &\
                              (df_train['date'] < val_end)

                    # local (holdout) train / validation frames, using the ds / y column names
                    train = df_train.loc[train_idx, ['date', 'num_sold']].reset_index(drop=True)
                    train = train.rename(columns={'date': 'ds', 'num_sold': 'y'})
                    val = df_train.loc[val_idx, ['date', 'num_sold']].reset_index(drop=True)
                    val = val.rename(columns={'date': 'ds', 'num_sold': 'y'})

                    model = NeuralProphet(**model_kwargs)
                    model = model.add_country_holidays(country_name=country) # library-provided holidays for this country
                    model.fit(train, freq='D')

                    # NeuralProphet names its forecast column `yhat1`
                    # (native Prophet would be model.predict(df[['ds']])['yhat'])
                    train_predictions = model.predict(train)['yhat1']
                    val_predictions = model.predict(val)['yhat1']
                    df_train.loc[train_idx, 'prophet_forecast'] = train_predictions.values
                    df_train.loc[val_idx, 'prophet_forecast'] =  val_predictions.values

                    train_score = SMAPE(train['y'].values, train_predictions.values)
                    val_score = SMAPE(val['y'].values, val_predictions.values)
            
                    if wandb_tracked:
                        wandb.log({f"{(country,store,product)}_valid_smape": val_score})
            
                    train_smape += train_score
                    val_smape += val_score
                    n_fits += 1
            
                    print(f'\nTraining Range [{start}, {end}) - {country} - {store} - {product} - Train SMAPE: {train_score:4f}')
                    print(f'Validation Range [{folds[fold + 1][0]}, {folds[fold + 1][1]}) - {country} - {store} - {product} - Validation SMAPE: {val_score:4f}\n')

                    test = df_test.loc[test_idx, ['date']].reset_index(drop=True)
                    # The test rows have no target. Give them a placeholder `y` of NaN,
                    # the same shape NeuralProphet's make_future_dataframe produces
                    # for future dates, so predict() receives both ds and y.
                    test = test.rename(columns={'date': 'ds'}).assign(y=float('nan'))
                    # read `yhat1`, consistent with the train / validation predictions above
                    test_predictions = model.predict(test)['yhat1']
                    
                    df_test.loc[test_idx, 'neuralprophet_forecast'] = test_predictions.values
    
    # average over the models actually fitted instead of a hard-coded 3*2*3
    if n_fits:
        train_smape /= n_fits
        val_smape /= n_fits
    
    if wandb_tracked:
        wandb.log({'overall_train_smape': train_smape, 'overall_valid_smape': val_smape})
        wandb.finish()
    return df_train, df_test, train_smape, val_smape

In [45]:
# df_train_prophet, df_test_prophet = exdarts_trainer(model)
# df_train_prophet, df_test_prophet, train_smape_prophet, val_smape_prophet = prophet_trainer(wandb_tracked=True)
df_train_preds, df_test_preds, train_smape, val_smape = trainer(model_kwargs=neuralprophet_kwargs, wandb_tracked=True)