In [2]:
!pip install torch -f https://download.pytorch.org/whl/torch_stable.html.
!pip install pytorch-forecasting
!pip install pandas --upgrade
!pip install numexpr==2.6.1

In [3]:
# Directory prefix that is prepended to every input CSV file name when reading data.
prefix_path = "../input/texas-sales-forecasting/"

In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
import time
from datetime import datetime 
from tqdm import tqdm
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
#import torch
from pytorch_forecasting import Baseline, DeepAR, TemporalFusionTransformer, NBeats, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss, NormalDistributionLoss, RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
import tensorflow as tf 
import tensorboard as tb 
# Replace TensorFlow's `io.gfile` with the stub implementation shipped with TensorBoard.
# NOTE(review): presumably a workaround for a TensorFlow/TensorBoard gfile
# incompatibility when logging from PyTorch — confirm it is still required.
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
# Options
import logging
# Only show WARNING and above from the 'prophet' logger.
# NOTE(review): prophet is not imported anywhere in the visible notebook.
logging.getLogger('prophet').setLevel(logging.WARNING) 
#pd.set_option("display.max_rows", None, "display.max_columns", None)

In [18]:
# Load the raw inputs.
# The two sales files are read without a header row and with their first column
# as index, then transposed, so the values of that first column become the
# column names of `train` / `test`.
train = pd.read_csv(
    f"{prefix_path}sales_train_validation_afcs2021.csv",
    index_col=0,
    header=None,
    low_memory=False,
).transpose()
test = pd.read_csv(
    f"{prefix_path}sales_test_validation_afcs2021.csv",
    index_col=0,
    header=None,
    low_memory=False,
).transpose()
# Calendar and price tables are read as-is.
calendar = pd.read_csv(f"{prefix_path}calendar_afcs2021.csv")
prices = pd.read_csv(f"{prefix_path}sell_prices_afcs2021.csv")

In [6]:
# Stack the train rows on top of the test rows.
# DataFrame.append was removed in pandas 2.0 (and pandas is upgraded in the
# install cell), so use the equivalent pd.concat.
data = pd.concat([train, test])

## Calendar Dataframe

In [7]:
# Calendar pre-processing: replace the event name/type columns of both event
# slots (event_*_1 and event_*_2) with one 0/1 indicator column per distinct
# event name and per distinct event type.

# Distinct labels over both slots, slot-1 labels first. Missing values are
# dropped explicitly instead of assuming NaN happens to be the first unique value.
event_names_1 = list(calendar.event_name_1.dropna().unique())
event_names_2 = list(calendar.event_name_2.dropna().unique())
event_names = event_names_1 + [i for i in event_names_2 if i not in event_names_1]
event_type_1 = list(calendar.event_type_1.dropna().unique())
event_type_2 = list(calendar.event_type_2.dropna().unique())
event_type = event_type_1 + [i for i in event_type_2 if i not in event_type_1]

# One-hot encode each slot under its own prefix. The dtype is pinned to uint8
# because newer pandas returns boolean dummies by default, while the later
# reshaping step looks the indicators up with the integer keys 0 and 1.
calendar = pd.get_dummies(calendar, columns=['event_name_1', 'event_type_1'], prefix='one', prefix_sep='_', dtype=np.uint8)
calendar = pd.get_dummies(calendar, columns=['event_name_2', 'event_type_2'], prefix='two', prefix_sep='_', dtype=np.uint8)

# Merge the two slots: a label is active on a day when either slot carries it.
# Types and names go through the same logic, so a label that only occurs in one
# of the slots is handled without hard-coding which slot that is.
for label in event_type + event_names:
    slot_columns = [c for c in ("one_" + label, "two_" + label) if c in calendar.columns]
    merged = calendar[slot_columns[0]]
    for extra in slot_columns[1:]:
        merged = merged | calendar[extra]
    calendar[label] = merged

# Drop the per-slot dummy columns; only the merged indicators are kept.
to_drop = [col for col in calendar.columns if col.startswith("one_") or col.startswith("two_")]
calendar = calendar.drop(columns=to_drop)

In [8]:
# Display `data` for a visual check.
data

## Sales Dataframe

In [9]:
# Cast every FOODS_* series to int64.
# NOTE(review): presumably these columns arrive as text because the sales files
# are read with header=None — confirm.
food_columns = [name for name in data.columns if name.startswith("FOODS")]
map_types = {}  # not used anywhere in the visible notebook
data = data.astype(dict.fromkeys(food_columns, 'int64'))

# Left-join the sales rows onto the calendar (calendar "d" == sales "id"), so
# every calendar row is kept. On a name clash the calendar column gets the
# "_right" suffix and the sales column keeps its name.
data = pd.merge(
    calendar,
    data,
    how="left",
    left_on="d",
    right_on="id",
    suffixes=["_right",""],
)
data = data.assign(date=pd.DatetimeIndex(data['date']))
# (A cumulative-sum transform of the FOODS columns was tried here and is disabled.)

In [10]:
# Keep everything except the final 28 rows of the merged table.
# NOTE(review): 28 equals the forecast horizon used later; re-running this cell
# trims another 28 rows each time.
data = data.head(len(data) - 28)

# Preprocess to DeepAR

In [11]:
# Reshape the wide table (one column per FOODS_* series) into long format:
# one row per (series, time step) with the columns
#   value, time_idx, <event indicators...>, group
data['time_idx'] = data.index
filter_columns = ['time_idx']+[col for col in data.columns if col.startswith("FOODS")]
event_columns = event_names + event_type

# One frame per series: its sales become "value" and its column name becomes "group".
frames = [
    data.loc[:, [col, 'time_idx'] + event_columns]
    .rename(columns={col: "value"})
    .assign(group=col)
    for col in filter_columns[1:]
]
pf_data = pd.concat(frames, ignore_index=True, axis=0)
pf_data = pf_data.fillna(0)

# Turn every indicator into a categorical: "-" when inactive, the column's own
# name when active. The indicators are cast to int64 first so the 0/1 lookup
# behaves the same whether they arrive as uint8 or as boolean columns.
pf_data[event_columns] = (
    pf_data[event_columns]
    .astype('int64')
    .apply(lambda x: x.map({0: "-", 1: x.name}))
    .astype("category")
)
pf_data['value'] = pf_data['value'].astype('int64')
pf_data['time_idx'] = pf_data['time_idx'].astype('int64')

In [14]:
# Alternative long-format table WITHOUT event indicators (columns: value, group, time_idx).
# It is stored under its own name: the TimeSeriesDataSet cell below declares the
# event columns as a categorical variable group, so overwriting `pf_data` with
# this event-free table breaks a top-to-bottom run of the notebook.
data['time_idx'] = data.index
filter_columns = ['time_idx']+[col for col in data.columns if col.startswith("FOODS")]

# Build all per-series frames first and concatenate once; growing a DataFrame
# with DataFrame.append inside the loop is quadratic and the method no longer
# exists in pandas 2.0.
plain_frames = [
    data.loc[:, [col, 'time_idx']]
    .rename(columns={col: "value"})
    .assign(group=col)
    for col in filter_columns[1:]
]
pf_data_no_events = pd.concat(plain_frames, ignore_index=True)[['value', 'group', 'time_idx']]
pf_data_no_events = pf_data_no_events.fillna(0)
pf_data_no_events['value'] = pf_data_no_events['value'].astype('int64')
pf_data_no_events['time_idx'] = pf_data_no_events['time_idx'].astype('int64')

In [15]:
# Preview the first 10 rows of the long-format table.
pf_data.head(10)

In [12]:
# Forecast horizon and encoder (look-back) window, both in time steps.
max_prediction_length = 28
max_encoder_length = 28*4
# Rows up to and including this time index are used for training; the last
# `max_prediction_length` steps of every series are held out for validation.
training_cutoff = pf_data["time_idx"].max() - max_prediction_length

# Training dataset: one series per "group", fixed-length prediction windows.
training = TimeSeriesDataSet(
    pf_data[lambda x: x.time_idx <= training_cutoff],
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["value"],
    time_varying_known_categoricals=["events"],
    variable_groups={"events": event_names+event_type},  # all event indicators are treated as one categorical variable
    target_normalizer=GroupNormalizer(groups=["group"]),  # scale the target per series (no `transformation` argument is passed)
    add_relative_time_idx=False,
    add_target_scales=True,
    randomize_length=None,
)

# Validation dataset: predict=True selects the last `max_prediction_length`
# points of each series as the prediction window.
validation = TimeSeriesDataSet.from_dataset(training, pf_data, predict=True, stop_randomization=True)

# Dataloaders for the models. The validation loader uses a 10x larger batch.
# A separate hold-out "test" dataset/dataloader was sketched in this cell but
# was disabled; only the train and validation loaders are created.
batch_size = 256
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=1)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=1)

In [None]:
# find optimal learning rate
#res = trainer.tuner.lr_find(
#    tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader, early_stop_threshold=1000.0)

#print(f"suggested learning rate: {res.suggestion()}")
#fig = res.plot(show=True, suggest=True)
#fig.show()

In [13]:
# --- Callbacks and logging -------------------------------------------------
# Stop once "val_loss" has not improved by at least 1e-4 for 10 validation runs.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min",
)
lr_logger = LearningRateMonitor()  # records the learning rate
logger = TensorBoardLogger("lightning_logs")  # TensorBoard output directory

# --- Trainer -----------------------------------------------------------------
# NOTE(review): `limit_train_batches` is given `batch_size` (an int); Lightning
# reads an int here as a number of batches per epoch — confirm this coupling is
# intended.
trainer_settings = dict(
    max_epochs=50,
    gpus=1,
    weights_summary="top",
    gradient_clip_val=0.1,
    limit_train_batches=batch_size,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)
trainer = pl.Trainer(**trainer_settings)

# --- Models (both derive their input layout from the `training` dataset) ----
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.025,
    hidden_size=32,
    attention_head_size=3,
    dropout=0.2,
    hidden_continuous_size=32,
    output_size=7,  # one output per quantile of QuantileLoss()
    loss=QuantileLoss(),
)

deepar = DeepAR.from_dataset(
    training,
    learning_rate=0.025,
    hidden_size=128,
    dropout=0.2,
    loss=NormalDistributionLoss(),
)

# Report model sizes in thousands of parameters.
print(f"Number of parameters in DeepAR: {deepar.size()/1e3:.1f}k")
print(f"Number of parameters in TFT: {tft.size()/1e3:.1f}k")

In [14]:
torch.set_num_threads(10)
# Fit DeepAR. The dataloaders are passed positionally (model, train, validation):
# the `train_dataloader=` keyword was renamed to `train_dataloaders` and later
# removed in PyTorch Lightning, whereas the positional order is stable.
trainer.fit(deepar, train_dataloader, val_dataloader)

# Load the best checkpoint according to the validation loss
# (with early stopping this is not necessarily the last epoch).
best_model_path = trainer.checkpoint_callback.best_model_path
best_deepar = DeepAR.load_from_checkpoint(best_model_path)

In [None]:
torch.set_num_threads(10)

# The TFT trainer gets its OWN callbacks and logger. EarlyStopping keeps state
# (best score, patience counter) between validation runs, so reusing the
# instance that already monitored the DeepAR run would judge TFT against
# DeepAR's best loss and an already advanced patience counter.
tft_early_stop = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
tft_lr_logger = LearningRateMonitor()
tft_logger = TensorBoardLogger("lightning_logs")

trainer_tft = pl.Trainer(
    max_epochs=50,
    gpus=1,
    weights_summary="top",
    gradient_clip_val=0.1,
    # NOTE(review): an int here is a number of batches per epoch; it reuses
    # `batch_size` as in the DeepAR trainer — confirm this is intended.
    limit_train_batches=batch_size,
    callbacks=[tft_lr_logger, tft_early_stop],
    logger=tft_logger,
)

# Fit TFT. The dataloaders are passed positionally (model, train, validation),
# because the `train_dataloader=` keyword was removed in newer PyTorch Lightning.
trainer_tft.fit(tft, train_dataloader, val_dataloader)

# Load the best checkpoint according to the validation loss
# (with early stopping this is not necessarily the last epoch).
best_model_tft_path = trainer_tft.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_tft_path)

In [15]:
# DeepAR: collect the validation targets and the matching predictions.
# Each batch from the loader is an (x, y) pair; the first element of y is taken
# as the target tensor (presumably y is a (target, weight) pair — confirm).
target_batches = [batch_y[0] for _, batch_y in val_dataloader]
actuals = torch.cat(target_batches)
predictions = best_deepar.predict(val_dataloader)

In [16]:
# Mean absolute error and root mean squared error over all validation points.
residuals = actuals - predictions
print('MAE: ', residuals.abs().mean())
print('RMSE: ', (residuals**2).mean().sqrt())

In [None]:
# TFT - calculate the mean absolute error on the validation set.
# `test_dataloader` is never created in this notebook (its construction is
# commented out), so the validation loader is used, as for DeepAR.
actuals_tft = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions_tft = best_tft.predict(val_dataloader)
(actuals_tft - predictions_tft).abs().mean()

In [None]:
# NBeats - calculate the mean absolute error on the validation set.
# NOTE(review): `best_nbeats` is not defined anywhere in the visible notebook;
# an NBeats model has to be trained and loaded before this cell can run.
# `test_dataloader` is never created either, so the validation loader is used.
actuals_nbeats = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions_nbeats = best_nbeats.predict(val_dataloader)
# Compare actuals with predictions (the previous expression subtracted the
# actuals from themselves and therefore always reported an error of 0).
(actuals_nbeats - predictions_nbeats).abs().mean()

## Creating Submission File

In [25]:
# Submission layout: an "id" column followed by one forecast column per step,
# named F1 through F28. Start from an empty frame with exactly those columns.
F_columns = ['id'] + [f"F{step}" for step in range(1, 29)]
submission = pd.DataFrame(columns=F_columns)

In [26]:
# Build the DeepAR submission: one row per series, "id" followed by F1..F28.
# All rows are assembled first and the frame is created once; appending row by
# row with DataFrame.append is quadratic, no longer exists in pandas 2.0, and
# duplicated the rows whenever this cell was re-run.
# NOTE(review): row i of `predictions` is paired with food_columns[i]; this
# assumes the model returns the series in the same order as `food_columns` —
# confirm against the dataset's group ordering.
predictions_list = predictions.tolist()
submission_rows = [
    [food_columns[idx]] + forecast
    for idx, forecast in enumerate(predictions_list)
]
submission = pd.DataFrame(submission_rows, columns=F_columns)
# Forecast values below zero are clipped to zero.
forecast_columns = submission.columns[1:]
submission[forecast_columns] = submission[forecast_columns].clip(lower=0)

In [27]:
# Write the DeepAR submission to "deepar.csv" without the index column, then display it.
submission.to_csv("deepar.csv", index=False)
submission

In [None]:
# Rounded Predictions - don't usually help
#submission[submission.columns[1:]] = submission[submission.columns[1:]].astype('int64')
#submission.to_csv("deepar_ints.csv", index=False)

In [None]:
# Build and save the TFT submission: one row per series, "id" followed by F1..F28.
# Rows are assembled first and the frame is created once; growing the frame with
# DataFrame.append in a loop is quadratic and the method no longer exists in
# pandas 2.0.
# NOTE(review): row i of `predictions_tft` is paired with food_columns[i]; this
# assumes the model returns the series in the same order as `food_columns`.
predictions_tft_list = predictions_tft.tolist()
tft_rows = [
    [food_columns[idx]] + forecast
    for idx, forecast in enumerate(predictions_tft_list)
]
submission_tft = pd.DataFrame(tft_rows, columns=F_columns)
# Forecast values below zero are clipped to zero.
tft_forecast_columns = submission_tft.columns[1:]
submission_tft[tft_forecast_columns] = submission_tft[tft_forecast_columns].clip(lower=0)
submission_tft.to_csv("tft.csv", index=False)

In [None]:
# Build and save the NBeats submission: one row per series, "id" followed by F1..F28.
# Rows are assembled first and the frame is created once; growing the frame with
# DataFrame.append in a loop is quadratic and the method no longer exists in
# pandas 2.0.
# NOTE(review): `predictions_nbeats` comes from a model (`best_nbeats`) that is
# not trained anywhere in the visible notebook; row i is paired with
# food_columns[i], which assumes the model returns series in that order.
predictions_nb_list = predictions_nbeats.tolist()
nbeats_rows = [
    [food_columns[idx]] + forecast
    for idx, forecast in enumerate(predictions_nb_list)
]
submission_nbeats = pd.DataFrame(nbeats_rows, columns=F_columns)
# Forecast values below zero are clipped to zero.
nbeats_forecast_columns = submission_nbeats.columns[1:]
submission_nbeats[nbeats_forecast_columns] = submission_nbeats[nbeats_forecast_columns].clip(lower=0)
submission_nbeats.to_csv("nbeats.csv", index=False)

#### Ensemble

In [None]:
# Ensemble: equal-weight average of the DeepAR and TFT forecasts.
# The ids are taken from the DeepAR submission; the F1.. columns of both
# submissions are combined by label, so both frames must list the series in the
# same row order.
model_weight = 1/2
submission_emsemble = pd.DataFrame(columns=F_columns)
submission_emsemble['id'] = submission['id']
deepar_share = submission.loc[:, 'F1':] * model_weight
tft_share = submission_tft.loc[:, 'F1':] * model_weight
submission_emsemble.loc[:, 'F1':] = deepar_share + tft_share
submission_emsemble.to_csv("emsemble.csv", index=False)

In [None]:
# Display the ensemble submission.
submission_emsemble

### Predicting on New Data

In [19]:
# Rebuild the whole data pipeline on the full calendar and forecast with the
# trained DeepAR model. Unlike the earlier pipeline, the merged table is NOT
# trimmed by its last 28 rows here, so the predict=True dataset targets the
# final 28 time steps of the calendar.

# Reload the raw calendar: the earlier pre-processing cell replaced `calendar`
# with its encoded form, which no longer has the event_name_* / event_type_*
# columns that are read below.
calendar = pd.read_csv(prefix_path+"calendar_afcs2021.csv")
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
data = pd.concat([train, test])

# --- Calendar pre-processing -------------------------------------------------
# Distinct labels over both event slots, slot-1 labels first. Missing values are
# dropped explicitly instead of assuming NaN is the first unique value.
event_names_1 = list(calendar.event_name_1.dropna().unique())
event_names_2 = list(calendar.event_name_2.dropna().unique())
event_names = event_names_1 + [i for i in event_names_2 if i not in event_names_1]
event_type_1 = list(calendar.event_type_1.dropna().unique())
event_type_2 = list(calendar.event_type_2.dropna().unique())
event_type = event_type_1 + [i for i in event_type_2 if i not in event_type_1]
# One-hot encode each slot. dtype is pinned to uint8 because newer pandas
# returns boolean dummies by default, while the mapping further down looks the
# indicators up with the integer keys 0 and 1.
calendar = pd.get_dummies(calendar, columns=['event_name_1', 'event_type_1'], prefix='one', prefix_sep='_', dtype=np.uint8)
calendar = pd.get_dummies(calendar, columns=['event_name_2', 'event_type_2'], prefix='two', prefix_sep='_', dtype=np.uint8)
# Merge the two slots: a label is active on a day when either slot carries it.
for label in event_type + event_names:
    slot_columns = [c for c in ("one_" + label, "two_" + label) if c in calendar.columns]
    merged = calendar[slot_columns[0]]
    for extra in slot_columns[1:]:
        merged = merged | calendar[extra]
    calendar[label] = merged
# Drop the per-slot dummy columns; only the merged indicators are kept.
to_drop = [col for col in calendar.columns if col.startswith("one_") or col.startswith("two_")]
calendar = calendar.drop(columns=to_drop)

# --- Sales table -------------------------------------------------------------
# Cast every FOODS_* series to int64, then left-join the sales rows onto the
# calendar (calendar "d" == sales "id") so that every calendar row is kept.
food_columns = [col for col in data.columns if col.startswith("FOODS")]
data = data.astype(dict.fromkeys(food_columns, 'int64'))
data = calendar.merge(data, left_on="d", right_on="id", how="left", suffixes=["_right",""])
data["date"] = pd.DatetimeIndex(data['date'])

# --- Long format with event indicators --------------------------------------
# One row per (series, time step): value, time_idx, <event indicators...>, group.
data['time_idx'] = data.index
filter_columns = ['time_idx']+[col for col in data.columns if col.startswith("FOODS")]
event_columns = event_names + event_type
frames = [
    data.loc[:, [col, 'time_idx'] + event_columns]
    .rename(columns={col: "value"})
    .assign(group=col)
    for col in filter_columns[1:]
]
pf_data = pd.concat(frames, ignore_index=True, axis=0)
# Rows without sales values (calendar rows with no matching sales row) become 0.
pf_data = pf_data.fillna(0)
# Indicator -> categorical: "-" when inactive, the column's own name when active.
pf_data[event_columns] = (
    pf_data[event_columns]
    .astype('int64')
    .apply(lambda x: x.map({0: "-", 1: x.name}))
    .astype("category")
)
pf_data['value'] = pf_data['value'].astype('int64')
pf_data['time_idx'] = pf_data['time_idx'].astype('int64')

# --- Datasets and dataloaders -------------------------------------------------
max_prediction_length = 28
max_encoder_length = 28*4
training_cutoff = pf_data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    pf_data[lambda x: x.time_idx <= training_cutoff],
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["value"],
    time_varying_known_categoricals=["events"],
    variable_groups={"events": event_names+event_type},  # all event indicators are treated as one categorical variable
    target_normalizer=GroupNormalizer(groups=["group"]),  # scale the target per series (no `transformation` argument is passed)
    add_relative_time_idx=False,
    add_target_scales=True,
    randomize_length=None,
)
# predict=True selects the last `max_prediction_length` points of each series
# as the prediction window.
validation = TimeSeriesDataSet.from_dataset(training, pf_data, predict=True, stop_randomization=True)
batch_size = 256
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=1)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=1)

# --- Forecast -------------------------------------------------------------------
# Targets of the prediction window and the DeepAR forecasts for the same window.
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = best_deepar.predict(val_dataloader)

In [30]:
import plotly.express as px

# Line plot of the forecast stored at index 1 of `predictions`,
# with the position within the forecast on the x axis.
forecast_to_plot = predictions[1]
step_axis = range(len(forecast_to_plot))
fig = px.line(x=step_axis, y=forecast_to_plot)
fig.show()