### Try DeepAR
* PytorchForecasting [Get Started](https://pytorch-forecasting.readthedocs.io/en/stable/getting-started.html)
* DeepAR [doc](https://pytorch-forecasting.readthedocs.io/en/stable/api/pytorch_forecasting.models.deepar.DeepAR.html)

In [7]:
import os

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_forecasting import TimeSeriesDataSet, DeepAR
from pytorch_forecasting.data import GroupNormalizer

import warnings
warnings.filterwarnings('ignore')  # silences every warning category for the whole kernel session

# Used Yujie's cleaned version
# Root folder of the per-investment arrays; the loading cell reads
# `feats/<id>.npy` and `target/<id>.npy` from it.
DIR_BYID = '/media/user/12TB1/HanLi/GitHub/CMU11785-project/local_data/content/databyid'

# Investment ids are taken from the `<id>.npy` file names in the target folder.
# Only `.npy` entries are considered so that stray entries (e.g. `.ipynb_checkpoints`
# or `.DS_Store`) do not make `int()` raise a ValueError.
ls_all_invest_ids = sorted(
    int(fn.split('.')[0])
    for fn in os.listdir(os.path.join(DIR_BYID, 'target'))
    if fn.endswith('.npy')
)

In [9]:
# Names of the 300 feature columns stored in each feats/<id>.npy array.
f_cols = [f"f_{i}" for i in range(300)]
# Read a subset for testing
n = 3
ls_dfs = []
# NOTE: the loop variable is deliberately not called `id`, which would shadow the
# builtin `id()` for every later cell of the notebook.
for invest_id in ls_all_invest_ids[:n]:
# for invest_id in ls_all_invest_ids:
    df_f_id = pd.DataFrame(np.load(os.path.join(DIR_BYID, f'feats/{invest_id}.npy')), columns=f_cols)
    df_t_id = pd.DataFrame(np.load(os.path.join(DIR_BYID, f'target/{invest_id}.npy')), columns=['target'])
    # put the group id in front of the feature columns
    df_f_id.insert(0, 'investment_id', invest_id)
    # resulting column order: target, investment_id, f_0 ... f_299
    ls_dfs.append(pd.concat([df_t_id, df_f_id], axis=1))

# Each per-investment frame carries a default 0..len-1 row index; after stacking,
# that row position becomes the `time_id` column.
df = pd.concat(ls_dfs).reset_index().rename(columns={'index': 'time_id'})
df = df.sort_values(by=['time_id']) # sort by time before splitting

# # Use a few features for testing
# df = df.iloc[:, :6]
# f_cols = [c for c in df.columns if 'f_' in c]

# Chronological (unshuffled) split: the last 10% of rows go to test, then 2/9 of the
# remaining 90% (= 20% of all rows) go to validation, leaving ~70% for training.
df_train, df_test = train_test_split(df, test_size=0.1, shuffle=False)
df_train, df_val = train_test_split(df_train, test_size=2/9, shuffle=False)

# sanity check: the three splits partition the full frame
assert len(df_train) + len(df_val) + len(df_test) == len(df)

In [None]:
# Display the training split (last expression of the cell, so the notebook renders it).
df_train

### Create dataset and dataloaders
* Ref: https://pytorch-forecasting.readthedocs.io/en/stable/data.html

In [10]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import NormalDistributionLoss, QuantileLoss

# define dataset / dataloader hyperparameters
# (the earlier throw-away values 6 / 1 were immediately overwritten and have been removed)
batch_size = 128
max_prediction_length = 3 # prediction horizon
max_encoder_length = 24   # lookback steps
# Never request more loader workers than the machine has CPUs; 32 stays the upper bound.
num_workers = min(32, os.cpu_count() or 1)

# create the dataset from the pandas dataframe
train_dataset = TimeSeriesDataSet(
    df_train,
    group_ids=["investment_id"],
    target="target",
    time_idx="time_id",
    min_encoder_length=max_encoder_length // 2,  # allow lookbacks down to half the maximum
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # static_reals=[],
    time_varying_known_reals=f_cols,
    time_varying_unknown_reals=['target'], # Need this for DeepAR
    target_normalizer=GroupNormalizer(
        groups=["investment_id"], 
        # transformation="softplus" # NOTE: do not use softplus or relu for encoder normalization with DeepAR
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set reuses the training dataset's configuration (encoders, normalizer, ...).
# NOTE(review): per the pytorch-forecasting docs, `predict=True` keeps only the last
# prediction window of each group, so validation has one sample per investment_id - confirm
# this is intended.
val_dataset = TimeSeriesDataSet.from_dataset(train_dataset, df_val, predict=True, stop_randomization=True)

# create dataloaders for model
train_dataloader = train_dataset.to_dataloader(train=True, batch_size=batch_size, num_workers=num_workers)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=num_workers)


In [11]:

# Pull one batch from the training loader and list the shape of every tensor in
# the input dictionary `x` (`y` is the matching target part of the batch).
x, y = next(iter(train_dataloader))
# print("x =", x)
# print("\ny =", y)
print("\nsizes of x =")
for name in x:
    print(f"\t{name} = {x[name].size()}")

# Shapes for batch_size=128, max_encoder_length=24, max_prediction_length=3
# (B = batch size; see the cell output):
# 	encoder_cat      = [128, 24, 0]    # (B, encoder_len, n_categorical_feats)
# 	encoder_cont     = [128, 24, 305]  # (B, encoder_len, n_continuous_feats)
# 	encoder_target   = [128, 24]       # (B, encoder_len)
# 	encoder_lengths  = [128]           # (B, )
# 	decoder_cat      = [128, 3, 0]     # (B, decoder_len, n_categorical_feats)
# 	decoder_cont     = [128, 3, 305]   # (B, decoder_len, n_continuous_feats)
# 	decoder_target   = [128, 3]        # (B, decoder_len)
# 	decoder_lengths  = [128]           # (B, )
# 	decoder_time_idx = [128, 3]        # (B, decoder_len)
# 	groups           = [128, 1]        # (B, n_group_id_columns)
# 	target_scale     = [128, 2]        # (B, 2) - presumably normalizer center and scale; TODO confirm
# NOTE(review): 305 continuous inputs are presumably the 300 f_* columns plus the target
# and the helper columns added by add_relative_time_idx / add_target_scales /
# add_encoder_length - TODO confirm.



sizes of x =
	encoder_cat = torch.Size([128, 24, 0])
	encoder_cont = torch.Size([128, 24, 305])
	encoder_target = torch.Size([128, 24])
	encoder_lengths = torch.Size([128])
	decoder_cat = torch.Size([128, 3, 0])
	decoder_cont = torch.Size([128, 3, 305])
	decoder_target = torch.Size([128, 3])
	decoder_lengths = torch.Size([128])
	decoder_time_idx = torch.Size([128, 3])
	groups = torch.Size([128, 1])
	target_scale = torch.Size([128, 2])


### Configure DeepAR model (Han 2022-4-21 update)

In [5]:
# Learning-rate range test: build a small DeepAR model and let the Lightning tuner
# sweep learning rates between `min_lr` and `max_lr`.
# NOTE(review): this probe model (hidden_size=64, default RNN settings) differs from the
# model trained in the training cell (LSTM, 3 layers, hidden_size=256), and the suggestion
# printed here is not fed into that cell, which hard-codes learning_rate=0.03.
# pl.seed_everything(42)
trainer = pl.Trainer(
    gpus=1,
    # clipping gradients is a hyperparameter and important to prevent divergence
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)

model = DeepAR.from_dataset(
    train_dataset,
    learning_rate=0.03,
    hidden_size=64,  # most important hyperparameter apart from learning rate
    dropout=0.1,  # between 0.1 and 0.3 are good values
    loss=NormalDistributionLoss(),
    # # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {model.size()/1e3:.1f}k")


# find optimal learning rate
res = trainer.tuner.lr_find(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
)

print(f"suggested learning rate: {res.suggestion()}")
# `show=True` already renders the figure, so the former extra `fig.show()` call was
# redundant and has been dropped; `fig` is kept for further inspection.
fig = res.plot(show=True, suggest=True)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 128.4k


### Train Model (Han 2022-4-21 update)

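* Note: the training cell below logs to Weights & Biases through `WandbLogger`. If you switch it to a `TensorBoardLogger`, check the logs by running ```tensorboard --logdir=<logging_folder>```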
* To visualize tensorboard in Jupyter Notebook: 
    ```
    %reload_ext tensorboard
    %tensorboard --logdir=<logging_folder>
    ```

In [13]:
from pytorch_lightning.loggers import WandbLogger
DIR_LOGS = '/media/user/12TB1/HanLi/GitHub/CMU11785-project/logs' # Change this!
# configure network and trainer
# NOTE(review): patience=10 can never be reached while the trainer below uses max_epochs=5,
# so early stopping is effectively inactive for this run.
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate

# Checkpoint directory is derived from DIR_LOGS so changing DIR_LOGS is enough to relocate it.
# NOTE(review): this callback is currently NOT passed to the trainer (see `callbacks=` below),
# so no checkpoints are written through it.
checkpoint_callback = ModelCheckpoint(
    monitor='val_SMAPE', 
    dirpath=os.path.join(DIR_LOGS, 'model_checkpoints'), 
    save_top_k=2, 
    filename='500-default-{epoch:02d}-{val_SMAPE:.2f}'
)

# Metrics are sent to Weights & Biases.
logger = WandbLogger(
    entity="11785_project",
    project="project_runs",
    name='DeepAR_first_run',
    log_model=True
)

trainer = pl.Trainer(
    max_epochs=5,
    gpus=1,
    weights_summary="top",
    gradient_clip_val=0.1,
    # limit_train_batches=30,  # comment in for training, running validation every 30 batches
    # fast_dev_run=True,  # comment in to check that network or dataset has no serious bugs
    # callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

# Replaces the `model` created in the learning-rate-finder cell with a larger network.
model = DeepAR.from_dataset(
    train_dataset,
    cell_type='LSTM',
    rnn_layers=3,
    learning_rate=0.03,
    hidden_size=256,  # most important hyperparameter apart from learning rate
    dropout=0.1,  # between 0.1 and 0.3 are good values
    # loss=NormalDistributionLoss(),  # left at the default; the model summary output reports NormalDistributionLoss
    # # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {model.size()/1e3:.1f}k")

# fit network
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                   | Type                   | Params
------------------------------------------------------------------
0 | loss                   | NormalDistributionLoss | 0     
1 | logging_metrics        | ModuleList             | 0     
2 | embeddings             | MultiEmbedding         | 0     
3 | rnn                    | LSTM                   | 1.6 M 
4 | distribution_projector | Linear                 | 514   
------------------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.519     Total estimated model params size (MB)


Number of parameters in network: 1629.7k
Epoch 4: 100%|██████████| 21/21 [00:29<00:00,  1.40s/it, loss=79.7, v_num=p06p, train_loss_step=1.340, val_loss=0.931, train_loss_epoch=79.70]  


### TODO: test model and calculate performance metrics on test data