In [2]:
# load some libraries
import sys
import os
import pickle
import gzip
# Put the parent directory on the module search path (as a relative entry at index 1).
sys.path.insert(1, '..')
# Switch the working directory to the parent directory; the relative paths used
# later in this notebook ('./output', './paper_results/data') are resolved against it.
# NOTE(review): the chdir is relative, so re-running this cell in the same kernel
# moves up one more level each time -- restart the kernel before re-running.
os.chdir('..')

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import darts
from darts import metrics

# NOTE(review): `models`, `torch`, `load_data`, `Gluformer`, `LatentODEWrapper` and the
# `SamplingDatasetInference*` classes used below are never imported explicitly;
# presumably they arrive through these star imports -- confirm, and consider
# importing them by name so the dependencies are visible.
from lib.gluformer.model import *
from lib.latent_ode.trainer_glunet import *
from utils.darts_processing import *
from utils.darts_dataset import *

In [None]:
# MODELS: TRANSFORMER, NHiTS, TFT, XGBOOST, LINEAR REGRESSION

# Evaluation registry, one entry per model. The short model name used as key also
# appears in the checkpoint folder names and in the keys of the result dictionaries.
#   'darts'       -> darts model class that is loaded or refit
#   'darts_data'  -> inference dataset class (only present for the checkpointed models)
#   'use_covs', 'use_static_covs', 'cov_type' -> forwarded to load_data
model_params = {
    'transformer': dict(darts=models.TransformerModel,
                        darts_data=SamplingDatasetInferencePast,
                        use_covs=False,
                        use_static_covs=False,
                        cov_type='past'),
    'nhits': dict(darts=models.NHiTSModel,
                  darts_data=SamplingDatasetInferencePast,
                  use_covs=False,
                  use_static_covs=False,
                  cov_type='past'),
    'tft': dict(darts=models.TFTModel,
                darts_data=SamplingDatasetInferenceMixed,
                use_covs=False,
                use_static_covs=True,
                cov_type='mixed'),
    'xgboost': dict(darts=models.XGBModel,
                    use_covs=False,
                    use_static_covs=False,
                    cov_type='past'),
    'linreg': dict(darts=models.LinearRegressionModel,
                   use_covs=False,
                   use_static_covs=False,
                   cov_type='past'),
}
# data sets to evaluate on
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
# result containers, filled with '<model>_<dataset>' keys by the cells below
save_trues, save_forecasts, save_inputs = {}, {}, {}
# iterate through models and datasets
# For every (model, dataset) pair: load the data, obtain forecasts on the test split,
# and store forecasts / true values (and, for the checkpointed models, the inputs)
# under the key '<model>_<dataset>'.
for model_name, config in model_params.items():
    for dataset in datasets:
        print(f'Testing {model_name} for {dataset}')
        # key shared by all result dictionaries for this (model, dataset) pair
        result_key = f'{model_name}_{dataset}'
        formatter, series, scalers = load_data(seed=0, study_file=None, dataset=dataset,
                                               use_covs=config['use_covs'],
                                               use_static_covs=config['use_static_covs'],
                                               cov_type=config['cov_type'])
        target_scaler = scalers['target']
        length_pred = formatter.params['length_pred']
        # load model or refit model
        if model_name in ['tft', 'transformer', 'nhits']:
            in_len = formatter.params[model_name]['in_len']
            # load model: the checkpoint loader returns the restored model, so it is
            # called on the class directly instead of on a throwaway instance
            model = config['darts'].load_from_checkpoint(f'tensorboard_{model_name}_{dataset}',
                                                         work_dir='./output',
                                                         best=True)
            # define dataset for inference
            test_dataset = config['darts_data'](target_series=series['test']['target'],
                                                n=length_pred,
                                                input_chunk_length=in_len,
                                                output_chunk_length=length_pred,
                                                use_static_covariates=config['use_static_covs'],
                                                max_samples_per_ts=None)
            # get predictions (only TFT is sampled multiple times)
            forecasts = model.predict_from_dataset(n=length_pred,
                                                   input_series_dataset=test_dataset,
                                                   verbose=True,
                                                   num_samples=20 if model_name == 'tft' else 1)
            forecasts = target_scaler.inverse_transform(forecasts)
            save_forecasts[result_key] = forecasts
            # get true values
            trues = [test_dataset.evalsample(i) for i in range(len(test_dataset))]
            save_trues[result_key] = target_scaler.inverse_transform(trues)
            # get inputs: raw arrays, so the scaling is undone via min_ / scale_ directly
            inputs = [test_dataset[i][0] for i in range(len(test_dataset))]
            save_inputs[result_key] = (np.array(inputs) - target_scaler.min_) / target_scaler.scale_
        elif model_name in ['xgboost', 'linreg']:
            hparams = formatter.params[model_name]
            if model_name == 'xgboost':
                # build model: xgboost
                model = config['darts'](lags=hparams['in_len'],
                                        learning_rate=hparams['lr'],
                                        subsample=hparams['subsample'],
                                        min_child_weight=hparams['min_child_weight'],
                                        colsample_bytree=hparams['colsample_bytree'],
                                        max_depth=hparams['max_depth'],
                                        gamma=hparams['gamma'],
                                        reg_alpha=hparams['alpha'],
                                        reg_lambda=hparams['lambda_'],
                                        n_estimators=hparams['n_estimators'],
                                        random_state=0)
            else:
                # build model: linear regression (class taken from the registry,
                # consistent with the other models)
                model = config['darts'](lags=hparams['in_len'],
                                        output_chunk_length=length_pred)
            # these models are not checkpointed: refit on the training split
            model.fit(series['train']['target'])
            # get predictions: rolling forecasts over the test split without retraining;
            # progress output is kept on for xgboost only, as before
            forecasts = model.historical_forecasts(series['test']['target'],
                                                   forecast_horizon=length_pred,
                                                   stride=1,
                                                   retrain=False,
                                                   verbose=(model_name == 'xgboost'),
                                                   last_points_only=False)
            forecasts = [target_scaler.inverse_transform(forecast) for forecast in forecasts]
            save_forecasts[result_key] = forecasts
            # get true values
            save_trues[result_key] = target_scaler.inverse_transform(series['test']['target'])
        else:
            # fail loudly instead of silently skipping a model that has no evaluation branch
            raise ValueError(f'No evaluation branch for model: {model_name}')

In [None]:
# MODELS: LATENT ODE and GLUFORMER
# Prefer the GPU, but fall back to the CPU so the cell still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def _undo_target_scaling(values, scaler):
    # Arithmetic on raw arrays: (values - min_) / scale_.
    # Presumably the inverse of a transform of the form x * scale_ + min_ -- confirm against the scaler.
    return (values - scaler.min_) / scaler.scale_


def _collect_trues_and_inputs(inference_dataset, scaler):
    # One entry per sample of the inference dataset: true values via evalsample(i),
    # inputs via the first element of inference_dataset[i].
    sample_ids = range(len(inference_dataset))
    true_values = scaler.inverse_transform([inference_dataset.evalsample(i) for i in sample_ids])
    raw_inputs = np.array([inference_dataset[i][0] for i in sample_ids])
    return true_values, _undo_target_scaling(raw_inputs, scaler)


for dataset in datasets:
    print(f'Testing {dataset}')
    formatter, series, scalers = load_data(seed=0, study_file=None, dataset=dataset, use_covs=True, use_static_covs=True)
    target_scaler = scalers['target']
    glufo_cfg = formatter.params['gluformer']
    latod_cfg = formatter.params['latentode']
    length_pred = formatter.params['length_pred']
    # both inference datasets share everything except the input window length
    shared_dataset_kwargs = dict(target_series=series['test']['target'],
                                 covariates=series['test']['future'],
                                 output_chunk_length=length_pred,
                                 use_static_covariates=True,
                                 array_output_only=True)
    # define dataset for inference: gluformer
    dataset_test_glufo = SamplingDatasetInferenceDual(input_chunk_length=glufo_cfg['in_len'],
                                                      **shared_dataset_kwargs)
    # define dataset for inference: latent ode
    dataset_test_latod = SamplingDatasetInferenceDual(input_chunk_length=latod_cfg['in_len'],
                                                      **shared_dataset_kwargs)
    # load model: gluformer (feature counts are read from the last training series)
    num_dynamic_features = series['train']['future'][-1].n_components
    num_static_features = series['train']['static'][-1].n_components
    glufo = Gluformer(d_model=glufo_cfg['d_model'],
                      n_heads=glufo_cfg['n_heads'],
                      d_fcn=glufo_cfg['d_fcn'],
                      r_drop=0.2,
                      activ='relu',
                      num_enc_layers=glufo_cfg['num_enc_layers'],
                      num_dec_layers=glufo_cfg['num_dec_layers'],
                      distil=True,
                      len_seq=glufo_cfg['in_len'],
                      label_len=glufo_cfg['in_len'] // 3,
                      len_pred=length_pred,
                      num_dynamic_features=num_dynamic_features,
                      num_static_features=num_static_features)
    glufo.to(device)
    glufo_weights = torch.load(f'./output/tensorboard_gluformer_{dataset}/model.pt',
                               map_location=torch.device(device))
    glufo.load_state_dict(glufo_weights)
    # load model: latent ode
    latod = LatentODEWrapper(device=device,
                             latents=latod_cfg['latents'],
                             rec_dims=latod_cfg['rec_dims'],
                             rec_layers=latod_cfg['rec_layers'],
                             gen_layers=latod_cfg['gen_layers'],
                             units=latod_cfg['units'],
                             gru_units=latod_cfg['gru_units'])
    latod.load(f'./output/tensorboard_latentode_{dataset}/model.ckpt', device)
    # get predictions: gluformer (second return value of predict is not used)
    print('Gluformer')
    forecasts, _ = glufo.predict(dataset_test_glufo,
                                 batch_size=8,
                                 num_samples=10,
                                 device=device,
                                 use_tqdm=True)
    forecasts = _undo_target_scaling(forecasts, target_scaler)
    trues, inputs = _collect_trues_and_inputs(dataset_test_glufo, target_scaler)
    save_forecasts[f'gluformer_{dataset}'] = forecasts
    save_trues[f'gluformer_{dataset}'] = trues
    save_inputs[f'gluformer_{dataset}'] = inputs
    # get predictions: latent ode
    print('Latent ODE')
    forecasts = latod.predict(dataset_test_latod,
                              batch_size=32,
                              num_samples=20,
                              device=device,
                              use_tqdm=True)
    forecasts = _undo_target_scaling(forecasts, target_scaler)
    trues, inputs = _collect_trues_and_inputs(dataset_test_latod, target_scaler)
    save_forecasts[f'latentode_{dataset}'] = forecasts
    save_trues[f'latentode_{dataset}'] = trues
    save_inputs[f'latentode_{dataset}'] = inputs

In [None]:
# save forecasts, true values, and inputs as gzip-compressed pickles
output_dir = './paper_results/data'
# create the target folder first so a missing directory cannot discard the computed results
os.makedirs(output_dir, exist_ok=True)
for file_name, payload in (('compressed_forecasts.pkl', save_forecasts),
                           ('compressed_trues.pkl', save_trues),
                           ('compressed_inputs.pkl', save_inputs)):
    with gzip.open(f'{output_dir}/{file_name}', 'wb') as file:
        pickle.dump(payload, file)