In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch

from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import dataset_recipes, get_dataset
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import MultivariateEvaluator

#from pts.modules import StudentTOutput

from ConvTSMixer import ConvTSMixerEstimator
import random
import numpy as np
import time
import optuna

In [2]:
class ConvTSMixerObjective:  
    def __init__(self, dataset, train_grouper, test_grouper, metric_type="m_sum_mean_wQuantileLoss"):
        self.metric_type = metric_type
        self.dataset = dataset
        self.dataset_train = train_grouper(self.dataset.train)
        self.dataset_test = test_grouper(self.dataset.test)
    
    def get_params(self, trial) -> dict:
        return {
        "context_length": trial.suggest_int("context_length", dataset.metadata.prediction_length, dataset.metadata.prediction_length*10,4),
        "batch_size": trial.suggest_int("batch_size", 128, 256, 64),
        "depth": trial.suggest_int("depth", 2, 16,4),
        "dim": trial.suggest_int("dim", 16, 64, 16),
        "patch_size": trial.suggest_int("dim", 2, 16,4),
         "kernel_size": trial.suggest_int("dim", 9, 18, 3),
        }
     
    def __call__(self, trial):
        params = self.get_params(trial)
        estimator = estimator = ConvTSMixerEstimator(
            #distr_output=StudentTOutput(dim=int(dataset.metadata.feat_static_cat[0].cardinality)),
            input_size=int(self.dataset.metadata.feat_static_cat[0].cardinality),

            prediction_length=self.dataset.metadata.prediction_length,
            context_length=self.dataset.metadata.prediction_length*5,
            freq=self.dataset.metadata.freq,
            scaling="std",

            depth=params["depth"],
            patch_size=(params["patch_size"], params["patch_size"]),
            kernel_size=params["kernel_size"],
            dim=params["dim"],

            batch_size=params["batch_size"],
            num_batches_per_epoch=100,
            trainer_kwargs=dict(accelerator="cuda", max_epochs=30)
        )
        predictor = estimator.train(
        training_data=self.dataset_train,
        num_workers=8,
        shuffle_buffer_length=1024
        )
        
        forecast_it, ts_it = make_evaluation_predictions(dataset=self.dataset_test,
                                             predictor=predictor,
                                             num_samples=100)
        forecasts = list(forecast_it)
        tss = list(ts_it)
        evaluator = MultivariateEvaluator(quantiles=(np.arange(20)/20.0)[1:],
                                  target_agg_funcs={'sum': np.sum})
        agg_metrics, _ = evaluator(iter(tss), iter(forecasts))
        return agg_metrics[self.metric_type]

In [3]:
dataset = get_dataset("solar_nips", regenerate=False)
train_grouper = MultivariateGrouper(max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality))

test_grouper = MultivariateGrouper(num_test_dates=int(len(dataset.test)/len(dataset.train)), 
                                   max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality))
dataset_train = train_grouper(dataset.train)
dataset_test = test_grouper(dataset.test)

In [None]:
seed = 42
random.seed(seed)
torch.manual_seed(seed)
start_time = time.time()
study = optuna.create_study(direction="minimize")
study.optimize(ConvTSMixerObjective(dataset, train_grouper, test_grouper), n_trials=10)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
print(time.time() - start_time)

[32m[I 2023-04-28 03:24:04,607][0m A new study created in memory with name: no-name-354d2e39-12b9-421a-8a7b-024560528a23[0m
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | model | ConvTSMixerModel | 738 K 
-------------------------------------------
738 K     Trainable params
0         Non-trainable params
738 K     Total params
2.953     Total estimated model params size (MB)
2023-04-28 03:24:07.557367: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-28 03:24:07.599246: I tensorflow/core/platform/cpu_feature_guard

Training: 0it [00:00, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,
Epoch 0, global step 100: 'train_loss' reached 4.87490 (best 4.87490), saving model to '/gpfs/alpine/csc499/scratch/hstellar/ConvTs/ConvTS-Mixer-main/lightning_logs/version_16/checkpoints/epoch=0-step=100.ckpt' as top 1
Epoch 1, global step 200: 'train_loss' reached 4.41945 (best 4.41945), saving model to '/gpfs/alpine/csc499/scratch/hstellar/ConvTs/ConvTS-Mixer-main/lightning_logs/version_16/checkpoints/epoch=1-step=200.ckpt' as top 1
Epoch 2, global step 300: 'train_loss' reached 4.28979 (best 4.28979), saving model to '/gpfs/alpine/csc499/scratch/hstellar/ConvTs/ConvTS-Mixer-main/lightning_logs/version_16/checkpoints/epoch=2-step=300.ckpt' as top 1
