In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch

from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import get_dataset
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import MultivariateEvaluator

from TSMixer import TSMixerEstimator
import random
import numpy as np
import time
import optuna

In [4]:
class TSMixerObjective:
    """Optuna objective: train a TSMixer estimator for one trial and score it.

    Each call samples hyper-parameters from the trial, trains an estimator on
    the grouped training split, produces sample forecasts for the grouped test
    split and returns one aggregate metric for Optuna to optimise.

    Parameters
    ----------
    dataset
        Dataset object exposing ``train``, ``test`` and ``metadata``
        (``prediction_length``, ``freq``, ``feat_static_cat``).
    train_grouper
        Callable applied to ``dataset.train``; its result is the training data.
    test_grouper
        Callable applied to ``dataset.test``; its result is the evaluation data.
    metric_type
        Key looked up in the aggregate-metrics mapping returned by the
        evaluator; its value is what ``__call__`` returns.
    """

    def __init__(
        self,
        dataset,
        train_grouper,
        test_grouper,
        metric_type="m_sum_mean_wQuantileLoss",
    ):
        self.metric_type = metric_type
        self.dataset = dataset
        # Group once up front so every trial reuses the same splits.
        self.dataset_train = train_grouper(self.dataset.train)
        self.dataset_test = test_grouper(self.dataset.test)

    def get_params(self, trial) -> dict:
        """Sample one hyper-parameter configuration from ``trial``.

        Returns a dict with the keys ``context_length``, ``batch_size``,
        ``depth``, ``expansion_factor`` and ``dim``.
        """
        # Use the dataset held by this instance, not a notebook-level global,
        # so the objective works regardless of cell execution order.
        prediction_length = self.dataset.metadata.prediction_length
        return {
            "context_length": trial.suggest_int(
                "context_length",
                prediction_length,
                prediction_length * 10,
                step=4,
            ),
            "batch_size": trial.suggest_int("batch_size", 128, 256, step=64),
            # NOTE(review): with step 4 starting at 2 the grid is 2, 6, 10, 14,
            # so the stated upper bound 16 is not on it; confirm the intent.
            "depth": trial.suggest_int("depth", 2, 16, step=4),
            "expansion_factor": trial.suggest_int("expansion_factor", 2, 8, step=2),
            "dim": trial.suggest_int("dim", 16, 64, step=16),
        }

    def __call__(self, trial):
        """Train and evaluate one trial; return ``agg_metrics[self.metric_type]``."""
        params = self.get_params(trial)
        metadata = self.dataset.metadata

        # TSMixerEstimator is the estimator this notebook imports.
        # Assumes it accepts the keyword arguments below - TODO confirm
        # against the TSMixer module.
        estimator = TSMixerEstimator(
            input_size=int(metadata.feat_static_cat[0].cardinality),
            prediction_length=metadata.prediction_length,
            # Feed the sampled value through; otherwise the study would tune
            # a parameter that has no effect on the model.
            context_length=params["context_length"],
            freq=metadata.freq,
            scaling="std",
            depth=params["depth"],
            dim=params["dim"],
            expansion_factor=params["expansion_factor"],
            batch_size=params["batch_size"],
            num_batches_per_epoch=100,
            trainer_kwargs=dict(accelerator="cuda", max_epochs=30),
        )
        predictor = estimator.train(
            training_data=self.dataset_train, num_workers=8, shuffle_buffer_length=1024
        )

        forecast_it, ts_it = make_evaluation_predictions(
            dataset=self.dataset_test, predictor=predictor, num_samples=100
        )
        forecasts = list(forecast_it)
        tss = list(ts_it)

        # Quantile levels 0.05, 0.10, ..., 0.95; targets are also aggregated
        # with np.sum under the name "sum".
        evaluator = MultivariateEvaluator(
            quantiles=(np.arange(20) / 20.0)[1:], target_agg_funcs={"sum": np.sum}
        )
        agg_metrics, _ = evaluator(iter(tss), iter(forecasts))
        return agg_metrics[self.metric_type]

In [None]:
# Load the "solar_nips" dataset from the GluonTS dataset repository.
dataset = get_dataset("solar_nips", regenerate=False)

# Training split: the grouper's target dimension is the cardinality of the
# first static categorical feature.
train_grouper = MultivariateGrouper(
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),
)
dataset_train = train_grouper(dataset.train)

# Test split: same target dimension; the number of test dates is the ratio of
# test entries to train entries.
test_grouper = MultivariateGrouper(
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),
    num_test_dates=len(dataset.test) // len(dataset.train),
)
dataset_test = test_grouper(dataset.test)

In [None]:
# Seed every source of randomness in play so a Restart & Run All repeats the
# same search: Python's RNG, NumPy, torch and (below) the Optuna sampler.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

start_time = time.time()

# An unseeded sampler proposes different hyper-parameters on every run, so
# pass an explicitly seeded TPE sampler.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=seed),
)
study.optimize(TSMixerObjective(dataset, train_grouper, test_grouper), n_trials=10)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# Wall-clock seconds spent on the whole study.
print(time.time() - start_time)