##### 1. Set Up the Chronos-ZS Benchmark

In [None]:
!pip install 'tirex-ts[gluonts,hfdataset,test]'

##### 2. Load Model and Run Benchmark

In [None]:
from tirex import ForecastModel, load_model

# Load the model identified by "NX-AI/TiRex" and place it on the first CUDA device.
# NOTE(review): assumes a CUDA-capable GPU is present; adjust `device` otherwise — confirm
# which device strings `load_model` accepts.
model: ForecastModel = load_model("NX-AI/TiRex", device="cuda:0")

In [None]:
import time

import datasets
import fev
import pandas as pd


def eval_task(model, task):
    """Forecast every evaluation window of ``task`` with ``model``.

    For each window yielded by ``task.iter_windows`` the historical data is
    converted to a univariate ``datasets`` table, its ``"target"`` column is
    cast to float32 sequences in torch format, and the resulting series are
    passed to ``model.forecast`` together with the task's quantile levels and
    horizon. The forecasts are then regrouped per ``task.target_columns``.

    Args:
        model: Object exposing ``forecast(series, quantile_levels=...,
            prediction_length=...)`` that returns a ``(quantiles, means)``
            pair; ``quantiles`` is indexed as ``[:, :, i]`` where ``i`` is the
            position of a level in ``task.quantile_levels``.
        task: Benchmark task providing ``iter_windows``, ``quantile_levels``,
            ``horizon`` and ``target_columns``.

    Returns:
        A tuple ``(predictions_per_window, inference_time)``: the list holds
        one combined prediction object per window, and ``inference_time`` is
        the accumulated wall-clock seconds spent inside ``model.forecast``
        only (data conversion is not timed).
    """
    window_predictions = []
    forecast_seconds = 0.0

    for window in task.iter_windows(trust_remote_code=True):
        # The second element returned by the conversion is not needed here.
        history, _ = fev.convert_input_data(window, adapter="datasets", as_univariate=True)
        float32_sequence = datasets.Sequence(datasets.Value("float32"))
        history = history.with_format("torch").cast_column("target", float32_sequence)
        context_series = list(history["target"])

        # Time just the model call so that preprocessing does not count as inference.
        tick = time.monotonic()
        quantiles, means = model.forecast(
            context_series,
            quantile_levels=task.quantile_levels,
            prediction_length=task.horizon,
        )
        forecast_seconds += time.monotonic() - tick

        # Point forecast goes under "predictions"; each quantile gets a column named after its level.
        columns = {"predictions": means}
        columns.update(
            {str(level): quantiles[:, :, position] for position, level in enumerate(task.quantile_levels)}
        )

        univariate_predictions = datasets.Dataset.from_dict(columns)
        window_predictions.append(
            fev.combine_univariate_predictions_to_multivariate(
                univariate_predictions, target_columns=task.target_columns
            )
        )

    return window_predictions, forecast_seconds


# Task definitions for the Chronos zero-shot benchmark, fetched from the fev repository.
CHRONOS_ZEROSHOT_TASKS_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/tasks.yaml"
)
benchmark = fev.Benchmark.from_yaml(CHRONOS_ZEROSHOT_TASKS_URL)

# Evaluate the model task by task, printing each summary as soon as it is computed.
summaries = []
for task in benchmark.tasks:
    predictions, inference_time = eval_task(model, task)
    evaluation_summary = task.evaluation_summary(predictions, model_name="TiRex", inference_time_s=inference_time)
    print(evaluation_summary)
    summaries.append(evaluation_summary)

# Gather the per-task summaries into one table; the bare name on the last line
# makes the notebook display it.
summaries = pd.DataFrame(summaries)
summaries