### Import data

In [1]:
from datasets import load_dataset

# Repository id handed to `load_dataset` as its first argument.
CHRONOS = "autogluon/chronos_datasets"

# Configuration names requested from that repository.
# "monash_m3_monthly" is currently disabled.
DATASET_CHRONOS = ["dominick", "ercot", "exchange_rate"]

# Load each configuration once and show its structure. `ds` is rebound on
# every pass, so after the loop it refers to the last configuration loaded.
for dataset in DATASET_CHRONOS:
    ds = load_dataset(CHRONOS, dataset, trust_remote_code=True)
    display(ds)

DatasetDict({
    train: Dataset({
        features: ['id', 'timestamp', 'target', 'im_0'],
        num_rows: 100014
    })
})

DatasetDict({
    train: Dataset({
        features: ['id', 'timestamp', 'target'],
        num_rows: 8
    })
})

DatasetDict({
    train: Dataset({
        features: ['id', 'timestamp', 'target'],
        num_rows: 8
    })
})

### Results file

In [3]:
import csv
import os

# Directory that receives the results file; created on demand.
output_dir = "results"
os.makedirs(output_dir, exist_ok=True)

# Location of the results CSV inside the output directory.
csv_file_path = os.path.join(output_dir, "chronos_data_results.csv")

# Column titles, in the order the result rows are written.
header_columns = ["dataset", "model"]
header_columns += [
    f"eval_metrics/{metric}"
    for metric in (
        "MSE[mean]",
        "MSE[0.5]",
        "MAE[0.5]",
        "MASE[0.5]",
        "MAPE[0.5]",
        "sMAPE[0.5]",
        "MSIS",
        "RMSE[mean]",
        "NRMSE[mean]",
        "ND[0.5]",
        "mean_weighted_sum_quantile_loss",
    )
]
header_columns += ["domain", "num_variates"]

# Mode "w" truncates any existing file, so the CSV starts with the header only.
with open(csv_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header_columns)

## Evaluation

### Chronos

In [None]:
# Label written to the "model" column of each results row.
model_name = "chronos_bolt_small"  # TODO: change to "chronos_t5_base" for the original Chronos model

# Checkpoint identifier passed to ChronosPredictor as `model_path`.
# It must be a plain string: a trailing comma after the literal would turn the
# value into a one-element tuple.
model_path = "amazon/chronos-bolt-small"
# TODO: use "amazon/chronos-t5-base" for the corresponding original Chronos model
# "amazon/chronos-bolt-tiny", "amazon/chronos-bolt-mini", "amazon/chronos-bolt-small", "amazon/chronos-bolt-base",
# "amazon/chronos-t5-tiny", "amazon/chronos-t5-mini", "amazon/chronos-t5-small",
# "amazon/chronos-t5-base", "amazon/chronos-t5-large",

In [None]:
# Show the "train" split of `ds`. `ds` is whatever the dataset-loading loop
# bound last, so this cell depends on that loop having run in this kernel.
ds["train"]

Dataset({
    features: ['id', 'timestamp', 'target'],
    num_rows: 8
})

In [None]:
from utils.chronos_predictor import ChronosPredictor
from utils.metrics import evaluate_metrics
from gluonts.model import evaluate_model, evaluate_forecasts
from gluonts.time_feature import get_seasonality

# Keys read from `res`, listed in the column order used by the results CSV.
metric_keys = [
    "MSE[mean]",
    "MSE[0.5]",
    "MAE[0.5]",
    "MASE[0.5]",
    "MAPE[0.5]",
    "sMAPE[0.5]",
    "MSIS",
    "RMSE[mean]",
    "NRMSE[mean]",
    "ND[0.5]",
    "mean_weighted_sum_quantile_loss",
]

# For every configured dataset: load it, forecast with Chronos, score the
# forecasts, and append one row to the results CSV.
for ds_name in DATASET_CHRONOS:
    print(f"Processing dataset: {ds_name}")

    dataset = load_dataset(CHRONOS, ds_name, trust_remote_code=True)
    # NOTE(review): the dataset-loading cell's output shows `load_dataset`
    # returning a DatasetDict with only a "train" split. `.freq`, `.test_data`,
    # `.prediction_length` and `["train"].metadata` are not visible there --
    # confirm these attributes exist before running this cell.
    # NOTE(review): `season_length` and `prediction_length` are assigned here
    # but not read again within this loop.
    season_length = get_seasonality(dataset.freq)
    prediction_length = dataset["train"].metadata.prediction_length

    print(f"Dataset size: {len(dataset.test_data)}")

    # Change device_map to "cpu" to run on CPU or "cuda" to run on GPU
    predictor = ChronosPredictor(
        model_path=model_path,
        num_samples=20,
        prediction_length=dataset.prediction_length,
        device_map="cpu",
    )

    # Forecast, then score the forecasts against the same "train" split.
    predictions = predictor.predict(test_data_input=dataset["train"], batch_size=512)
    res = evaluate_metrics(predictions, dataset["train"])

    # Append mode keeps the header and any rows written earlier.
    with open(csv_file_path, "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        row = [ds_name, model_name]
        # Element 0 of each metric entry is the value that gets recorded.
        row.extend(res[key][0] for key in metric_keys)
        # The dataset name is reused for the "domain" column.
        row.append(ds_name)
        # NOTE(review): `dataset_properties_map` is not defined in the visible
        # cells -- confirm it is created before this cell runs.
        row.append(dataset_properties_map[ds_name]["num_variates"])
        writer.writerow(row)

    print(f"Results for {ds_name} have been written to {csv_file_path}")