In [None]:
!pip install dotenv
!pip install gluonts
!pip install --upgrade datasets
!pip install utilsforecast
!pip install lightning
!pip install jaxtyping
!pip install hydra-core
!pip install --upgrade transformers huggingface_hub

In [None]:
# Fetch the project repository and make its src/ directory the working directory.
# Later cells read relative paths (e.g. data/dataset_properties.json) and import
# from `utils`, presumably resolved from this directory.
# NOTE(review): both commands are relative to the current working directory, so
# re-running this cell from inside src/ clones a second, nested copy.
!git clone https://github.com/GiuliaGhisolfi/TSFM-ZeroShotEval
%cd TSFM-ZeroShotEval/src

In [None]:
# Environment variable for PyTorch's CUDA allocator; it is set here, ahead of
# the cells that load models with device_map="cuda".
# NOTE(review): presumably meant to limit memory fragmentation — confirm.
%env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

### Import Data

In [None]:
import json
from dotenv import load_dotenv

# Load environment variables (python-dotenv reads them from a .env file when it finds one)
load_dotenv()

# Space-separated GIFT-Eval dataset names, written either as "<name>/<freq>"
# or as a bare "<name>". Every dataset listed here is benchmarked on the
# "short" term.
short_datasets = (
    "solar/10T solar/H solar/D solar/W jena_weather/10T jena_weather/H jena_weather/D "
    "bitbrains_fast_storage/5T bitbrains_fast_storage/H bitbrains_rnd/5T bitbrains_rnd/H bizitobs_application "
    "bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
)

# Datasets that are additionally benchmarked on the "medium" and "long" terms.
med_long_datasets = (
    "solar/10T solar/H jena_weather/10T jena_weather/H "
    "bitbrains_fast_storage/5T bitbrains_rnd/5T bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
)

# Lower-cased dataset name -> key used in `dataset_properties_map`
# (names that are not listed are used unchanged).
pretty_names = {
    "saugeenday": "saugeen",
    "temperature_rain_with_missing": "temperature_rain",
    "kdd_cup_2018_with_missing": "kdd_cup_2018",
    "car_parts_with_missing": "car_parts",
}

# Union of the short and med_long datasets. The result is sorted because the
# iteration order of a set of strings changes between interpreter runs, which
# would make the benchmark order (and the row order of the CSV) irreproducible.
all_datasets = sorted(set(short_datasets.split()) | set(med_long_datasets.split()))

# Per-dataset metadata; the inference cells read the "frequency", "domain" and
# "num_variates" entries. The context manager closes the file handle as soon
# as the JSON document has been parsed.
with open("data/dataset_properties.json", encoding="utf-8") as properties_file:
    dataset_properties_map = json.load(properties_file)

In [None]:
from utils.load_data import load_gift_data

# Project helper whose return value is not stored.
# NOTE(review): presumably downloads/prepares the GIFT-Eval data that the
# `Dataset` objects of the inference cells read — confirm in utils/load_data.py.
load_gift_data()

In [None]:
# Names of the datasets that the inference cells load through
# `utils.load_chronos_data.load_data`
CHRONOS_DATASET_NAME = [
    "exchange_rate",
    "ercot",
    "dominick",
]

### Results File

In [None]:
import os
import csv

In [None]:
# The timing results are stored in <output_dir>/<output_file_name>
output_dir, output_file_name = "results", "inference_time.csv"

In [None]:
# Create the results folder when it is missing
os.makedirs(output_dir, exist_ok=True)

# Location of the CSV file that collects the timing measurements
csv_file_path = os.path.join(output_dir, output_file_name)

# Start a fresh file (mode "w" replaces an existing one) that holds only the header row
with open(csv_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow([
        "model", "dataset", "trial", "inference_time",
        "domain", "num_variates", "prediction_length", "frequency",
    ])

In [None]:
def save_results(model_name, ds_name, i, end, start, domain, num_variates, prediction_length, frequency):
    """Append one timing measurement as a row to the results CSV.

    The row is added to the file at ``csv_file_path`` and a confirmation
    message is printed afterwards.

    Args:
        model_name: Value written to the first column (model label).
        ds_name: Dataset/configuration label; also used in the printed message.
        i: Index of the trial.
        end: End timestamp of the measured call.
        start: Start timestamp of the measured call; ``end - start`` is stored.
        domain: Written as is.
        num_variates: Written as is.
        prediction_length: Written as is.
        frequency: Written as is.
    """
    # Mode "a" keeps the rows that are already in the file
    with open(csv_file_path, "a", newline="") as out_file:
        row = (
            model_name, ds_name, i, end - start,
            domain, num_variates, prediction_length, frequency,
        )
        csv.writer(out_file).writerow(row)

    print(f"Results for {ds_name} have been written")

## Inference

In [None]:
import time
from gift_eval.data import Dataset
from utils.load_chronos_data import load_data

### Chronos

In [None]:
# Labels used for the "model" column; index-aligned with CHRONOS_MODEL
CHRONOS_MODEL_NAME = [
    "chronos_bolt_tiny",
    "chronos_bolt_mini",
    "chronos_bolt_small",
    "chronos_bolt_base",
]

# Model identifiers handed to ChronosPredictor as `model_path`
CHRONOS_MODEL = [
    "amazon/chronos-bolt-tiny",
    "amazon/chronos-bolt-mini",
    "amazon/chronos-bolt-small",
    "amazon/chronos-bolt-base",
]
# Further checkpoints that are currently not benchmarked:
# "amazon/chronos-t5-tiny", "amazon/chronos-t5-mini", "amazon/chronos-t5-small",
# "amazon/chronos-t5-base", "amazon/chronos-t5-large",

In [None]:
from utils.chronos_predictor import ChronosPredictor

# Benchmark of the Chronos checkpoints: for every model, dataset and term the
# call to `predictor.predict` is timed in 10 trials and each trial is appended
# to the results CSV through `save_results`.

# Parsed once instead of on every loop iteration
med_long_names = set(med_long_datasets.split())

for model_name, model_path in zip(CHRONOS_MODEL_NAME, CHRONOS_MODEL):
    # Chronos Datasets (loaded through utils.load_chronos_data.load_data)
    for ds_name in CHRONOS_DATASET_NAME:
        print(f"Processing dataset: {ds_name}")

        terms = ["short", "medium", "long"]
        for term in terms:
            ds_config = f"{ds_name}/{term}"
            for i in range(10):

                dataset, prediction_length, frequency, domain, num_variates = load_data(ds_name, term)

                # Init predictor
                predictor = ChronosPredictor(
                    model_path=model_path,
                    num_samples=20,
                    prediction_length=prediction_length,
                    # Change device_map to "cpu" to run on CPU or "cuda" to run on GPU
                    device_map="cuda",
                )

                # perf_counter() is a monotonic, high-resolution clock meant for
                # measuring durations. list() forces the forecasts to be computed
                # inside the timed region even if `predict` returns a lazy iterator.
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)

    # GIFT-Eval Datasets
    for ds_name in all_datasets:
        print(f"Processing dataset: {ds_name}")

        # A name of the form "<name>/<freq>" carries its own frequency; for a
        # bare name the frequency is taken from the dataset metadata.
        name_parts = ds_name.split("/")
        ds_key = name_parts[0].lower()
        ds_key = pretty_names.get(ds_key, ds_key)
        if len(name_parts) > 1:
            ds_freq = name_parts[1]
        else:
            ds_freq = dataset_properties_map[ds_key]["frequency"]

        # Metadata that only depends on the dataset. These are plain values:
        # a trailing comma after the lookup would turn each of them into a
        # 1-tuple and the CSV would then contain e.g. "('<domain>',)".
        frequency = ds_freq
        domain = dataset_properties_map[ds_key]["domain"]
        num_variates = dataset_properties_map[ds_key]["num_variates"]

        terms = ["short", "medium", "long"]
        for term in terms:
            # Medium and long terms are only run for the med_long datasets
            if term in ("medium", "long") and ds_name not in med_long_names:
                continue

            ds_config = f"{ds_key}/{ds_freq}/{term}"

            # to_univariate is True exactly when the dataset loaded with
            # to_univariate=False reports a target_dim different from 1.
            to_univariate = Dataset(name=ds_name, term=term, to_univariate=False).target_dim != 1

            for i in range(10):
                # Initialize the dataset
                # NOTE(review): rebuilt on every trial as before; it could be
                # hoisted if the dataset's inputs are re-iterable — confirm.
                dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate)
                prediction_length = dataset.prediction_length

                # Init predictor
                predictor = ChronosPredictor(
                    model_path=model_path,
                    num_samples=20,
                    prediction_length=prediction_length,
                    # Change device_map to "cpu" to run on CPU or "cuda" to run on GPU
                    device_map="cuda",
                )

                # Same timing approach as for the Chronos datasets above
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.test_data.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)

### Moirai

In [None]:
# Labels used for the "model" column; index-aligned with MOIRAI_MODEL
MOIRAI_MODEL_NAME = [
    "moirai_small",
    "moirai_base",
    "moirai_large",
]

# Model identifiers handed to the Moirai `load_predictor`
MOIRAI_MODEL = [
    "Salesforce/moirai-1.1-R-small",
    "Salesforce/moirai-1.1-R-base",
    "Salesforce/moirai-1.1-R-large",
]
# Further checkpoints that are currently not benchmarked:
# "Salesforce/moirai-moe-1.0-R-base", "Salesforce/moirai-moe-1.0-R-small"

In [None]:
from utils.moirai_predictor import load_predictor

# Benchmark of the Moirai checkpoints: for every model, dataset and term the
# call to `predictor.predict` is timed in 10 trials and each trial is appended
# to the results CSV through `save_results`.
# NOTE: `load_predictor` is the loader imported from utils.moirai_predictor in
# this cell; the TimesFM cell rebinds the same name, so run the cells in order.

# Parsed once instead of on every loop iteration
med_long_names = set(med_long_datasets.split())

for model_name, model_path in zip(MOIRAI_MODEL_NAME, MOIRAI_MODEL):
    # Chronos Datasets (loaded through utils.load_chronos_data.load_data)
    for ds_name in CHRONOS_DATASET_NAME:
        print(f"Processing dataset: {ds_name}")

        terms = ["short", "medium", "long"]
        for term in terms:
            ds_config = f"{ds_name}/{term}"
            for i in range(10):

                dataset, prediction_length, frequency, domain, num_variates = load_data(ds_name, term)

                # Init predictor
                predictor = load_predictor(model_path, prediction_length, num_variates)

                # perf_counter() is a monotonic, high-resolution clock meant for
                # measuring durations. list() forces the forecasts to be computed
                # inside the timed region: GluonTS-style predictors return a lazy
                # iterator from `predict`, which would otherwise time almost nothing.
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)

    # GIFT-Eval Datasets
    for ds_name in all_datasets:
        print(f"Processing dataset: {ds_name}")

        # A name of the form "<name>/<freq>" carries its own frequency; for a
        # bare name the frequency is taken from the dataset metadata.
        name_parts = ds_name.split("/")
        ds_key = name_parts[0].lower()
        ds_key = pretty_names.get(ds_key, ds_key)
        if len(name_parts) > 1:
            ds_freq = name_parts[1]
        else:
            ds_freq = dataset_properties_map[ds_key]["frequency"]

        # Metadata that only depends on the dataset. These are plain values:
        # a trailing comma after the lookup would turn each of them into a
        # 1-tuple, which would then be passed to `load_predictor` as
        # num_variates and written to the CSV as "(<n>,)".
        frequency = ds_freq
        domain = dataset_properties_map[ds_key]["domain"]
        num_variates = dataset_properties_map[ds_key]["num_variates"]

        terms = ["short", "medium", "long"]
        for term in terms:
            # Medium and long terms are only run for the med_long datasets
            if term in ("medium", "long") and ds_name not in med_long_names:
                continue

            ds_config = f"{ds_key}/{ds_freq}/{term}"

            # to_univariate is True exactly when the dataset loaded with
            # to_univariate=False reports a target_dim different from 1.
            to_univariate = Dataset(name=ds_name, term=term, to_univariate=False).target_dim != 1

            for i in range(10):
                # Initialize the dataset
                # NOTE(review): rebuilt on every trial as before; it could be
                # hoisted if the dataset's inputs are re-iterable — confirm.
                dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate)
                prediction_length = dataset.prediction_length

                # Init predictor
                # NOTE(review): num_variates comes from the metadata, not from the
                # (possibly to_univariate-converted) dataset — confirm that this
                # is what load_predictor expects.
                predictor = load_predictor(model_path, prediction_length, num_variates)

                # Same timing approach as for the Chronos datasets above
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.test_data.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)

### TimesFM

In [None]:
# Labels used for the "model" column; index-aligned with TIMESFM_MODEL
TIMESFM_MODEL_NAME = [
    "timesfm2",
    "timesfm1",
]

# Model identifiers handed to the TimesFM `load_predictor`
TIMESFM_MODEL = [
    "google/timesfm-2.0-500m-pytorch",
    "google/timesfm-1.0-200m-pytorch",
]

In [None]:
from utils.timesfm_predictor import load_predictor

# Benchmark of the TimesFM checkpoints: for every model, dataset and term the
# call to `predictor.predict` is timed in 10 trials and each trial is appended
# to the results CSV through `save_results`.
# NOTE: `load_predictor` is the loader imported from utils.timesfm_predictor in
# this cell; it replaces the Moirai loader of the same name, so run the cells in order.

# Parsed once instead of on every loop iteration
med_long_names = set(med_long_datasets.split())

for model_name, model_path in zip(TIMESFM_MODEL_NAME, TIMESFM_MODEL):
    # Chronos Datasets (loaded through utils.load_chronos_data.load_data)
    for ds_name in CHRONOS_DATASET_NAME:
        print(f"Processing dataset: {ds_name}")

        terms = ["short", "medium", "long"]
        for term in terms:
            ds_config = f"{ds_name}/{term}"
            for i in range(10):

                dataset, prediction_length, frequency, domain, num_variates = load_data(ds_name, term)

                # Init predictor
                predictor = load_predictor(model_path, prediction_length, frequency)

                # perf_counter() is a monotonic, high-resolution clock meant for
                # measuring durations. list() forces the forecasts to be computed
                # inside the timed region even if `predict` returns a lazy iterator.
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)

    # GIFT-Eval Datasets
    for ds_name in all_datasets:
        print(f"Processing dataset: {ds_name}")

        # A name of the form "<name>/<freq>" carries its own frequency; for a
        # bare name the frequency is taken from the dataset metadata.
        name_parts = ds_name.split("/")
        ds_key = name_parts[0].lower()
        ds_key = pretty_names.get(ds_key, ds_key)
        if len(name_parts) > 1:
            ds_freq = name_parts[1]
        else:
            ds_freq = dataset_properties_map[ds_key]["frequency"]

        # Metadata that only depends on the dataset. These are plain values:
        # a trailing comma after the lookup would turn each of them into a
        # 1-tuple and the CSV would then contain e.g. "('<domain>',)".
        frequency = ds_freq
        domain = dataset_properties_map[ds_key]["domain"]
        num_variates = dataset_properties_map[ds_key]["num_variates"]

        terms = ["short", "medium", "long"]
        for term in terms:
            # Medium and long terms are only run for the med_long datasets
            if term in ("medium", "long") and ds_name not in med_long_names:
                continue

            ds_config = f"{ds_key}/{ds_freq}/{term}"

            # to_univariate is True exactly when the dataset loaded with
            # to_univariate=False reports a target_dim different from 1.
            to_univariate = Dataset(name=ds_name, term=term, to_univariate=False).target_dim != 1

            for i in range(10):
                # Initialize the dataset
                # NOTE(review): rebuilt on every trial as before; it could be
                # hoisted if the dataset's inputs are re-iterable — confirm.
                dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate)
                prediction_length = dataset.prediction_length

                # Init predictor
                predictor = load_predictor(model_path, prediction_length, frequency)

                # Same timing approach as for the Chronos datasets above
                start = time.perf_counter()
                forecasts = list(predictor.predict(dataset.test_data.input))
                end = time.perf_counter()

                # Append the results to the CSV file
                save_results(model_name, ds_config, i, end, start, domain, num_variates, prediction_length, frequency)