In [1]:
import os
from src.evaluation.save_results import create_output_file, save_results

# Results destination: <output_dir>/<output_file_name>.
output_dir, output_file_name = "results/test", "moirai_small_test_results.csv"

# Full path of the results CSV; save_results() receives it in the evaluation loop.
csv_file_path = os.path.join(output_dir, output_file_name)

# NOTE(review): presumably creates/initialises the (empty) results file so that
# later save_results() calls can append to it -- confirm in src.evaluation.save_results.
create_output_file(output_file_name, output_dir)


In [2]:
from gluonts.model import evaluate_model
from gluonts.time_feature import get_seasonality
from src.evaluation.load_chronos_data import load_data

In [3]:
from src.evaluation.metrics import get_metrics

# Build the metric collection once; this same `metrics` object is handed to every
# evaluate_model() call in the evaluation loop further down the notebook.
metrics = get_metrics()

### Data

In [4]:
import json
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Whitespace-separated dataset names ("<name>" or "<name>/<frequency>").
short_datasets = "solar/10T solar/H solar/D solar/W jena_weather/10T jena_weather/H jena_weather/D " \
"bitbrains_fast_storage/5T bitbrains_fast_storage/H bitbrains_rnd/5T bitbrains_rnd/H bizitobs_application " \
"bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"

# Only names listed here are evaluated on the "medium" and "long" terms by the
# evaluation loop; every other dataset is evaluated on "short" only.
med_long_datasets = "solar/10T solar/H jena_weather/10T jena_weather/H " \
"bitbrains_fast_storage/5T bitbrains_rnd/5T bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"

# Union of both lists. Sorted so the order is identical on every run: iterating
# a set of strings directly yields an arbitrary order that changes between kernels.
all_datasets = sorted(set(short_datasets.split() + med_long_datasets.split()))

# Per-dataset metadata, looked up by lower-cased dataset name in the evaluation
# loop. The context manager closes the file handle as soon as parsing is done.
with open("./data/dataset_properties.json", encoding="utf-8") as properties_file:
    dataset_properties_map = json.load(properties_file)

In [5]:
from src.evaluation.load_data import load_gift_data

# Make the GIFT-Eval data available locally. Per the recorded output of this
# cell, the call fetches files from the Hugging Face Hub and prints the list of
# available dataset names; the return value is not used.
load_gift_data()

  from .autonotebook import tqdm as notebook_tqdm
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
Fetching 175 files: 100%|██████████| 175/175 [00:00<00:00, 1394.27it/s]

Available datasets in GIFT_EVAL:
- LOOP_SEATTLE/5T
- LOOP_SEATTLE/D
- LOOP_SEATTLE/H
- M_DENSE/D
- M_DENSE/H
- SZ_TAXI/15T
- SZ_TAXI/H
- bitbrains_fast_storage/5T
- bitbrains_fast_storage/H
- bitbrains_rnd/5T
- bitbrains_rnd/H
- bizitobs_application
- bizitobs_l2c/5T
- bizitobs_l2c/H
- bizitobs_service
- car_parts_with_missing
- covid_deaths
- electricity/15T
- electricity/D
- electricity/H
- electricity/W
- ett1/15T
- ett1/D
- ett1/H
- ett1/W
- ett2/15T
- ett2/D
- ett2/H
- ett2/W
- hierarchical_sales/D
- hierarchical_sales/W
- hospital
- jena_weather/10T
- jena_weather/D
- jena_weather/H
- kdd_cup_2018_with_missing/D
- kdd_cup_2018_with_missing/H
- m4_daily
- m4_hourly
- m4_monthly
- m4_quarterly
- m4_weekly
- m4_yearly
- restaurant
- saugeenday/D
- saugeenday/M
- saugeenday/W
- solar/10T
- solar/D
- solar/H
- solar/W
- temperature_rain_with_missing
- us_births/D
- us_births/M
- us_births/W





In [6]:
# GIFT-Eval datasets to evaluate, one "<name>" or "<name>/<frequency>" per line.
# The order below is the order in which the evaluation loop processes them.
DATASET_NAME = """
    bizitobs_l2c/5T
    bitbrains_fast_storage/5T
    bitbrains_rnd/H
    bizitobs_l2c/H
    jena_weather/H
    bizitobs_application
    bizitobs_service
    bitbrains_fast_storage/H
    solar/D
    jena_weather/10T
    solar/W
    jena_weather/D
    bitbrains_rnd/5T
    solar/10T
    solar/H
""".split()

In [7]:
# Datasets loaded through load_data() (src.evaluation.load_chronos_data) in the
# first part of the evaluation loop, in processing order.
CHRONOS_DATASET_NAME = [
    "exchange_rate",
    "ercot",
    "dominick",
]

### Evaluation

In [8]:
# Value passed as `module=` to load_predictor().
MODULE = "Salesforce/moirai-1.1-R-small"

# Model label written alongside every result row by save_results().
MODEL_NAME = "moirai_small"

# Checkpoint directory. The trailing slash matters: checkpoint file names are
# appended to it by plain string concatenation in the evaluation loop.
MODEL_PATH = "moirai_checkpoints/"

# One checkpoint file name per epoch index; currently only epoch 0.
CHECKPOINTS = list(
    f"moirai_small_epoch_epoch={i}.ckpt"
    for i in range(1)
)

In [9]:
import sys

# Put the relative "src" directory at the front of the module search path.
# Guarded so that re-running this cell does not stack duplicate entries.
if sys.path[:1] != ["src"]:
    sys.path.insert(0, "src")

In [10]:
import os

# An empty CUDA_VISIBLE_DEVICES exposes no GPU to this process; the predictors
# in the evaluation loop are loaded with device_map="cpu" accordingly.
os.environ.update(CUDA_VISIBLE_DEVICES="")

In [None]:
from src.evaluation.moirai_predictor import load_predictor
from src.gift_eval.data import Dataset

# Evaluate every checkpoint on (1) the Chronos datasets and (2) the GIFT-Eval
# datasets, appending one row per dataset/term configuration to the results CSV.

# Names that also have "medium" and "long" terms; built once instead of
# re-splitting the string on every loop iteration.
med_long_names = set(med_long_datasets.split())
terms = ["short", "medium", "long"]

for train_step, model_name in enumerate(CHECKPOINTS):
    # ---- Chronos datasets ------------------------------------------------
    for ds_name in CHRONOS_DATASET_NAME:
        print(f"Processing dataset: {ds_name}")

        for term in terms:
            ds_config = f"{ds_name}/{term}"

            dataset, prediction_length, frequency, domain, num_variates = load_data(ds_name, term)
            season_length = get_seasonality(frequency)

            # The predictor is rebuilt per configuration because the prediction
            # length and target dimension depend on the dataset/term pair.
            predictor = load_predictor(
                checkpoint=MODEL_PATH + model_name,
                module=MODULE,
                prediction_length=prediction_length,
                target_dim=num_variates,
                device_map="cpu",
            )

            res = evaluate_model(
                predictor,
                test_data=dataset,
                metrics=metrics,
                batch_size=1024,
                axis=None,
                mask_invalid_label=True,
                allow_nan_forecast=False,
                seasonality=season_length,
            )

            # Append the results to the CSV file
            save_results(res, ds_config, MODEL_NAME, train_step, domain, num_variates, ds_name, csv_file_path)

    # ---- GIFT-Eval datasets ----------------------------------------------
    for ds_name in DATASET_NAME:
        print(f"Processing dataset: {ds_name}")

        # "<name>/<freq>" carries its own frequency; a bare "<name>" takes it
        # from the properties map. Neither depends on the term.
        if "/" in ds_name:
            ds_key, ds_freq = ds_name.split("/")[:2]
            ds_key = ds_key.lower()
        else:
            ds_key = ds_name.lower()
            ds_freq = dataset_properties_map[ds_key]["frequency"]

        # Metadata for the result row. These used to be whatever the last
        # Chronos dataset left behind in `domain` / `num_variates`.
        # NOTE(review): assumes each entry of dataset_properties.json carries
        # "domain" and "num_variates" next to "frequency" -- confirm.
        ds_properties = dataset_properties_map[ds_key]
        domain = ds_properties["domain"]
        num_variates = ds_properties["num_variates"]

        for term in terms:
            # Medium/long horizons exist only for a subset of the datasets.
            if term in ("medium", "long") and ds_name not in med_long_names:
                continue

            ds_config = f"{ds_key}/{ds_freq}/{term}"

            # Initialize the dataset: probe the native target dimension first and
            # split multivariate series into univariate ones when it is not 1.
            to_univariate = Dataset(name=ds_name, term=term, to_univariate=False).target_dim != 1
            dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate)
            season_length = get_seasonality(dataset.freq)

            # Horizon and target dimension must come from THIS dataset; reusing
            # the variables of the Chronos loop configured the predictor for the
            # wrong dataset (or raised NameError when that loop did not run).
            prediction_length = dataset.prediction_length
            predictor = load_predictor(
                checkpoint=MODEL_PATH + model_name,
                module=MODULE,
                prediction_length=prediction_length,
                target_dim=dataset.target_dim,
                device_map="cpu",
            )

            res = evaluate_model(
                predictor,
                test_data=dataset.test_data,
                metrics=metrics,
                batch_size=1024,
                axis=None,
                mask_invalid_label=True,
                allow_nan_forecast=False,
                seasonality=season_length,
            )

            # Append the results to the CSV file
            save_results(res, ds_config, MODEL_NAME, train_step, domain, num_variates, ds_name, csv_file_path)


Processing dataset: exchange_rate


0it [00:00, ?it/s]