In [1]:
import os
from src.evaluation.save_results import create_output_file, save_results

# Where the evaluation results are collected: <output_dir>/<output_file_name>.
output_dir = "results/test"
output_file_name = "chronos_bolt_base_test_results1.csv"
csv_file_path = os.path.join(output_dir, output_file_name)

# Prepare the results file before any evaluation runs.
# NOTE(review): presumably creates the CSV (with its header row) inside
# output_dir; whether an existing file is kept or overwritten is decided by
# create_output_file -- confirm in src.evaluation.save_results.
create_output_file(output_file_name, output_dir)


In [2]:
from gluonts.model import evaluate_model
from gluonts.time_feature import get_seasonality
from src.evaluation.load_chronos_data import load_data

In [3]:
from src.evaluation.metrics import get_metrics

# Instantiate the metrics once; this same object is passed as `metrics=` to
# every evaluate_model call in the evaluation cell.
metrics = get_metrics()

### Data

In [4]:
import json
from dotenv import load_dotenv

# Load environment variables from a .env file, if one is found (python-dotenv).
load_dotenv()

# Space-separated GIFT-Eval dataset names making up the short-term list.
short_datasets = (
    "solar/10T solar/H solar/D solar/W jena_weather/10T jena_weather/H jena_weather/D "
    "bitbrains_fast_storage/5T bitbrains_fast_storage/H bitbrains_rnd/5T bitbrains_rnd/H bizitobs_application "
    "bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
)

# Space-separated names that are additionally evaluated on the "medium" and
# "long" terms; the evaluation cell skips those terms for any other dataset.
med_long_datasets = (
    "solar/10T solar/H jena_weather/10T jena_weather/H "
    "bitbrains_fast_storage/5T bitbrains_rnd/5T bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
)

# Union of both lists. Sorted so the order is identical on every kernel start:
# iterating a set of strings depends on per-process hash randomization, which
# made the previous list(set(...)) order non-reproducible.
all_datasets = sorted(set(short_datasets.split()) | set(med_long_datasets.split()))

# Per-dataset metadata keyed by lower-cased dataset name; the evaluation cell
# reads the "frequency", "domain" and "num_variates" entries.
# A context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open("data/dataset_properties.json") as properties_file:
    dataset_properties_map = json.load(properties_file)

In [5]:
from src.evaluation.load_data import load_gift_data

# NOTE(review): presumably fetches or locates the GIFT benchmark data (confirm
# in src.evaluation.load_data). When run, it printed the list of available
# dataset configurations shown in this cell's output.
load_gift_data()

  from .autonotebook import tqdm as notebook_tqdm


Available datasets in /raid/decaro/TimeSeriesForecastingFoundationModels/data/gift_benchmark:
- LOOP_SEATTLE/5T
- LOOP_SEATTLE/D
- LOOP_SEATTLE/H
- M_DENSE/D
- M_DENSE/H
- SZ_TAXI/15T
- SZ_TAXI/H
- bitbrains_fast_storage/5T
- bitbrains_fast_storage/H
- bitbrains_rnd/5T
- bitbrains_rnd/H
- bizitobs_application
- bizitobs_l2c/5T
- bizitobs_l2c/H
- bizitobs_service
- car_parts_with_missing
- covid_deaths
- electricity/15T
- electricity/D
- electricity/H
- electricity/W
- ett1/15T
- ett1/D
- ett1/H
- ett1/W
- ett2/15T
- ett2/D
- ett2/H
- ett2/W
- hierarchical_sales/D
- hierarchical_sales/W
- hospital
- jena_weather/10T
- jena_weather/D
- jena_weather/H
- kdd_cup_2018_with_missing/D
- kdd_cup_2018_with_missing/H
- m4_daily
- m4_hourly
- m4_monthly
- m4_quarterly
- m4_weekly
- m4_yearly
- restaurant
- saugeenday/D
- saugeenday/M
- saugeenday/W
- solar/10T
- solar/D
- solar/H
- solar/W
- temperature_rain_with_missing
- us_births/D
- us_births/M
- us_births/W


In [6]:
# GIFT-Eval dataset configurations to evaluate, in processing order.
# Names are either "<dataset>/<frequency>" or a bare "<dataset>" whose
# frequency is looked up in dataset_properties_map by the evaluation cell.
DATASET_NAME = [
    "bizitobs_l2c/5T",
    "bitbrains_fast_storage/5T",
    "bitbrains_rnd/H",
    "bizitobs_l2c/H",
    "jena_weather/H",
    "bizitobs_application",
    "bizitobs_service",
    "bitbrains_fast_storage/H",
    "solar/D",
    "jena_weather/10T",
    "solar/W",
    "jena_weather/D",
    "bitbrains_rnd/5T",
    "solar/10T",
    "solar/H",
]

In [7]:
# Datasets that the evaluation cell loads through load_data
# (src.evaluation.load_chronos_data), in processing order.
CHRONOS_DATASET_NAME = [
    "exchange_rate",
    "ercot",
    "dominick",
]

### Evaluation

In [8]:
# Directory prefix of the run; checkpoint directory names (see CHECKPOINTS)
# are appended to it to locate each checkpoint.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
MODEL_PATH = "/raid/decaro/TimeSeriesForecastingFoundationModels/chronos_output/chronos-bolt-base/run-0/"
# Model label passed to save_results for every evaluated checkpoint.
MODEL_NAME = "chronos-bolt-base"

# Checkpoint directory names for steps 120_000 through 200_000 (inclusive),
# in increments of 10_000.
CHECKPOINTS = [f"checkpoint-{step}" for step in range(120_000, 200_000 + 1, 10_000)]

In [9]:
import sys
# Put the (working-directory-relative) "src" folder on the import path so its
# top-level modules can be imported by bare name. The membership guard keeps
# the cell idempotent: re-running it no longer adds duplicate entries.
if "src" not in sys.path:
    sys.path.insert(0, "src")

In [10]:
from src.evaluation.chronos_predictor import ChronosPredictor
from src.gift_eval.data import Dataset

# Evaluate every checkpoint on the Chronos datasets and on the GIFT-Eval
# datasets, appending one row to the results CSV per (checkpoint, dataset, term).
#
# NOTE(review): the recorded output of this cell shows `shared.weight` being
# newly initialized on load because of a shape mismatch (checkpoint
# [4096, 768] vs. instantiated model [1, 768]). Confirm in ChronosPredictor /
# the checkpoint config that this does not affect the forecasts.

terms = ["short", "medium", "long"]
# GIFT-Eval datasets that are also evaluated on the "medium" and "long" terms.
med_long_names = set(med_long_datasets.split())
# Settings shared by every evaluate_model call below.
shared_eval_kwargs = dict(
    metrics=metrics,
    axis=None,
    mask_invalid_label=True,
    allow_nan_forecast=False,
)

for model_name in CHECKPOINTS:
    # Checkpoint directory names have the form "checkpoint-<train_step>".
    train_step = int(model_name.split("-")[-1])
    # os.path.join works whether or not MODEL_PATH ends with a slash.
    checkpoint_path = os.path.join(MODEL_PATH, model_name)

    # ---- Chronos datasets -------------------------------------------------
    # Checkpoint 120_000 is skipped for all Chronos datasets.
    # NOTE(review): presumably those results were already written by an
    # earlier run -- confirm before relying on the CSV being complete.
    if train_step != 120_000:
        for ds_name in CHRONOS_DATASET_NAME:
            print(f"Processing dataset: {ds_name}")

            for term in terms:
                ds_config = f"{ds_name}/{term}"

                dataset, prediction_length, frequency, domain, num_variates = load_data(ds_name, term)
                season_length = get_seasonality(frequency)

                predictor = ChronosPredictor(
                    model_path=checkpoint_path,
                    num_samples=20,
                    prediction_length=prediction_length,
                    device_map="cuda",
                )

                res = evaluate_model(
                    predictor,
                    test_data=dataset,
                    batch_size=512,
                    seasonality=season_length,
                    **shared_eval_kwargs,
                )

                # Append the results to the CSV file
                save_results(res, ds_config, MODEL_NAME, train_step, domain, num_variates, ds_name, csv_file_path)

    # ---- GIFT-Eval datasets -----------------------------------------------
    for ds_name in DATASET_NAME:
        # NOTE(review): same resume-style skip as above, for a single dataset.
        if train_step == 120_000 and ds_name == 'bizitobs_l2c/5T':
            continue

        print(f"Processing dataset: {ds_name}")

        # "<dataset>/<freq>" names carry their frequency; bare names take it
        # from the properties file. None of this depends on the term, so it is
        # resolved once per dataset.
        if "/" in ds_name:
            ds_key, ds_freq = ds_name.split("/")[:2]
            ds_key = ds_key.lower()
        else:
            ds_key = ds_name.lower()
            ds_freq = dataset_properties_map[ds_key]["frequency"]

        # Looked up before the expensive evaluation so that a missing metadata
        # entry fails immediately rather than after a long run.
        domain = dataset_properties_map[ds_key]["domain"]
        num_variates = dataset_properties_map[ds_key]["num_variates"]

        for term in terms:
            if term in ("medium", "long") and ds_name not in med_long_names:
                continue

            ds_config = f"{ds_key}/{ds_freq}/{term}"

            # Initialize the dataset: load it as-is first, and rebuild it in
            # univariate form only when it has more than one target dimension.
            dataset = Dataset(name=ds_name, term=term, to_univariate=False)
            to_univariate = dataset.target_dim != 1
            if to_univariate:
                dataset = Dataset(name=ds_name, term=term, to_univariate=True)
            season_length = get_seasonality(dataset.freq)

            predictor = ChronosPredictor(
                model_path=checkpoint_path,
                num_samples=20,
                prediction_length=dataset.prediction_length,
                device_map="cuda",
            )

            res = evaluate_model(
                predictor,
                test_data=dataset.test_data,
                batch_size=1024,
                seasonality=season_length,
                **shared_eval_kwargs,
            )

            # Append the results to the CSV file
            save_results(res, ds_config, MODEL_NAME, train_step, domain, num_variates, ds_name, csv_file_path)


Processing dataset: bitbrains_fast_storage/5T


  offset = pd.tseries.frequencies.to_offset(freq)
  freq = norm_freq_str(to_offset(self.freq).name)
Unknown architecture: T5ForConditionalGeneration, defaulting to ChronosBoltModelForForecasting


prediction_length: 48


Some weights of ChronosBoltModelForForecasting were not initialized from the model checkpoint at /raid/decaro/TimeSeriesForecastingFoundationModels/chronos_output/chronos-bolt-base/run-0/checkpoint-120000 and are newly initialized because the shapes did not match:
- shared.weight: found shape torch.Size([4096, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

  return pd.Period(val, freq)


1it [00:22, 22.39s/it]

2it [00:44, 22.23s/it]

3it [01:07, 22.43s/it]

4it [01:28, 22.07s/it]

5it [01:51, 22.18s/it]

6it [02:12, 21.95s/it]

7it [02:34, 21.82s/it]

8it [02:56, 22.09s/it]

9it [03:18, 22.04s/it]

10it [03:40, 21.98s/it]

11it [04:02, 21.82s/it]

12it [04:24, 21.88s/it]

13it [04:45, 21.76s/it]

14it [05:07, 21.76s/it]

15it [05:29, 21.88s/it]

16it [05:51, 21.93s/it]

17it [06:14, 22.14s/it]

18it [06:36, 22.13s/it]

19it [06:58, 22.07s/it]

20it [07:20, 22.05s/it]

21it [07:42, 22.11s/it]

22it [08:04, 22.04s/it]

23it [08:26, 21.97s/it]

24it [08:48, 21.98s/it]

25it [09:10, 22.06s/it]

26it [09:32, 22.15s/it]

27it [09:56, 22.74s/it]

28it [10:22, 23.64s/it]

29it [10:51, 25.19s/it]

30it [11:16, 25.11s/it]

31it [11:42, 25.55s/it]

32it [12:09, 25.92s/it]

33it [12:36, 26.22s/it]

34it [13:02, 26.15s/it]

35it [13:29, 26.28s/it]

36it [13:54, 25.98s/it]

37it [14:22, 26.47s/it]

38it [14:46, 25.83s/it]

39it [15:10, 25.23s/it]

40it [15:34, 24.81s/it]

41it [16:00, 25.39s/it]

42it [16:24, 24.98s/it]

43it [16:49, 24.93s/it]

44it [17:11, 24.08s/it]

44it [17:11, 23.45s/it]




0it [00:00, ?it/s]

1024it [00:02, 375.94it/s]

2048it [00:05, 383.84it/s]

3072it [00:07, 387.77it/s]

4096it [00:10, 388.50it/s]

5120it [00:13, 387.51it/s]

6144it [00:15, 388.29it/s]

7168it [00:18, 388.63it/s]

8192it [00:21, 388.91it/s]

9216it [00:23, 389.62it/s]

10240it [00:26, 389.76it/s]

11264it [00:29, 389.59it/s]

12288it [00:31, 389.21it/s]

13312it [00:34, 389.29it/s]

14336it [00:36, 389.82it/s]

15360it [00:39, 389.74it/s]

16384it [00:42, 389.69it/s]

17408it [00:45, 372.95it/s]

18432it [00:47, 377.89it/s]

19456it [00:50, 380.69it/s]

20480it [00:53, 383.15it/s]

21504it [00:55, 382.49it/s]

22528it [00:58, 381.86it/s]

23552it [01:01, 383.14it/s]

24576it [01:03, 385.22it/s]

25600it [01:06, 386.37it/s]

26624it [01:08, 386.96it/s]

27648it [01:11, 388.01it/s]

28672it [01:14, 388.37it/s]

29696it [01:16, 389.08it/s]

30720it [01:19, 388.46it/s]

31744it [01:22, 388.82it/s]

32768it [01:24, 388.46it/s]

33792it [01:27, 387.92it/s]

34816it [01:30, 388.35it/s]

35840it [01:32, 387.97it/s]

36864it [01:35, 388.04it/s]

37888it [01:37, 388.26it/s]

38912it [01:40, 388.68it/s]

39936it [01:43, 388.89it/s]

40960it [01:45, 389.08it/s]

41984it [01:48, 389.01it/s]

43008it [01:51, 388.56it/s]

44032it [01:53, 388.12it/s]

45000it [01:56, 388.25it/s]

45000it [01:56, 387.01it/s]


  res["MSE[mean]"][0],
  res["MSE[0.5]"][0],
  res["MAE[0.5]"][0],
  res["MASE[0.5]"][0],
  res["MAPE[0.5]"][0],
  res["sMAPE[0.5]"][0],
  res["MSIS"][0],
  res["RMSE[mean]"][0],
  res["NRMSE[mean]"][0],
  res["ND[0.5]"][0],
  res["mean_weighted_sum_quantile_loss"][0],


Results for bitbrains_fast_storage/5T have been written


  offset = pd.tseries.frequencies.to_offset(freq)
  freq = norm_freq_str(to_offset(self.freq).name)
Unknown architecture: T5ForConditionalGeneration, defaulting to ChronosBoltModelForForecasting


prediction_length: 480


Some weights of ChronosBoltModelForForecasting were not initialized from the model checkpoint at /raid/decaro/TimeSeriesForecastingFoundationModels/chronos_output/chronos-bolt-base/run-0/checkpoint-120000 and are newly initialized because the shapes did not match:
- shared.weight: found shape torch.Size([4096, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]



1it [03:05, 185.45s/it]