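To load the data, first download the GIFT-Eval benchmark with the command below, then make sure the `GIFT_EVAL` variable in your `.env` file points to the download directory: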

huggingface-cli download Salesforce/GiftEval --repo-type=dataset --local-dir data/gift_benchmark

In [7]:
from gift_eval.data import Dataset

import os
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables (the message below assumes they live in a .env file)
load_dotenv()

# Get the GIFT_EVAL path from environment variables
gift_eval_path = os.getenv("GIFT_EVAL")

if not gift_eval_path:
    # Variable is unset or empty.
    print(
        "GIFT_EVAL path not found in environment variables. Please check your .env file."
    )
elif not Path(gift_eval_path).is_dir():
    # Variable is set but does not point at a directory: report it here instead
    # of letting iterdir() raise further down.
    print(f"GIFT_EVAL path is not an existing directory: {gift_eval_path}")
else:
    # Convert to Path object for easier manipulation
    gift_eval_path = Path(gift_eval_path)

    # Build the list of dataset names: "<dataset>/<subdir>" for every
    # sub-directory of a dataset directory, or just "<dataset>" when the
    # dataset directory has no sub-directories.
    dataset_names = []
    for dataset_dir in gift_eval_path.iterdir():
        # Skip hidden entries and anything that is not a directory.
        if dataset_dir.name.startswith(".") or not dataset_dir.is_dir():
            continue

        freq_dirs = [d for d in dataset_dir.iterdir() if d.is_dir()]
        if freq_dirs:
            dataset_names.extend(
                f"{dataset_dir.name}/{freq_dir.name}" for freq_dir in freq_dirs
            )
        else:
            dataset_names.append(dataset_dir.name)

    print("Available datasets in GIFT_EVAL:")
    for name in sorted(dataset_names):
        print(f"- {name}")

Available datasets in GIFT_EVAL:
- LOOP_SEATTLE/5T
- LOOP_SEATTLE/D
- LOOP_SEATTLE/H
- M_DENSE/D
- M_DENSE/H
- SZ_TAXI/15T
- SZ_TAXI/H
- bitbrains_fast_storage/5T
- bitbrains_fast_storage/H
- bitbrains_rnd/5T
- bitbrains_rnd/H
- bizitobs_application
- bizitobs_l2c/5T
- bizitobs_l2c/H
- bizitobs_service
- car_parts_with_missing
- covid_deaths
- electricity/15T
- electricity/D
- electricity/H
- electricity/W
- ett1/15T
- ett1/D
- ett1/H
- ett1/W
- ett2/15T
- ett2/D
- ett2/H
- ett2/W
- hierarchical_sales/D
- hierarchical_sales/W
- hospital
- jena_weather/10T
- jena_weather/D
- jena_weather/H
- kdd_cup_2018_with_missing/D
- kdd_cup_2018_with_missing/H
- m4_daily
- m4_hourly
- m4_monthly
- m4_quarterly
- m4_weekly
- m4_yearly
- restaurant
- saugeenday/D
- saugeenday/M
- saugeenday/W
- solar/10T
- solar/D
- solar/H
- solar/W
- temperature_rain_with_missing
- us_births/D
- us_births/M
- us_births/W


In [8]:
# Names of the GIFT_EVAL datasets to process, written as "<dataset>" or
# "<dataset>/<frequency>" exactly as they appear in the listing printed above.
DATASET_GIFT_EVAL = [
    # bitbrains
    "bitbrains_fast_storage/5T",
    "bitbrains_fast_storage/H",
    "bitbrains_rnd/5T",
    "bitbrains_rnd/H",

    # bizitobs
    "bizitobs_application",
    "bizitobs_l2c/5T",
    "bizitobs_l2c/H",
    "bizitobs_service",

    # jena_weather
    "jena_weather/10T",
    "jena_weather/D",
    "jena_weather/H",

    # solar
    "solar/10T",
    "solar/D",
    "solar/H",
    "solar/W",
]

In [9]:
import pandas as pd

In [10]:
# Reassigns DATASET_GIFT_EVAL to a single entry; this replaces the longer list
# assigned in the earlier cell, so only this dataset is processed below.
DATASET_GIFT_EVAL = [
    "bitbrains_fast_storage/5T",
]

In [11]:
# Loading options shared by every dataset (hoisted out of the loop: they do not
# change between iterations).
to_univariate = False  # Whether to convert the data to univariate
term = "short"

# Opt-in CSV export; off by default, matching the previously disabled export code.
SAVE_CSV = False

for ds_name in DATASET_GIFT_EVAL:
    # Load the dataset
    dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate)

    train_split_iter = dataset.training_dataset
    val_split_iter = dataset.validation_dataset
    test_split_iter = dataset.test_data

    # Materialize each split into a DataFrame and tag its rows with the split name.
    train_data = list(train_split_iter)
    train_df = pd.DataFrame(train_data)
    train_df["set"] = "train"

    val_data = list(val_split_iter)
    val_df = pd.DataFrame(val_data)
    val_df["set"] = "val"

    # Each test item unpacks into two entries; both are stored as consecutive
    # rows, so test_data[0] is the first half of the first pair.
    test_data = []
    for x0, x1 in test_split_iter:
        test_data.extend((x0, x1))
    test_df = pd.DataFrame(test_data)
    test_df["set"] = "test"

    # concatenate the dataframes
    # NOTE: `df` is rebound on every iteration, so after the loop it holds only
    # the last dataset in DATASET_GIFT_EVAL.
    df = pd.concat([train_df, val_df, test_df], ignore_index=True)

    if SAVE_CSV:
        # save the dataframe to a CSV file
        # NOTE(review): cells holding arrays (e.g. "target") are presumably
        # written as their string form, which may be lossy - confirm before
        # relying on the CSV as a data store. The target directory must exist.
        name = ds_name.replace("/", "_")
        csv_path = f"data/gift_eval/{name}.csv"
        df.to_csv(csv_path, index=True)

        # Report the path that was actually written.
        print(f"Saved {name} dataset to {csv_path}")

  freq = norm_freq_str(to_offset(self.freq).name)
  return pd.Period(val, freq)


In [12]:
# dtype of the "past_feat_dynamic_real" field on the first collected test entry
first_test_entry = test_data[0]
first_test_entry["past_feat_dynamic_real"].dtype

dtype('float32')

In [13]:
# "target" value of the first row of the combined frame (row label 0)
df.loc[0, "target"]

array([[708.93317 , 703.73315 , 695.06647 , ..., 686.39984 , 693.3331  ,
        714.1331  ],
       [ 13.633333,  13.533334,  13.366667, ...,  13.2     ,  13.333333,
         13.733334]], shape=(2, 7728), dtype=float32)