# Running Gift-Eval with Credence

### Credence: Time‑Series Foundation Model

> **Note:** Our model was previously named *Kairos‑1.0*; its successor, **Credence**, is now released publicly with an API for external evaluation.  
> The rename also helps avoid confusion with other models that later adopted the Kairos name or other similar names.

This notebook lets you replicate the **GIFT‑Eval** evaluation using the **Credence API**.  
The model was internally tested and the official results are published on the GIFT‑Eval leaderboard.

To run your own tests:
1. Get an API key and follow setup instructions at [https://docs.credence.continualist.ai/docs](https://docs.credence.continualist.ai/docs).  
2. Use this notebook to call the API and run evaluations.

⚠️ Usage note:   
By default, accounts include limited credits for evaluation due to associated compute costs, so more credits maybe needed for testing all GIFT-Eval datasets.  
For reporting issues or extended testing, email **[team@continualist.ai](team@continualist.ai)**.

In [1]:
# Install requests if not already installed
%cd ..
! pip install requests

/Users/geremiapompei/Desktop/Work/ContinualIST/gift-eval
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m


After obtaining your API key from the Credence dashboard at https://app.credence.continualist.ai, paste it below to authenticate your API requests

In [2]:
API_KEY = "<YOUR_CREDENCE_API_KEY_HERE>"

### Installation

Import the necessary third-party dependencies

In [4]:
import os
import csv
from gift_eval.data import Dataset
from gluonts.ev.metrics import (
    MAE, MAPE, MASE, MSE, MSIS, ND, NRMSE, RMSE, SMAPE, MeanWeightedSumQuantileLoss,)
from gluonts.model.forecast import QuantileForecast
from gluonts.model import evaluate_model
import json
import numpy as np
import pandas as pd
import random
import requests
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


Prepare the configurations

In [5]:
# Path configurations
out_dir = 'results/Credence'

# Model
model_name = "Credence"

# Auxiliary configurations
seed = 0
batch_size = 16

In [6]:
# Experiment configurations
short_datasets = "m4_yearly m4_quarterly m4_monthly m4_weekly m4_daily m4_hourly electricity/15T electricity/H electricity/D electricity/W solar/10T solar/H solar/D solar/W hospital covid_deaths us_births/D us_births/M us_births/W saugeenday/D saugeenday/M saugeenday/W temperature_rain_with_missing kdd_cup_2018_with_missing/H kdd_cup_2018_with_missing/D car_parts_with_missing restaurant hierarchical_sales/D hierarchical_sales/W LOOP_SEATTLE/5T LOOP_SEATTLE/H LOOP_SEATTLE/D SZ_TAXI/15T SZ_TAXI/H M_DENSE/H M_DENSE/D ett1/15T ett1/H ett1/D ett1/W ett2/15T ett2/H ett2/D ett2/W jena_weather/10T jena_weather/H jena_weather/D bitbrains_fast_storage/5T bitbrains_fast_storage/H bitbrains_rnd/5T bitbrains_rnd/H bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
med_long_datasets = "electricity/15T electricity/H solar/10T solar/H kdd_cup_2018_with_missing/H LOOP_SEATTLE/5T LOOP_SEATTLE/H SZ_TAXI/15T M_DENSE/H ett1/15T ett1/H ett2/15T ett2/H jena_weather/10T jena_weather/H bitbrains_fast_storage/5T bitbrains_rnd/5T bizitobs_application bizitobs_service bizitobs_l2c/5T bizitobs_l2c/H"
dataset_properties = 'notebooks/dataset_properties.json'

In [7]:
# Auxiliary functions
def set_seed(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)


class CredenceAPI:

    def __init__(
        self,
        api_key: str,
        horizon: int,
        frequency: str = "H",
    ):
        self.api_key = api_key
        self.horizon = horizon
        self.frequency = frequency

    def predict(
        self,
        test_data,
        **kwargs
    ):
        preds = []
        for batch in test_data:
            quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            context_input = []
            prev_value = 0
            for value in batch["target"].tolist():
                context_input.append(prev_value)
                prev_value = value if not np.isnan(value) else prev_value
            response = requests.post(
                "https://api.credence.continualist.ai/api/v1/credence/forecast",
                json={
                    "frequency": self.frequency,
                    "horizon": self.horizon,
                    "model_type": "large",
                    "quantiles": quantiles,
                    "sequence": [
                        {
                            "timestamp": (batch["start"] + i).strftime('%Y-%m-%dT%H:%M:%SZ'),
                            "target": v
                        }
                        for i, v in enumerate(context_input)
                    ]
                },
                headers={
                    "Content-Type": "application/json",
                    "x-api-key": self.api_key
                }
            )
            if response.status_code != 200:
                raise ValueError(f"Error: {response.text}")
            response = response.json()
            forecast_arrays = np.array([
                response["quantiles"][str(q)]
                for q in quantiles
            ])

            # Uncomment to see remaining credits
            # print(f"Remaining credits: {response['remaining_credits']}/{response['total_credits']} ({response['remaining_credits'] / response['total_credits'] * 100:.2f}%)")

            # pred has shape: batch, forecast_len, quantiles
            preds.append(
                QuantileForecast(
                    forecast_arrays=forecast_arrays,
                    forecast_keys=[str(q) for q in quantiles],
                    start_date=batch["start"] + len(context_input),
                )
            )

        return preds

Experiment wrapper

In [8]:
base_row = [
    "dataset",
    "model",
    "eval_metrics/MSE[mean]",
    "eval_metrics/MSE[0.5]",
    "eval_metrics/MAE[mean]",
    "eval_metrics/MAE[0.5]",
    "eval_metrics/MASE[0.5]",
    "eval_metrics/MAPE[0.5]",
    "eval_metrics/sMAPE[0.5]",
    "eval_metrics/MSIS",
    "eval_metrics/RMSE[mean]",
    "eval_metrics/NRMSE[mean]",
    "eval_metrics/ND[0.5]",
    "eval_metrics/mean_weighted_sum_quantile_loss",
    "domain",
    "num_variates"
]


def run_gift_eval(zs=False, save=False, verbose=True):
    set_seed(seed)

    # Get union of short and med_long datasets
    all_datasets = sorted(
        set(short_datasets.split() + med_long_datasets.split()))
    all_datasets = reversed(all_datasets)

    dataset_properties_map = json.load(open(dataset_properties))

    # Instantiate the metrics
    metrics = [
        MSE(forecast_type="mean"),
        MSE(forecast_type=0.5),
        MAE(forecast_type="mean"),
        MAE(forecast_type=0.5),
        MASE(),
        MAPE(),
        SMAPE(),
        MSIS(),
        RMSE(),
        NRMSE(),
        ND(),
        MeanWeightedSumQuantileLoss(
            quantile_levels=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
    ]

    # ## Evaluation
    # Define the path for the CSV file
    csv_file_path = os.path.join(out_dir, "all_results.csv")

    pretty_names = {
        "saugeenday": "saugeen",
        "temperature_rain_with_missing": "temperature_rain",
        "kdd_cup_2018_with_missing": "kdd_cup_2018",
        "car_parts_with_missing": "car_parts",
    }

    if not os.path.exists(csv_file_path) and save:
        with open(csv_file_path, "a", newline="") as csvfile:
            writer = csv.writer(csvfile)

            # Write the header
            writer.writerow(base_row)
    if save:
        df_res_done = pd.read_csv(csv_file_path)
        done_datasets = df_res_done["dataset"].values
    else:
        done_datasets = []
    df_res = pd.DataFrame(columns=base_row)

    # the zero-shot subset whithout data leakage for the chronos pretraining corpus (to fairly compare with tirex and chronos models)
    if zs:
        excluded = ["solar/H", "m4_monthly", "m4_weekly", "m4_daily", "m4_hourly", "electricity/15T", "electricity/H",
                    "electricity/W", "kdd_cup_2018_with_missing/D", "kdd_cup_2018_with_missing/H", "temperature_rain_with_missing"]
    else:
        excluded = []

    for ds_name in all_datasets:
        if ds_name in excluded:
            continue
        set_seed(seed)
        terms = ["short", "medium", "long"]
        for term in terms:
            if (term == "medium" or term == "long") and ds_name not in med_long_datasets.split():
                continue

            if "/" in ds_name:
                ds_key = ds_name.split("/")[0]
                ds_freq = ds_name.split("/")[1]
                ds_key = ds_key.lower()
                ds_key = pretty_names.get(ds_key, ds_key)
            else:
                ds_key = ds_name.lower()
                ds_key = pretty_names.get(ds_key, ds_key)
                ds_freq = dataset_properties_map[ds_key]["frequency"]
            ds_config = f"{ds_key}/{ds_freq}/{term}"

            to_univariate = (
                False
                if Dataset(name=ds_name, term=term, to_univariate=False).target_dim == 1
                else True
            )
            dataset = Dataset(name=ds_name, term=term,
                              to_univariate=to_univariate)

            all_lengths = []
            for x in dataset.test_data:
                if len(x[0]["target"].shape) == 1:
                    all_lengths.append(len(x[0]["target"]))
                else:
                    all_lengths.append(x[0]["target"].shape[1])

            if ds_config in done_datasets:
                df_res = df_res._append(
                    df_res_done.loc[df_res_done['dataset'] == ds_config], ignore_index=True)
                continue

            if verbose:
                print(
                    f"Dataset: {ds_name}, Freq = {dataset.freq}, H = {dataset.prediction_length}")

            # Evaluate
            model = CredenceAPI(
                api_key=API_KEY,
                horizon=dataset.prediction_length,
                frequency=dataset.freq,
            )
            res = evaluate_model(
                model,
                test_data=dataset.test_data,
                metrics=metrics,
                batch_size=batch_size,
                axis=None,
                mask_invalid_label=True,
                allow_nan_forecast=False,
            )
            if verbose:
                print(f'MASE: {res["MASE[0.5]"][0]}')

            # Append the results to the CSV file
            row = [
                ds_config,
                model_name,
                res["MSE[mean]"][0],
                res["MSE[0.5]"][0],
                res["MAE[mean]"][0],
                res["MAE[0.5]"][0],
                res["MASE[0.5]"][0],
                res["MAPE[0.5]"][0],
                res["sMAPE[0.5]"][0],
                res["MSIS"][0],
                res["RMSE[mean]"][0],
                res["NRMSE[mean]"][0],
                res["ND[0.5]"][0],
                res["mean_weighted_sum_quantile_loss"][0],
                dataset_properties_map[ds_key]["domain"],
                dataset_properties_map[ds_key]["num_variates"],
            ]
            if save:
                with open(csv_file_path, "a", newline="") as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(row)
                if verbose:
                    print(
                        f"Results for {ds_name} have been written to {csv_file_path}")
            df_res.loc[len(df_res)] = row

    # Print Results
    seasonal_naive = pd.read_csv(
        f"results/seasonal_naive/all_results.csv").sort_values('dataset')
    dataset = seasonal_naive['dataset'].to_list()
    seasonal_naive_mase = seasonal_naive[f'eval_metrics/MASE[0.5]'].to_numpy()
    seasonal_naive_crps = seasonal_naive[f'eval_metrics/mean_weighted_sum_quantile_loss'].to_numpy()
    df = df_res
    df = df.sort_values(by="dataset")
    df['normalized MASE'] = np.zeros(len(df))
    df['normalized CRPS'] = np.zeros(len(df))
    df['freq'] = np.zeros(len(df))
    df['len'] = np.zeros(len(df))
    for ds in df['dataset']:
        idx = dataset.index(ds)
        _, f, l = ds.split('/')
        df.loc[df['dataset'] == ds, 'freq'] = f
        df.loc[df['dataset'] == ds, 'len'] = l
        df.loc[df['dataset'] == ds, 'normalized MASE'] = df.loc[df['dataset']
                                                                == ds, f'eval_metrics/MASE[0.5]'].values / seasonal_naive_mase[idx]
        df.loc[df['dataset'] == ds, 'normalized CRPS'] = df.loc[df['dataset'] == ds,
                                                                f'eval_metrics/mean_weighted_sum_quantile_loss'].values / seasonal_naive_crps[idx]

    df = df.sort_values(by=['dataset'])

    def geo_mean(iterable):
        a = np.array(iterable)
        return a.prod()**(1.0/len(a))

    mase = geo_mean(df['normalized MASE'].to_numpy())
    crps = geo_mean(df['normalized CRPS'].to_numpy())

    return mase, crps

Start the experiment

In [9]:
mase, crps = run_gift_eval(verbose=True, save=True)
print(
    f'Final GIFT-Eval Performance of {model_name}:\nMASE = {mase}, CRPS = {crps}')

Final GIFT-Eval Performance of Credence:
MASE = 0.6901174782158342, CRPS = 0.4728288546556098
