In [1]:
# Mount Google Drive at /content/drive so the shared-drive folder below is reachable.
from google.colab import drive
drive.mount('/content/drive')

# Make the shared-drive folder the working directory and print it as a sanity check.
import os
shared_path = '/content/drive/Shareddrives/Baiying'
os.chdir(shared_path)
print("Working directory:", os.getcwd())

%cd chronos-forecasting

Mounted at /content/drive
Working directory: /content/drive/Shareddrives/Baiying
/content/drive/Shareddrives/Baiying/chronos-forecasting


In [2]:
!pip install 'chronos-forecasting[extras]>=2.2' 'matplotlib'

Collecting chronos-forecasting>=2.2 (from chronos-forecasting[extras]>=2.2)
  Downloading chronos_forecasting-2.2.2-py3-none-any.whl.metadata (23 kB)
Collecting boto3<2,>=1.10 (from chronos-forecasting[extras]>=2.2)
  Downloading boto3-1.42.34-py3-none-any.whl.metadata (6.8 kB)
Collecting fev>=0.6.1 (from chronos-forecasting[extras]>=2.2)
  Downloading fev-0.7.0-py3-none-any.whl.metadata (20 kB)
Collecting peft<0.18,>=0.13.0 (from chronos-forecasting[extras]>=2.2)
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting botocore<1.43.0,>=1.42.34 (from boto3<2,>=1.10->chronos-forecasting[extras]>=2.2)
  Downloading botocore-1.42.34-py3-none-any.whl.metadata (5.9 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3<2,>=1.10->chronos-forecasting[extras]>=2.2)
  Downloading jmespath-1.1.0-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.17.0,>=0.16.0 (from boto3<2,>=1.10->chronos-forecasting[extras]>=2.2)
  Downloading s3transfer-0.16.0-py3-none-any.whl.metadata (1.7 k

In [3]:
import os

# Use only 1 GPU if available
# NOTE(review): this is set before the chronos import below, presumably so that it takes
# effect before any CUDA initialisation. Keep this ordering unless confirmed otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from chronos import BaseChronosPipeline, Chronos2Pipeline

from pathlib import Path
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the Chronos-2 pipeline
# GPU recommended for faster inference, but CPU is also supported using device_map="cpu"
# NOTE(review): device_map is hard-coded to "cuda" here, so this cell expects a GPU runtime.
pipeline: Chronos2Pipeline = BaseChronosPipeline.from_pretrained("amazon/chronos-2", device_map="cuda")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/478M [00:00<?, ?B/s]

In [4]:
def load_and_prepare_data(file_path):
    """Read one glucose CSV and return it as a tidy long-format frame.

    The ``BGvalue`` column is renamed to ``target``, every row is tagged with the
    constant ``item_id`` ``'patient_1'``, ``timestamp`` is parsed to datetime, and
    rows are sorted chronologically with a fresh 0..n-1 index.

    Args:
        file_path: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        DataFrame with exactly the columns ``item_id``, ``timestamp``, ``target``.
    """
    return (
        pd.read_csv(file_path)
        .rename(columns={'BGvalue': 'target'})
        .assign(
            item_id='patient_1',
            timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
        )
        .sort_values('timestamp')
        .reset_index(drop=True)
        [['item_id', 'timestamp', 'target']]
    )

def split_into_sequences(df, gap_threshold_hours=1):
    """Split data into continuous sequences based on time gaps.

    A new sequence starts at the first row and at every row whose distance to the
    previous row's ``timestamp`` is strictly greater than ``gap_threshold_hours``
    (or cannot be computed, e.g. a missing timestamp).

    The input frame is NOT modified: segment ids are computed on the side instead
    of being written into helper columns on ``df``.

    Args:
        df: DataFrame with a ``timestamp`` column, already sorted chronologically.
        gap_threshold_hours: Largest gap (in hours) still treated as continuous.

    Returns:
        List of DataFrames, one per continuous segment, in chronological order,
        each with the same columns as ``df`` and a fresh 0..n-1 index.
    """
    if len(df) == 0:
        return []

    gap_threshold = pd.Timedelta(hours=gap_threshold_hours)
    time_diff = df['timestamp'].diff()

    # True on the first row (diff is NaT) and on every row that follows a long gap.
    starts_new_sequence = (time_diff > gap_threshold) | time_diff.isna()

    # Running count of segment starts gives each row its segment id. Group by the raw
    # array so the grouping is positional and independent of df's index labels.
    sequence_ids = starts_new_sequence.cumsum().to_numpy()

    return [
        group.reset_index(drop=True)
        for _, group in df.groupby(sequence_ids)
    ]

In [5]:
def sequence_to_fit_input(seq_df):
    """Convert one continuous segment into the dict passed to ``pipeline.fit``.

    Args:
        seq_df: DataFrame with a ``target`` column.

    Returns:
        Dict with the ``target`` values as a float32 array and empty
        ``past_covariates`` / ``future_covariates`` dicts.
    """
    target_values = seq_df["target"].astype("float32").values
    return dict(
        target=target_values,
        past_covariates={},
        future_covariates={},
    )

In [6]:
def generate_training_windows_from_sequence(
    ts,
    context_length,
    prediction_length,
    stride=10
):
    """Cut one series into fixed-length sliding windows.

    Each window holds ``context_length + prediction_length`` consecutive target
    values. Window starts advance by ``stride`` samples, and each window's ``start``
    is shifted from ``ts["start"]`` by 5 minutes per sample offset.

    Args:
        ts: Mapping with ``target`` (sliceable sequence), ``start`` and ``item_id``.
        context_length: Number of history samples per window.
        prediction_length: Number of future samples per window.
        stride: Step between consecutive window starts, in samples.

    Returns:
        List of dicts with keys ``start``, ``target`` and ``item_id``; empty when
        the series is shorter than one window.
    """
    series = ts["target"]
    window_size = context_length + prediction_length
    last_offset = len(series) - window_size

    # Not enough samples for even a single window.
    if last_offset < 0:
        return []

    return [
        {
            "start": ts["start"] + pd.Timedelta(minutes=5 * offset),
            "target": series[offset : offset + window_size],
            "item_id": ts["item_id"],
        }
        for offset in range(0, last_offset + 1, stride)
    ]


In [7]:
from pathlib import Path

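# NOTE: the commented-out function below is a disabled, window-based variant kept for reference.
# It calls `sequence_to_chronos_ts`, which is not defined in the visible cells; the active
# entry point is `build_fit_inputs_from_folder`, defined right after this block.
# def build_chronos_training_data(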
#     data_dir,
#     context_length,
#     prediction_length,
#     stride=10,
#     gap_threshold_hours=1
# ):
#     training_windows = []

#     csv_files = sorted(Path(data_dir).glob("*.csv"))

#     for csv_path in csv_files:
#         df = load_and_prepare_data(csv_path)
#         sequences = split_into_sequences(df, gap_threshold_hours)

#         for seq_df in sequences:
#             ts = sequence_to_chronos_ts(seq_df)

#             windows = generate_training_windows_from_sequence(
#                 ts,
#                 context_length,
#                 prediction_length,
#                 stride
#             )
#             training_windows.extend(windows)

#     return training_windows

def build_fit_inputs_from_folder(
    folder,
    gap_threshold_hours=1,
    min_sequence_length=200,   # filter short segments
):
    """Collect fit inputs from every ``*.csv`` file in a folder.

    Files are processed in sorted order. Each file is loaded, split into
    continuous segments at gaps longer than ``gap_threshold_hours``, and every
    segment with at least ``min_sequence_length`` rows is converted with
    ``sequence_to_fit_input``.

    Args:
        folder: Directory containing the CSV files.
        gap_threshold_hours: Gap size (hours) that starts a new segment.
        min_sequence_length: Segments with fewer rows than this are dropped.

    Returns:
        List of fit-input dicts, one per retained segment.
    """
    fit_inputs = []

    for csv_file in sorted(Path(folder).glob("*.csv")):
        frame = load_and_prepare_data(str(csv_file))
        segments = split_into_sequences(frame, gap_threshold_hours=gap_threshold_hours)
        fit_inputs.extend(
            sequence_to_fit_input(segment)
            for segment in segments
            if len(segment) >= min_sequence_length
        )

    return fit_inputs


In [None]:
# Forecast horizon in samples (18 samples correspond to the 90 minutes noted below).
PREDICTION_LENGTH = 18   # 90 min

# Build one fit input per continuous segment found in the training CSVs: segments are
# split at gaps longer than 1 hour, and segments shorter than 200 rows are dropped.
train_inputs = build_fit_inputs_from_folder(
    folder="/content/drive/Shareddrives/Baiying/preprocessed_dataset/training_dataset/mixed",
    gap_threshold_hours=1,
    min_sequence_length=200
)

print(f"Total training sequences: {len(train_inputs)}")


Total training sequences: 943


In [None]:
# Fine-tune the base pipeline with LoRA on all training sequences, then save the result
# so it can be reloaded later with from_pretrained.
lora_finetuned_pipeline = pipeline.fit(
    inputs=train_inputs,          # all sequences
    prediction_length=PREDICTION_LENGTH,   # reuse the horizon constant instead of a second literal 18
    finetune_mode="lora",         # LoRA fine-tuning (not full fine-tuning)
    num_steps=800,
    batch_size=32,
    learning_rate=1e-5,
    min_past=144,                 # enforce ≥12h context
)

lora_finetuned_pipeline.save_pretrained("./chronos2_glucose_lora_more_steps_few_shot")

  return torch._C._get_cublas_allow_tf32()
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
100,3.2656
200,3.3633
300,3.3025
400,3.286
500,3.2718
600,3.2769
700,3.2336
800,3.266


In [8]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_squared_error, mean_absolute_error


def evaluate_test_folder(
    test_folder,
    pipeline,
    context_lengths,
    prediction_length=18,
    gap_threshold_hours=1,
    step_size=1,
    log_every=100,        # <<< log frequency
):
    """
    Evaluate a fine-tuned Chronos pipeline on a test folder with progress logs.

    Every ``*.csv`` file in ``test_folder`` is treated as one participant. For each
    context length the file is loaded, split into continuous segments, and a
    sliding window (``context_length`` history rows followed by
    ``prediction_length`` future rows) is moved over every segment in steps of
    ``step_size``. Each window is forecast with ``pipeline.predict_df`` and the
    ``predictions`` column is compared with the observed ``target`` values.

    Windows whose prediction raises are skipped (best effort), but they are now
    counted and reported per participant together with the first error seen,
    instead of being dropped silently.

    Parameters
    ----------
    test_folder : str or Path
        Directory containing one CSV per participant.
    pipeline : object
        Fitted pipeline exposing ``predict_df(context, prediction_length=...,
        quantile_levels=...)``.
    context_lengths : iterable of int
        History lengths (in samples) to evaluate.
    prediction_length : int, default 18
        Number of future samples to forecast per window.
    gap_threshold_hours : float, default 1
        Gap (hours) at which a participant's data is split into segments.
    step_size : int, default 1
        Distance in samples between consecutive window starts; must be >= 1.
    log_every : int, default 100
        Print progress every ``log_every`` processed windows; 0/None disables it.

    Returns
    -------
    summary_df : DataFrame
        One row per (Context_Length, Horizon) with the mean/std of RMSE and MAE
        across participants and the number of participants.
    detailed_df : DataFrame
        One row per (Patient, Context_Length, Horizon) with RMSE, MAE and the
        number of successfully predicted windows.
        Both frames are empty (with their usual columns) when no window could be
        evaluated.

    Raises
    ------
    ValueError
        If ``step_size`` is smaller than 1.
    """
    if step_size < 1:
        raise ValueError(f"step_size must be >= 1, got {step_size}")

    detailed_columns = [
        "Patient", "Context_Length", "Context_Hours",
        "Horizon", "RMSE", "MAE", "N_Windows",
    ]
    summary_columns = [
        "Context_Length", "Horizon",
        "RMSE_Mean", "RMSE_Std", "MAE_Mean", "MAE_Std", "N_Patients",
    ]

    # Horizon label -> number of samples ahead; metrics use index h - 1 of each forecast.
    horizon_steps = {
        "15min": 3,
        "30min": 6,
        "60min": 12,
        "90min": 18,
    }

    records = []
    test_folder = Path(test_folder)
    csv_files = sorted(test_folder.glob("*.csv"))

    print(f"Found {len(csv_files)} test participants\n")

    for context_length in context_lengths:
        window_length = context_length + prediction_length

        print("=" * 70)
        print(f"Context length = {context_length} "
              f"({context_length * 5 / 60:.1f} hours)")
        print("=" * 70)

        for p_idx, csv_path in enumerate(csv_files, 1):
            patient_id = csv_path.stem
            print(f"[Patient {p_idx}/{len(csv_files)}] {patient_id}")

            df = load_and_prepare_data(csv_path)
            sequences = split_into_sequences(df, gap_threshold_hours)

            # Only segments long enough for at least one full window are usable.
            eligible = [seq_df for seq_df in sequences if len(seq_df) >= window_length]

            # Count windows first (for accurate progress reporting)
            total_windows = sum(
                (len(seq_df) - window_length) // step_size + 1
                for seq_df in eligible
            )

            if total_windows == 0:
                print("  -> skipped (no valid windows)")
                continue

            preds_all = []
            gts_all = []
            processed_windows = 0
            failed_windows = 0
            first_error = None

            for seq_df in eligible:
                max_start = len(seq_df) - window_length

                for start in range(0, max_start + 1, step_size):
                    context = seq_df.iloc[start : start + context_length]
                    future = seq_df.iloc[start + context_length : start + window_length]

                    try:
                        pred_df = pipeline.predict_df(
                            context,
                            prediction_length=prediction_length,
                            quantile_levels=[0.5],
                        )

                        pred = pred_df["predictions"].values
                        gt = future["target"].values

                        preds_all.append(pred)
                        gts_all.append(gt)

                    except Exception as exc:
                        # Deliberately best-effort: keep evaluating the remaining
                        # windows, but remember that (and why) this one was lost.
                        failed_windows += 1
                        if first_error is None:
                            first_error = exc

                    processed_windows += 1

                    if log_every and processed_windows % log_every == 0:
                        print(
                            f"    Process {processed_windows}/{total_windows}"
                        )

            if failed_windows:
                print(
                    f"  -> {failed_windows}/{total_windows} windows failed "
                    f"(first error: {first_error!r})"
                )

            preds_all = np.array(preds_all)
            gts_all = np.array(gts_all)

            if len(preds_all) == 0:
                print("  -> no successful predictions")
                continue

            for name, h in horizon_steps.items():
                # Horizons beyond the forecast length cannot be scored.
                if h > prediction_length:
                    continue

                rmse = np.sqrt(
                    mean_squared_error(
                        gts_all[:, h - 1],
                        preds_all[:, h - 1]
                    )
                )
                mae = mean_absolute_error(
                    gts_all[:, h - 1],
                    preds_all[:, h - 1]
                )

                records.append({
                    "Patient": patient_id,
                    "Context_Length": context_length,
                    "Context_Hours": context_length * 5 / 60,
                    "Horizon": name,
                    "RMSE": rmse,
                    "MAE": mae,
                    "N_Windows": len(preds_all),
                })

            print(f"  -> completed ({len(preds_all)} windows)\n")

    detailed_df = pd.DataFrame(records, columns=detailed_columns)

    # Without any record there is nothing to group; grouping a column-less frame
    # would raise KeyError, so return well-formed empty tables instead.
    if detailed_df.empty:
        return pd.DataFrame(columns=summary_columns), detailed_df

    summary_df = (
        detailed_df
        .groupby(["Context_Length", "Horizon"])
        .agg(
            RMSE_Mean=("RMSE", "mean"),
            RMSE_Std=("RMSE", "std"),
            MAE_Mean=("MAE", "mean"),
            MAE_Std=("MAE", "std"),
            N_Patients=("Patient", "nunique"),
        )
        .reset_index()
    )

    return summary_df, detailed_df



In [None]:
# Evaluate the fine-tuned pipeline on the mixed test set with a 144-sample context,
# then write the per-horizon summary and the per-patient details to CSV.
context_lengths = [144]

summary_df, detailed_df = evaluate_test_folder(
    test_folder="/content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/mixed",
    pipeline=lora_finetuned_pipeline,
    context_lengths=context_lengths,
    prediction_length=18,
    step_size=1,
)

summary_df.to_csv("./chronos2_lora_test_summary_few_shot.csv", index=False)
detailed_df.to_csv("./chronos2_lora_test_detailed_few_shot.csv", index=False)

Found 549 test participants

Context length = 144 (12.0 hours)
[Patient 1/549] 001
    Process 100/122
  -> completed (122 windows)

[Patient 2/549] 002
  -> completed (51 windows)

[Patient 3/549] 003
  -> skipped (no valid windows)
[Patient 4/549] 004
  -> completed (33 windows)

[Patient 5/549] 005
  -> completed (21 windows)

[Patient 6/549] 006
  -> completed (95 windows)

[Patient 7/549] 007
  -> completed (37 windows)

[Patient 8/549] 008
  -> completed (68 windows)

[Patient 9/549] 009
  -> skipped (no valid windows)
[Patient 10/549] 1001
    Process 100/234
    Process 200/234
  -> completed (234 windows)

[Patient 11/549] 1002
    Process 100/1021
    Process 200/1021
    Process 300/1021
    Process 400/1021
    Process 500/1021
    Process 600/1021
    Process 700/1021
    Process 800/1021
    Process 900/1021
    Process 1000/1021
  -> completed (1021 windows)

[Patient 12/549] 1003
    Process 100/642
    Process 200/642
    Process 300/642
    Process 400/642
    Process

  df['timestamp'] = pd.to_datetime(df['timestamp'])


    Process 100/296
    Process 200/296
  -> completed (296 windows)

[Patient 235/549] HT_01
    Process 100/174
  -> completed (174 windows)

[Patient 236/549] HT_02
    Process 100/185
  -> completed (185 windows)

[Patient 237/549] HT_03
    Process 100/209
    Process 200/209
  -> completed (209 windows)

[Patient 238/549] HT_04
    Process 100/231
    Process 200/231
  -> completed (231 windows)

[Patient 239/549] HT_05
  -> completed (10 windows)

[Patient 240/549] HT_06
  -> completed (69 windows)

[Patient 241/549] HT_07
  -> completed (77 windows)

[Patient 242/549] HT_08
    Process 100/154
  -> completed (154 windows)

[Patient 243/549] HT_09
  -> skipped (no valid windows)
[Patient 244/549] HT_10
    Process 100/144
  -> completed (144 windows)

[Patient 245/549] HT_11
    Process 100/166
  -> completed (166 windows)

[Patient 246/549] HUPA0001P
    Process 100/659
    Process 200/659
    Process 300/659
    Process 400/659
    Process 500/659
    Process 600/659
  -> comp

In [9]:
import torch

# Reload the pipeline from the local directory that the save_pretrained call in this
# notebook writes to, requesting bfloat16 and automatic device placement.
lora_finetuned_pipeline = Chronos2Pipeline.from_pretrained(
    "./chronos2_glucose_lora_more_steps_few_shot",   # your saved model
    device_map="auto",
    dtype=torch.bfloat16
)

In [11]:
# Evaluate the fine-tuned pipeline on the OhioT1DM test folder with a 144-sample context,
# then write the summary and detailed result tables to CSV.
context_lengths = [144]

summary_df, detailed_df = evaluate_test_folder(
    test_folder="/content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/controlled_datasets/OhioT1DM",
    pipeline=lora_finetuned_pipeline,
    context_lengths=context_lengths,
    prediction_length=18,
    step_size=1,
)

summary_df.to_csv("./chronos2_lora_test_summary_Ohio_few_shot.csv", index=False)
detailed_df.to_csv("./chronos2_lora_test_detailed_Ohio_few_shot.csv", index=False)

Found 12 test participants

Context length = 144 (12.0 hours)
[Patient 1/12] 540
    Process 100/2440
    Process 200/2440
    Process 300/2440
    Process 400/2440
    Process 500/2440
    Process 600/2440
    Process 700/2440
    Process 800/2440
    Process 900/2440
    Process 1000/2440
    Process 1100/2440
    Process 1200/2440
    Process 1300/2440
    Process 1400/2440
    Process 1500/2440
    Process 1600/2440
    Process 1700/2440
    Process 1800/2440
    Process 1900/2440
    Process 2000/2440
    Process 2100/2440
    Process 2200/2440
    Process 2300/2440
    Process 2400/2440
  -> completed (2297 windows)

[Patient 2/12] 544
    Process 100/2115
    Process 200/2115
    Process 300/2115
    Process 400/2115
    Process 500/2115
    Process 600/2115
    Process 700/2115
    Process 800/2115
    Process 900/2115
    Process 1000/2115
    Process 1100/2115
    Process 1200/2115
    Process 1300/2115
    Process 1400/2115
    Process 1500/2115
    Process 1600/2115
    Pro

In [12]:
# Evaluate the fine-tuned pipeline on the 5_T1DEXI test folder with a 144-sample context,
# then write the summary and detailed result tables to CSV.
context_lengths = [144]

summary_df, detailed_df = evaluate_test_folder(
    test_folder="/content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/controlled_datasets/5_T1DEXI",
    pipeline=lora_finetuned_pipeline,
    context_lengths=context_lengths,
    prediction_length=18,
    step_size=1,
)

summary_df.to_csv("./chronos2_lora_test_summary_t1dexi_few_shot.csv", index=False)
detailed_df.to_csv("./chronos2_lora_test_detailed_t1dexi_few_shot.csv", index=False)

Found 502 test participants

Context length = 144 (12.0 hours)
[Patient 1/502] 1
    Process 100/1419
    Process 200/1419
    Process 300/1419
    Process 400/1419
    Process 500/1419
    Process 600/1419
    Process 700/1419
    Process 800/1419
    Process 900/1419
    Process 1000/1419
    Process 1100/1419
    Process 1200/1419
    Process 1300/1419
    Process 1400/1419
  -> completed (1419 windows)

[Patient 2/502] 1000
    Process 100/1262
    Process 200/1262
    Process 300/1262
    Process 400/1262
    Process 500/1262
    Process 600/1262
    Process 700/1262
    Process 800/1262
    Process 900/1262
    Process 1000/1262
    Process 1100/1262
    Process 1200/1262
  -> completed (1262 windows)

[Patient 3/502] 1004
    Process 100/1273
    Process 200/1273
    Process 300/1273
    Process 400/1273
    Process 500/1273
    Process 600/1273
    Process 700/1273
    Process 800/1273
    Process 900/1273
    Process 1000/1273
    Process 1100/1273
    Process 1200/1273
  -> c

  df['timestamp'] = pd.to_datetime(df['timestamp'])


    Process 100/1124
    Process 200/1124
    Process 300/1124
    Process 400/1124
    Process 500/1124
    Process 600/1124
    Process 700/1124
    Process 800/1124
    Process 900/1124
    Process 1000/1124
    Process 1100/1124
  -> completed (1124 windows)

[Patient 269/502] 255
    Process 100/1183
    Process 200/1183
    Process 300/1183
    Process 400/1183
    Process 500/1183
    Process 600/1183
    Process 700/1183
    Process 800/1183
    Process 900/1183
    Process 1000/1183
    Process 1100/1183
  -> completed (1183 windows)

[Patient 270/502] 256
    Process 100/1430
    Process 200/1430
    Process 300/1430
    Process 400/1430
    Process 500/1430
    Process 600/1430
    Process 700/1430
    Process 800/1430
    Process 900/1430
    Process 1000/1430
    Process 1100/1430
    Process 1200/1430
    Process 1300/1430
    Process 1400/1430
  -> completed (1430 windows)

[Patient 271/502] 261
    Process 100/1260
    Process 200/1260
    Process 300/1260
    Process 4

In [13]:
# Evaluate the fine-tuned pipeline on the 8_DiaTrend test folder with a 144-sample context.
# Unlike the other evaluation cells, step_size=3 is used here, so only every third
# window start is evaluated. Results are written to CSV afterwards.
context_lengths = [144]

summary_df, detailed_df = evaluate_test_folder(
    test_folder="/content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/controlled_datasets/8_DiaTrend",
    pipeline=lora_finetuned_pipeline,
    context_lengths=context_lengths,
    prediction_length=18,
    step_size=3,
)

summary_df.to_csv("./chronos2_lora_test_summary_diatrend_few_shot.csv", index=False)
detailed_df.to_csv("./chronos2_lora_test_detailed_diatrend_few_shot.csv", index=False)

Found 54 test participants

Context length = 144 (12.0 hours)
[Patient 1/54] Subject1
    Process 100/2098
    Process 200/2098
    Process 300/2098
    Process 400/2098
    Process 500/2098
    Process 600/2098
    Process 700/2098
    Process 800/2098
    Process 900/2098
    Process 1000/2098
    Process 1100/2098
    Process 1200/2098
    Process 1300/2098
    Process 1400/2098
    Process 1500/2098
    Process 1600/2098
    Process 1700/2098
    Process 1800/2098
    Process 1900/2098
    Process 2000/2098
  -> completed (2074 windows)

[Patient 2/54] Subject10
    Process 100/19104
    Process 200/19104
    Process 300/19104
    Process 400/19104
    Process 500/19104
    Process 600/19104
    Process 700/19104
    Process 800/19104
    Process 900/19104
    Process 1000/19104
    Process 1100/19104
    Process 1200/19104
    Process 1300/19104
    Process 1400/19104
    Process 1500/19104
    Process 1600/19104
    Process 1700/19104
    Process 1800/19104
    Process 1900/19104