In [None]:
!pip install -r requirements.txt

In [None]:
import os
import shutil
from pathlib import Path

import numpy as np
import tensorflow as tf
import pandas as pd


import importlib
hf_datasets = importlib.import_module("datasets")

In [None]:
def export_hf_to_csv(
    hf_name,
    split,
    out_root,
):
    """
    Export one split of a Hugging Face dataset to per-subject CSV files
    compatible with glucofm_bench.py.

    Every row of the loaded split is written to
    ``<out_root>/<split>/<dataset>/<subject_id>.csv`` with exactly two
    columns, ``timestamp`` and ``BGvalue``. When ``split == "train"`` an
    empty marker file named ``all`` is additionally created inside every
    dataset folder that received at least one CSV.

    Parameters
    ----------
    hf_name : str
        Dataset identifier forwarded to ``datasets.load_dataset``.
    split : str
        Name of the split to load; also used as the sub-directory name
        under ``out_root``.
    out_root : str
        Root directory under which the CSV tree is created.

    Returns
    -------
    None
    """
    # Load what the caller asked for. Previously the repo name and the
    # "train" split were hard-coded, so a "test" export silently wrote
    # training data into the test folder.
    ds = hf_datasets.load_dataset(hf_name, split=split)

    split_dir = os.path.join(out_root, split)
    # dataset name -> output directory; lets us create each directory once
    # and reuse the set of names for the marker files below.
    dataset_dirs = {}

    for row in ds:
        dataset = row["dataset"]
        subject_id = row["subject_id"]

        out_dir = dataset_dirs.get(dataset)
        if out_dir is None:
            out_dir = os.path.join(split_dir, dataset)
            os.makedirs(out_dir, exist_ok=True)
            dataset_dirs[dataset] = out_dir

        df = pd.DataFrame({
            "timestamp": row["timestamp"],
            "BGvalue": row["BGvalue"],  # IMPORTANT: keep this name
        })
        df.to_csv(os.path.join(out_dir, f"{subject_id}.csv"), index=False)

    # create "all" marker for training
    if split == "train":
        for out_dir in dataset_dirs.values():
            with open(os.path.join(out_dir, "all"), "w"):
                pass



def copy_hf_csvs_to_mixed(root_dir="hf_cache"):
    """
    Recursively copy CSV files into a flat per-split "mixed" folder:
      <root_dir>/train/**/*.csv -> <root_dir>/train/mixed/
      <root_dir>/test/**/*.csv  -> <root_dir>/test/mixed/

    - Does NOT modify originals
    - Skips a split entirely (and creates nothing) if its directory is missing
    - Skips any CSV already under a 'mixed' folder *inside the split* to avoid
      duplication; components of ``root_dir`` itself are not considered
    - Handles name collisions by joining the path relative to the split with
      "__" (so files from different sub-folders won't overwrite each other)

    Parameters
    ----------
    root_dir : str or Path
        Directory containing the ``train`` and/or ``test`` sub-directories.

    Returns
    -------
    None
    """

    root = Path(root_dir)

    for split in ["train", "test"]:
        split_dir = root / split

        # Check BEFORE creating the destination: mkdir(parents=True) on
        # <split>/mixed would otherwise create split_dir and make this
        # check always pass.
        if not split_dir.is_dir():
            print(f"[{split}] Skip: {split_dir} not found")
            continue

        dst_dir = split_dir / "mixed"
        dst_dir.mkdir(parents=True, exist_ok=True)

        # Recursively find CSVs. The "mixed" filter is applied to the path
        # relative to the split so a root_dir that happens to contain a
        # component named "mixed" does not exclude everything.
        sources = []
        for path in split_dir.rglob("*.csv"):
            rel = path.relative_to(split_dir)
            if "mixed" not in rel.parts:
                sources.append((path, rel))

        print(f"[{split}] Found {len(sources)} CSV files under subfolders")

        copied = 0
        for src_path, rel in sources:
            # Build a collision-safe filename by encoding the relative path,
            # e.g. datasetA/subject1.csv -> datasetA__subject1.csv
            safe_name = "__".join(rel.parts)
            shutil.copy2(src_path, dst_dir / safe_name)
            copied += 1

        print(f"[{split}] Copied {copied} files to: {dst_dir}\n")


In [None]:
# Write the per-subject CSV tree for each split under ./hf_cache/<split>/...
export_hf_to_csv(hf_name="byluuu/gluco-tsfm-benchmark", split="train", out_root="./hf_cache")
export_hf_to_csv(hf_name="byluuu/gluco-tsfm-benchmark", split="test", out_root="./hf_cache")

# Flatten every exported CSV into hf_cache/<split>/mixed/.
copy_hf_csvs_to_mixed(root_dir="hf_cache")

In [None]:
# Train TimeLLM (GPT2 backbone, --llm_layers 4) through run_main.py with --is_training 1.
# Windows: --seq_len 144 / --label_len 72 / --pred_len 18 at --freq 5min, sampled with --stride 12.
# NOTE(review): --root_path points at a Google Drive folder, not the ./hf_cache/train/mixed
# directory produced by the export cell earlier in this notebook; confirm which is intended.
# NOTE(review): --target is "glucose" while the exported CSVs use the column "BGvalue";
# presumably the Glucose data loader handles this, verify against run_main.py.
!python run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --model TimeLLM \
  --model_id Glucose_train_main \
  --model_comment "GlucoseTrain" \
  --llm_model GPT2 \
  --llm_layers 4 \
  --llm_dim 768 \
  --data Glucose \
  --root_path /content/drive/Shareddrives/Baiying/preprocessed_dataset/training_dataset/mixed \
  --features S \
  --target glucose \
  --freq 5min \
  --seq_len 144 \
  --label_len 72 \
  --pred_len 18 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --train_epochs 40 \
  --learning_rate 5e-4 \
  --num_workers 2 \
  --stride 12 \
  --max_windows_per_epoch 30000 \
  --des GlucoseTrain


In [None]:
# Evaluation-only run (--is_training 0) using the same --model_id / --model_comment as the
# training cell, reading test data from --test_root_path with --stride 3.
# NOTE(review): --test_root_path is a Google Drive folder rather than ./hf_cache/test/mixed
# produced by the export cell earlier in this notebook; confirm which is intended.
!python run_main.py \
  --task_name long_term_forecast \
  --is_training 0 \
  --model TimeLLM \
  --model_id Glucose_train_main \
  --model_comment "GlucoseTrain" \
  --llm_model GPT2 \
  --llm_layers 4 \
  --llm_dim 768 \
  --data Glucose \
  --test_root_path /content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/mixed \
  --features S \
  --target glucose \
  --freq 5min \
  --seq_len 144 \
  --label_len 72 \
  --pred_len 18 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --num_workers 2 \
  --stride 3 \
  --des GlucoseTrain


In [None]:
# Evaluation-only run (--is_training 0) on the "controlled_mixed" test folder.
# NOTE(review): --model_comment is "GlucoseTrain_7to1" here but "GlucoseTrain" in the
# training cell. If run_main.py derives the checkpoint location from model_comment
# (TODO confirm), this run will not load the weights trained earlier in this notebook.
!python run_main.py \
  --task_name long_term_forecast \
  --is_training 0 \
  --model TimeLLM \
  --model_id Glucose_train_main \
  --model_comment "GlucoseTrain_7to1" \
  --llm_model GPT2 \
  --llm_layers 4 \
  --llm_dim 768 \
  --data Glucose \
  --test_root_path /content/drive/Shareddrives/Baiying/preprocessed_dataset/test_dataset/controlled_mixed \
  --features S \
  --target glucose \
  --freq 5min \
  --seq_len 144 \
  --label_len 72 \
  --pred_len 18 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --num_workers 2 \
  --stride 3 \
  --des GlucoseTrain
