In [1]:
# ========= 0) Imports & 全局配置（沿用你现有的） =========
from __future__ import annotations
import time, numpy as np, polars as pl, pandas as pd, torch, lightning as L, glob, random, warnings
from pathlib import Path
from typing import List, Tuple, Dict, Any

from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from torch.utils.data import IterableDataset

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.metrics import MAE, RMSE
from sklearn.preprocessing import FunctionTransformer

from pipeline.io import cfg, P, fs, storage_options, ensure_dir_local

def _now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    stamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(stamp_format)


# Progress marker: reaching this line means every import above succeeded.
print(f"[{_now()}] imports ok")

# ---- Key configuration (edit as needed) ----
target_col = cfg["target"]
g_sym, g_date, g_time = cfg["keys"]
weight_col = cfg["weight"]

# Feature groups; the order-preserving de-duplication below keeps the first
# occurrence of every column name.
time_features = ["time_pos", "time_sin", "time_cos", "time_bucket"]
base_features = ["feature_36", "feature_06"]
resp_his_feats = [
    "responder_5_prevday_std",
    "responder_3_prevday_std",
    "responder_4_prev_tail_d1",
]
feat_his_feats = ["feature_08__ewm5", "feature_53__rstd3"]
feature_cols = list(dict.fromkeys([*base_features, *resp_his_feats, *feat_his_feats]))

# Every column that has to be read from the raw panel shards.
_key_cols = [g_sym, g_date, g_time, weight_col, target_col]
need_cols = list(dict.fromkeys(_key_cols + time_features + feature_cols))

# ---- Cross-validation & training hyper-parameters ----
N_SPLITS = 5
GAP_DAYS = 7
TRAIN_TO_VAL = 4
ENC_LEN = 36
PRED_LEN = 1
BATCH_SIZE = 1024
# Optional overrides come from the "tft" section of the config.
_tft_cfg = cfg.get("tft", {})
LR = float(_tft_cfg.get("lr", 1e-3))
HIDDEN = int(_tft_cfg.get("hidden_size", 128))
HEADS = int(_tft_cfg.get("heads", 4))
DROPOUT = float(_tft_cfg.get("dropout", 0.2))
VAL_EVERY_STEPS = 500
SEED = int(cfg.get("seed", 42))

# ---- Raw panel shards ----
panel_dir = P("az", cfg["paths"].get("panel_shards", "panel_shards"))
glob_pat = f"{panel_dir}/*.parquet"
# The filesystem handle is queried without the "az://" scheme prefix.
_shard_matches = fs.glob(glob_pat.replace("az://", ""))
if not _shard_matches:
    raise FileNotFoundError(f"No parquet shards under: {glob_pat}")

lf_raw = pl.scan_parquet(glob_pat, storage_options=storage_options)

# ---- Global time_idx grid (built once, then re-used from the local file) ----
grid_path = P("local", "tft/panel/grid_timeidx.parquet")
_grid_file = Path(grid_path)
if not _grid_file.exists():
    # One row per distinct (date, time) pair, numbered in chronological order.
    _distinct_ticks = lf_raw.select([g_date, g_time]).unique()
    _ordered_ticks = _distinct_ticks.sort([g_date, g_time])
    lf_grid = _ordered_ticks.with_row_index("time_idx").with_columns(
        pl.col("time_idx").cast(pl.Int64)
    )
    ensure_dir_local(_grid_file.parent.as_posix())
    lf_grid.collect(streaming=True).write_parquet(grid_path, compression="zstd")
grid_lazy = pl.scan_parquet(grid_path)

# ---- Base LazyFrame for preprocessing (not standardized yet) ----
_with_time_idx = lf_raw.join(grid_lazy, on=[g_date, g_time], how="left")
lf0 = _with_time_idx.select(need_cols + ["time_idx"]).sort([g_date, g_time, g_sym])
print(f"[{_now()}] lazyframe ready")

# ---- Sorted list of every distinct day in the panel ----
_days_frame = lf0.select(pl.col(g_date)).unique().sort(by=g_date).collect(streaming=True)
all_days = _days_frame.get_column(g_date).to_numpy()

def make_sliding_cv_by_days(all_days: np.ndarray, *, n_splits: int, gap_days: int, train_to_val: int):
    """Build sliding-window (train, validation) day splits.

    The day axis is flattened and cut into consecutive validation windows.
    Each window is preceded by ``gap_days`` unused days and, before those, a
    fixed-length training window ``train_to_val`` times the base validation
    length. Days are used in the order given; no sorting happens here.

    Args:
        all_days: Array-like of day identifiers.
        n_splits: Number of folds requested.
        gap_days: Number of days skipped between training and validation.
        train_to_val: Training length expressed in base validation lengths.

    Returns:
        A list of ``(train_days, val_days)`` array pairs. The list is empty
        when the inputs cannot yield a validation window of at least one day.
    """
    days = np.asarray(all_days).ravel()
    n_days = len(days)

    usable = n_days - gap_days
    if usable <= 0 or n_splits <= 0 or train_to_val <= 0:
        return []

    # Split the usable days into (train_to_val + n_splits) equal units; the
    # remainder is handed out one day at a time to the earliest validation windows.
    base_val_len, extra = divmod(usable, train_to_val + n_splits)
    if base_val_len <= 0:
        return []
    train_len = train_to_val * base_val_len

    folds = []
    val_start = train_len + gap_days
    for fold_no in range(n_splits):
        val_len = base_val_len + (1 if fold_no < extra else 0)
        val_stop = val_start + val_len
        train_stop = val_start - gap_days
        train_start = train_stop - train_len
        if train_start < 0 or val_stop > n_days:
            break
        folds.append((days[train_start:train_stop], days[val_start:val_stop]))
        # The next validation window begins right where this one ended.
        val_start = val_stop
    return folds

# Build the day-level CV folds once and stop immediately if none could be made.
folds_by_day = make_sliding_cv_by_days(
    all_days,
    n_splits=N_SPLITS,
    gap_days=GAP_DAYS,
    train_to_val=TRAIN_TO_VAL,
)
assert folds_by_day, "no CV folds constructed"
print(f"[{_now()}] built {len(folds_by_day)} folds")

# ========= 1) 按“折训练期上界”进行标准化并落盘（fold-aware） =========
def clean_and_standardize_for_fold(lf_base: pl.LazyFrame,
                                   train_days: np.ndarray,
                                   *,
                                   feature_cols: List[str],
                                   out_dir: str,
                                   g_sym: str, g_date: str,
                                   eps: float = 1e-6) -> Tuple[List[str], List[str]]:
    """
    Clean the feature columns, z-score them with statistics taken up to the
    end of the fold's training period, and write the result partitioned by day.

    Steps applied to every column in ``feature_cols``:
      1. non-finite values (+/-inf and NaN) are replaced by null;
      2. a ``<col>__isna`` Int8 flag marks the rows that are null at that point;
      3. nulls are forward-filled within each ``g_sym`` group (following the
         row order of ``lf_base``, which is therefore expected to be
         time-ordered) and any remaining null becomes 0.0;
      4. ``<col>_z`` = (value - mean) / (std + eps), using the per-symbol
         mean/std when available and the global ones otherwise (a null or
         zero per-symbol std also falls back to the global std).

    Statistics are computed on every row whose ``g_date`` is less than or
    equal to ``max(train_days)``; rows dated before the first training day are
    included as well.

    Besides its parameters, this function reads the module-level names
    ``g_time``, ``weight_col``, ``target_col`` and ``time_features``.

    Args:
        lf_base: Lazy frame holding keys, weight, target, time features,
            ``time_idx`` and the raw feature columns.
        train_days: Training days of the fold; only the maximum is used.
        feature_cols: Names of the continuous features to clean and scale.
        out_dir: Root directory; files go to ``out_dir/{g_date}=<day>/``.
        g_sym: Name of the symbol (group) column.
        g_date: Name of the day column.
        eps: Small constant added to the std to avoid division by zero.

    Returns:
        ``(z_cols, namark_cols)``: the names of the standardized columns and
        of the missing-value flag columns, in ``feature_cols`` order.

    Raises:
        ValueError: If ``train_days`` is empty.
    """
    train_days = np.asarray(train_days).ravel()
    if train_days.size == 0:
        raise ValueError("train_days must contain at least one day")
    # Use the maximum rather than the last element so unsorted input is safe.
    stats_hi = int(train_days.max())
    print(f"[{_now()}] fold stats_hi={stats_hi}")

    # Continuous features: non-finite -> null / missing flag / per-group ffill / 0 fallback.
    # ``is_finite`` is False for NaN as well as +/-inf (and null for null, which
    # also lands in ``otherwise``), so NaN can no longer leak into mean/std.
    nonfinite2null_exprs = [pl.when(pl.col(c).is_finite()).then(pl.col(c)).otherwise(None).alias(c)
                            for c in feature_cols]
    flags_exprs = [pl.col(c).is_null().cast(pl.Int8).alias(f"{c}__isna")
                   for c in feature_cols]
    fill_exprs = [pl.col(c).forward_fill().over(g_sym).fill_null(0.0).alias(c)
                  for c in feature_cols]

    lf_clean = (
        lf_base.with_columns(nonfinite2null_exprs)
               .with_columns(flags_exprs)
               .with_columns(fill_exprs)
    )

    # Statistics from the period up to stats_hi: per symbol and global.
    lf_train = lf_clean.filter(pl.col(g_date) <= stats_hi)
    lf_stats_sym = (
        lf_train.group_by(g_sym)
                .agg([pl.col(c).mean().alias(f"mu_{c}") for c in feature_cols] +
                     [pl.col(c).std().alias(f"std_{c}") for c in feature_cols])
    )
    lf_stats_glb = lf_train.select(
        [pl.col(c).mean().alias(f"mu_{c}_glb") for c in feature_cols] +
        [pl.col(c).std().alias(f"std_{c}_glb") for c in feature_cols]
    )

    # Attach the statistics to every row: the global ones via a cross join
    # (single row), the per-symbol ones via a left join so symbols absent from
    # the statistics period get nulls and fall back to the global values.
    lf_z = lf_clean.join(lf_stats_glb, how="cross").join(lf_stats_sym, on=g_sym, how="left")

    z_cols = []
    for c in feature_cols:
        mu_c_sym, std_c_sym = f"mu_{c}", f"std_{c}"
        mu_c_glb, std_c_glb = f"mu_{c}_glb", f"std_{c}_glb"
        c_z = f"{c}_z"
        lf_z = lf_z.with_columns(
            pl.when(pl.col(mu_c_sym).is_null()).then(pl.col(mu_c_glb)).otherwise(pl.col(mu_c_sym)).alias(f"{c}_mu_use"),
            pl.when(pl.col(std_c_sym).is_null() | (pl.col(std_c_sym) == 0)).then(pl.col(std_c_glb)).otherwise(pl.col(std_c_sym)).alias(f"{c}_std_use"),
        ).with_columns(
            ((pl.col(c) - pl.col(f"{c}_mu_use")) / (pl.col(f"{c}_std_use") + eps)).alias(c_z)
        ).drop([mu_c_glb, std_c_glb, mu_c_sym, std_c_sym, f"{c}_mu_use", f"{c}_std_use"])
        z_cols.append(c_z)

    namark_cols = [f"{c}__isna" for c in feature_cols]
    out_cols = [g_sym, g_date, g_time, "time_idx", weight_col, target_col] + time_features + z_cols + namark_cols
    lf_out = lf_z.select(out_cols).sort([g_date, g_time, g_sym])

    # Write one sub-directory per day under out_dir.
    out_root = Path(out_dir)
    ensure_dir_local(out_root.as_posix())

    # ``unique()`` does not promise any order, so sort explicitly: chunks then
    # cover contiguous day ranges and the progress log below is meaningful.
    all_fold_days = sorted(
        int(d) for d in lf_out.select(g_date).unique().collect(streaming=True)[g_date].to_list()
    )
    CHUNK_DAYS_LOCAL = 20  # days materialized per collect
    day_chunks = [all_fold_days[i:i+CHUNK_DAYS_LOCAL] for i in range(0, len(all_fold_days), CHUNK_DAYS_LOCAL)]

    for ci, chunk in enumerate(day_chunks, 1):
        df_chunk = (
            lf_out.filter(pl.col(g_date).is_in(chunk))
                  .collect(streaming=True)
        )
        try:
            df_chunk.write_parquet(
                out_root.as_posix(),
                compression="zstd",
                partition_by=[g_date],
            )
        except TypeError:
            # Fallback (presumably for Polars versions without ``partition_by``):
            # write each day by hand using the same ``{g_date}=<day>`` directory
            # layout that the readers glob for.
            for d in chunk:
                day_dir = out_root / f"{g_date}={d}"
                day_dir.mkdir(parents=True, exist_ok=True)
                df_chunk.filter(pl.col(g_date) == d)\
                        .write_parquet((day_dir / "part-000.parquet").as_posix(), compression="zstd")
        print(f"[{_now()}] fold-chunk {ci}/{len(day_chunks)} -> days {chunk[0]}..{chunk[-1]} written")

    return z_cols, namark_cols


  from tqdm.autonotebook import tqdm


[2025-09-26 11:08:00] imports ok
[2025-09-26 11:08:00] lazyframe ready
[2025-09-26 11:08:00] built 5 folds


In [2]:
def build_template_and_val(clean_dir_fold: str,
                           train_days: np.ndarray,
                           val_days: np.ndarray,
                           *,
                           g_sym: str, g_date: str,
                           z_cols: List[str] | None = None,
                           namark_cols: List[str] | None = None) -> Tuple[TimeSeriesDataSet, torch.utils.data.DataLoader]:
    """
    Build the template ``TimeSeriesDataSet`` from the first training days and
    the validation ``DataLoader`` for one fold.

    The template is created from at most the first five entries of
    ``train_days``. The validation frame has its symbol column re-cast to the
    template's categories before ``TimeSeriesDataSet.from_dataset`` is called.

    Besides its parameters, this function reads the module-level names
    ``time_features``, ``target_col``, ``weight_col``, ``ENC_LEN``,
    ``PRED_LEN`` and ``BATCH_SIZE``.

    Args:
        clean_dir_fold: Directory holding ``{g_date}=<day>/*.parquet`` shards.
        train_days: Training days; the first ones feed the template.
        val_days: Validation days.
        g_sym: Name of the symbol (group) column.
        g_date: Name of the day column (used in the shard directory names).
        z_cols: Standardized feature columns. When omitted, the module-level
            ``z_cols_global`` is used (legacy behaviour).
        namark_cols: Missing-value flag columns. When omitted, the
            module-level ``namark_cols_global`` is used (legacy behaviour).

    Returns:
        ``(template, val_loader)``.

    Raises:
        FileNotFoundError: If no template shard or no validation shard exists.
    """
    # Prefer explicit column lists; fall back to the globals only for callers
    # that still publish them instead of passing arguments.
    if z_cols is None:
        z_cols = z_cols_global
    if namark_cols is None:
        namark_cols = namark_cols_global

    # Use several training days for the template so it sees more categories.
    k = min(5, len(train_days))
    tmpl_days = [int(d) for d in train_days[:k]]

    tmpl_paths = []
    for d in tmpl_days:
        tmpl_paths.extend(glob.glob(f"{clean_dir_fold}/{g_date}={d}/*.parquet"))
    if not tmpl_paths:
        # Raised explicitly (not asserted) so the check survives ``python -O``.
        raise FileNotFoundError(f"no template files under {clean_dir_fold}")

    pdf_tmpl = pl.scan_parquet(tmpl_paths).collect(streaming=True).to_pandas()
    pdf_tmpl[g_sym] = pdf_tmpl[g_sym].astype("str").astype("category")

    unknown_reals = time_features + list(z_cols) + list(namark_cols)
    # Identity transformers: the inputs are already scaled upstream, so the
    # dataset must not rescale them again.
    identity_scalers = {c: FunctionTransformer(validate=False) for c in unknown_reals}

    template = TimeSeriesDataSet(
        pdf_tmpl.sort_values([g_sym, "time_idx"]),
        time_idx="time_idx",
        target=target_col,
        group_ids=[g_sym],
        weight=weight_col,
        max_encoder_length=ENC_LEN,
        max_prediction_length=PRED_LEN,
        static_categoricals=[g_sym],
        time_varying_unknown_reals=unknown_reals,
        target_normalizer=None,   # switch to TorchNormalizer() if the target scale is large
        categorical_encoders={g_sym: NaNLabelEncoder(add_nan=True)},
        add_relative_time_idx=True, add_target_scales=True, add_encoder_length=True,
        allow_missing_timesteps=True,
        scalers=identity_scalers,
    )

    # Validation set
    val_paths = []
    for d in val_days:
        val_paths.extend(glob.glob(f"{clean_dir_fold}/{g_date}={int(d)}/*.parquet"))
    if not val_paths:
        raise FileNotFoundError(f"no validation files under {clean_dir_fold}")

    pdf_val = pl.scan_parquet(val_paths).collect(streaming=True).to_pandas()
    pdf_val[g_sym] = pdf_val[g_sym].astype("str").astype("category")

    # Align the categories with the template; symbols the template never saw
    # become missing values instead of new categories.
    pdf_val[g_sym] = pdf_val[g_sym].astype(
        pd.CategoricalDtype(categories=list(pdf_tmpl[g_sym].cat.categories))
    )

    validation = TimeSeriesDataSet.from_dataset(
        template, data=pdf_val.sort_values([g_sym, "time_idx"]), stop_randomization=True
    )
    val_loader = validation.to_dataloader(
        train=False, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, persistent_workers=True
    )
    return template, val_loader


class ShardedBatchStream(IterableDataset):
    """Stream already-collated training batches, one day shard at a time.

    For every day (visited in shuffled order) the shard's parquet files are
    loaded, turned into a ``TimeSeriesDataSet`` via the template, and iterated
    through a regular DataLoader. Batches pass through a small buffer that
    mixes batches coming from neighbouring shards.

    Args:
        template_tsd: Template dataset handed to ``TimeSeriesDataSet.from_dataset``.
        shard_days: Iterable of day identifiers (converted to ``int``).
        clean_dir: Directory holding ``{g_date}=<day>/*.parquet`` shards.
        g_sym: Name of the symbol (group) column.
        batch_size: Batch size of the per-shard DataLoader.
        num_workers: Worker count of the per-shard DataLoader.
        shuffle_within_shard: Whether the per-shard DataLoader shuffles.
        buffer_batches: Size of the mixing buffer; ``0`` disables buffering.
        seed: Integer base seed for the day order and the buffer draws.
        g_date: Name of the day column used in the shard directory names.
            When omitted, the module-level ``g_date`` is used (legacy behaviour).

    Raises:
        ValueError: If ``g_date`` is neither passed nor defined at module level.
    """

    def __init__(self, template_tsd, shard_days, clean_dir: str, g_sym: str,
                 batch_size: int = 1024, num_workers: int = 4, shuffle_within_shard: bool = True,
                 buffer_batches: int = 16, seed: int = 42, g_date: "str | None" = None):
        super().__init__()
        if g_date is None:
            # Backward compatibility: earlier call sites relied on a global.
            g_date = globals().get("g_date")
        if g_date is None:
            raise ValueError("g_date must be passed or defined at module level")
        self.template = template_tsd
        self.days = list(map(int, shard_days))
        self.clean_dir = clean_dir
        self.g_sym = g_sym
        self.g_date = g_date
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.shuffle_within_shard = shuffle_within_shard
        self.buffer_batches = buffer_batches
        self.seed = seed
        # Number of passes started so far; mixed into the seed so that every
        # epoch gets a different, yet reproducible, order.
        self._epoch = 0

    def __iter__(self):
        # Pass 0 uses exactly ``seed``; later passes use seed + 1, seed + 2, ...
        # The counter advances when iteration actually starts, because this is
        # a generator and its body only runs on the first ``next()``.
        rng = random.Random(self.seed + self._epoch)
        self._epoch += 1

        days = self.days[:]
        rng.shuffle(days)

        from collections import deque
        buf = deque()

        for d in days:
            paths = glob.glob(f"{self.clean_dir}/{self.g_date}={d}/*.parquet")
            if not paths:
                continue
            pdf = pl.scan_parquet(paths).collect(streaming=True).to_pandas()
            if pdf.empty:
                continue
            pdf[self.g_sym] = pdf[self.g_sym].astype("str").astype("category")

            tsds = TimeSeriesDataSet.from_dataset(
                self.template,
                data=pdf.sort_values([self.g_sym, "time_idx"]),
                stop_randomization=True,
            )
            dl = tsds.to_dataloader(
                train=True,
                batch_size=self.batch_size,
                num_workers=self.num_workers,
                shuffle=self.shuffle_within_shard,
                pin_memory=True,
                persistent_workers=self.num_workers > 0,
            )

            for batch in dl:
                if self.buffer_batches > 0:
                    buf.append(batch)
                    # Once the buffer is full, emit one randomly chosen batch
                    # for every new batch that comes in.
                    if len(buf) >= self.buffer_batches:
                        k = rng.randrange(len(buf))
                        if k:
                            buf.rotate(-k)
                        yield buf.popleft()
                else:
                    yield batch

        # Drain whatever is still buffered, in buffer order.
        while buf:
            yield buf.popleft()


In [3]:
def train_one_fold(fold_idx: int,
                   train_days: np.ndarray,
                   val_days: np.ndarray,
                   *,
                   lf_base: pl.LazyFrame,
                   feature_cols: List[str]) -> Dict[str, Any]:
    """
    Run one CV fold end to end: clean and standardize the data, build the
    template dataset and validation loader, train the TFT, and report the
    best checkpoint.

    Args:
        fold_idx: Fold number, used in directory and logger names.
        train_days: Training days of the fold (sorted internally).
        val_days: Validation days of the fold.
        lf_base: Base lazy frame passed to the cleaning step.
        feature_cols: Continuous feature columns to standardize.

    Returns:
        Dict with keys ``fold``, ``best_val_loss`` (NaN when no checkpoint
        score exists), ``ckpt`` and ``clean_dir``.
    """
    # The column lists are published as module-level globals because
    # build_template_and_val reads them from there.
    global z_cols_global, namark_cols_global

    print(f"\n========== FOLD {fold_idx} ==========")
    L.seed_everything(SEED, workers=True)

    sorted_train_days = np.sort(train_days)

    # 1) Output directory for this fold's cleaned shards.
    clean_dir_fold = P("local", f"tft/clean_by_day/fold={fold_idx}")
    ensure_dir_local(clean_dir_fold)

    # 2) Standardize with statistics bounded by the fold's last training day
    #    and write the shards to disk.
    cleaned_columns = clean_and_standardize_for_fold(
        lf_base,
        sorted_train_days,
        feature_cols=feature_cols,
        out_dir=clean_dir_fold,
        g_sym=g_sym,
        g_date=g_date,
    )
    z_cols_global, namark_cols_global = cleaned_columns

    # 3) Template dataset and validation loader.
    template, val_loader = build_template_and_val(
        clean_dir_fold=clean_dir_fold,
        train_days=sorted_train_days,
        val_days=val_days,
        g_sym=g_sym,
        g_date=g_date,
    )

    # 4) Streaming training dataset over the per-day shards.
    stream_kwargs = dict(
        template_tsd=template,
        shard_days=sorted_train_days,
        clean_dir=clean_dir_fold,
        g_sym=g_sym,
        batch_size=BATCH_SIZE,
        num_workers=4,
        shuffle_within_shard=True,
        buffer_batches=16,
        seed=SEED,
    )
    train_stream = ShardedBatchStream(**stream_kwargs)

    # 5) Callbacks, logger, model and trainer.
    ckpt_dir = P("local", f"tft/ckpts/fold={fold_idx}")
    early_stop_cb = EarlyStopping(monitor="val_loss", mode="min", patience=10, min_delta=1e-4)
    checkpoint_cb = ModelCheckpoint(
        dirpath=ckpt_dir,
        filename="tft-{epoch:02d}-{val_loss:.4f}",
        monitor="val_loss",
        mode="min",
        save_top_k=1,
    )
    lr_monitor_cb = LearningRateMonitor(logging_interval="step")
    callbacks = [early_stop_cb, checkpoint_cb, lr_monitor_cb]
    logger = TensorBoardLogger(save_dir=P("local","tft/tblogs"), name=f"fold{fold_idx}")

    tft = TemporalFusionTransformer.from_dataset(
        template,
        loss=RMSE(),                      # training objective
        logging_metrics=[MAE(), RMSE()],  # extra metrics that are easier to read
        learning_rate=LR,
        hidden_size=HIDDEN,
        attention_head_size=HEADS,
        dropout=DROPOUT,
        reduce_on_plateau_patience=4,
    )

    trainer = L.Trainer(
        accelerator="gpu",
        devices=1,
        precision=32,
        max_epochs=1,                     # raise for longer training
        val_check_interval=VAL_EVERY_STEPS,
        num_sanity_val_steps=0,
        gradient_clip_val=0.5,
        log_every_n_steps=50,
        callbacks=callbacks,
        logger=logger,
        default_root_dir=ckpt_dir,
    )

    trainer.fit(tft, train_dataloaders=train_stream, val_dataloaders=val_loader)

    # 6) Best checkpoint and score of this fold.
    best_ckpt = checkpoint_cb.best_model_path
    best_score = checkpoint_cb.best_model_score
    if best_score is not None:
        best_val = best_score.item()
    else:
        best_val = float("nan")
    print(f"[{_now()}] FOLD {fold_idx} best val_loss={best_val:.6f} @ {best_ckpt}")
    return {"fold": fold_idx, "best_val_loss": best_val, "ckpt": best_ckpt, "clean_dir": clean_dir_fold}


In [5]:
# Train the folds one after another; each summary is stored as soon as its
# fold finishes, so earlier results remain available if a later fold fails.
results = []
for k, (train_days, val_days) in enumerate(folds_by_day, start=1):
    res = train_one_fold(
        k,
        train_days,
        val_days,
        lf_base=lf0,
        feature_cols=feature_cols,
    )
    results.append(res)

# Summary printout
print("\n===== CV Summary =====")
# Folds without a finite score (NaN) are left out of the aggregate line only.
vals = []
for r in results:
    if np.isfinite(r["best_val_loss"]):
        vals.append(r["best_val_loss"])
if vals:
    print(f"Folds: {len(vals)}  mean(val_loss)={np.mean(vals):.6f}  std={np.std(vals):.6f}")
for r in results:
    print(f"fold={r['fold']}  val_loss={r['best_val_loss']:.6f}  ckpt={r['ckpt']}")


Seed set to 42



[2025-09-26 11:12:23] fold stats_hi=1640
[2025-09-26 11:12:30] fold-chunk 1/5 -> days 1605..1624 written
[2025-09-26 11:12:34] fold-chunk 2/5 -> days 1625..1644 written
[2025-09-26 11:12:38] fold-chunk 3/5 -> days 1645..1664 written
[2025-09-26 11:12:42] fold-chunk 4/5 -> days 1665..1684 written
[2025-09-26 11:12:45] fold-chunk 5/5 -> days 1685..1698 written


/home/admin_ml/Jackson/projects/js/JS/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 520    | train
3  | prescalers                         | ModuleDict                      | 352    | tra

Epoch 0: |          | 1228/? [21:17<00:00,  0.96it/s, v_num=0, train_loss_step=1.460, val_loss=0.899, train_loss_epoch=1.270]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 1228/? [21:18<00:00,  0.96it/s, v_num=0, train_loss_step=1.460, val_loss=0.899, train_loss_epoch=1.270]


Seed set to 42


[2025-09-26 11:34:09] FOLD 1 best val_loss=0.898728 @ /mnt/data/js/exp/v1/tft/ckpts/fold=1/tft-epoch=00-val_loss=0.8987.ckpt

[2025-09-26 11:34:09] fold stats_hi=1650
[2025-09-26 11:34:16] fold-chunk 1/5 -> days 1605..1624 written
[2025-09-26 11:34:20] fold-chunk 2/5 -> days 1625..1644 written
[2025-09-26 11:34:24] fold-chunk 3/5 -> days 1645..1664 written
[2025-09-26 11:34:27] fold-chunk 4/5 -> days 1665..1684 written
[2025-09-26 11:34:32] fold-chunk 5/5 -> days 1685..1698 written


/home/admin_ml/Jackson/projects/js/JS/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 520    | train
3  | prescalers                         | ModuleDict                      | 352    | tra

Epoch 0: |          | 1229/? [21:30<00:00,  0.95it/s, v_num=0, train_loss_step=1.360, val_loss=1.130, train_loss_epoch=1.130]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 1229/? [21:30<00:00,  0.95it/s, v_num=0, train_loss_step=1.360, val_loss=1.130, train_loss_epoch=1.130]


Seed set to 42


[2025-09-26 11:56:07] FOLD 2 best val_loss=1.126468 @ /mnt/data/js/exp/v1/tft/ckpts/fold=2/tft-epoch=00-val_loss=1.1265.ckpt

[2025-09-26 11:56:07] fold stats_hi=1660
[2025-09-26 11:56:15] fold-chunk 1/5 -> days 1605..1624 written
[2025-09-26 11:56:18] fold-chunk 2/5 -> days 1625..1644 written
[2025-09-26 11:56:23] fold-chunk 3/5 -> days 1645..1664 written
[2025-09-26 11:56:27] fold-chunk 4/5 -> days 1665..1684 written
[2025-09-26 11:56:30] fold-chunk 5/5 -> days 1685..1698 written


/home/admin_ml/Jackson/projects/js/JS/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 520    | train
3  | prescalers                         | ModuleDict                      | 352    | tra

Epoch 0: |          | 1236/? [21:39<00:00,  0.95it/s, v_num=0, train_loss_step=1.120, val_loss=1.020, train_loss_epoch=1.050]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 1236/? [21:40<00:00,  0.95it/s, v_num=0, train_loss_step=1.120, val_loss=1.020, train_loss_epoch=1.050]


Seed set to 42


[2025-09-26 12:18:18] FOLD 3 best val_loss=1.016473 @ /mnt/data/js/exp/v1/tft/ckpts/fold=3/tft-epoch=00-val_loss=1.0165.ckpt

[2025-09-26 12:18:18] fold stats_hi=1670
[2025-09-26 12:18:25] fold-chunk 1/5 -> days 1605..1624 written
[2025-09-26 12:18:29] fold-chunk 2/5 -> days 1625..1644 written
[2025-09-26 12:18:33] fold-chunk 3/5 -> days 1645..1664 written
[2025-09-26 12:18:37] fold-chunk 4/5 -> days 1665..1684 written
[2025-09-26 12:18:41] fold-chunk 5/5 -> days 1685..1698 written


/home/admin_ml/Jackson/projects/js/JS/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 520    | train
3  | prescalers                         | ModuleDict                      | 352    | tra

Epoch 0: |          | 1231/? [21:42<00:00,  0.95it/s, v_num=0, train_loss_step=1.010, val_loss=0.959, train_loss_epoch=1.030]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 1231/? [21:43<00:00,  0.94it/s, v_num=0, train_loss_step=1.010, val_loss=0.959, train_loss_epoch=1.030]


Seed set to 42


[2025-09-26 12:40:32] FOLD 4 best val_loss=0.959434 @ /mnt/data/js/exp/v1/tft/ckpts/fold=4/tft-epoch=00-val_loss=0.9594.ckpt

[2025-09-26 12:40:32] fold stats_hi=1680
[2025-09-26 12:40:39] fold-chunk 1/5 -> days 1605..1624 written
[2025-09-26 12:40:43] fold-chunk 2/5 -> days 1625..1644 written
[2025-09-26 12:40:47] fold-chunk 3/5 -> days 1645..1664 written
[2025-09-26 12:40:51] fold-chunk 4/5 -> days 1665..1684 written
[2025-09-26 12:40:55] fold-chunk 5/5 -> days 1685..1698 written


/home/admin_ml/Jackson/projects/js/JS/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 520    | train
3  | prescalers                         | ModuleDict                      | 352    | tra

Epoch 0: |          | 1245/? [21:57<00:00,  0.95it/s, v_num=0, train_loss_step=1.180, val_loss=0.966, train_loss_epoch=1.000]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 1245/? [21:58<00:00,  0.94it/s, v_num=0, train_loss_step=1.180, val_loss=0.966, train_loss_epoch=1.000]
[2025-09-26 13:02:59] FOLD 5 best val_loss=0.966133 @ /mnt/data/js/exp/v1/tft/ckpts/fold=5/tft-epoch=00-val_loss=0.9661.ckpt

===== CV Summary =====
Folds: 5  mean(val_loss)=0.993447  std=0.076288
fold=1  val_loss=0.898728  ckpt=/mnt/data/js/exp/v1/tft/ckpts/fold=1/tft-epoch=00-val_loss=0.8987.ckpt
fold=2  val_loss=1.126468  ckpt=/mnt/data/js/exp/v1/tft/ckpts/fold=2/tft-epoch=00-val_loss=1.1265.ckpt
fold=3  val_loss=1.016473  ckpt=/mnt/data/js/exp/v1/tft/ckpts/fold=3/tft-epoch=00-val_loss=1.0165.ckpt
fold=4  val_loss=0.959434  ckpt=/mnt/data/js/exp/v1/tft/ckpts/fold=4/tft-epoch=00-val_loss=0.9594.ckpt
fold=5  val_loss=0.966133  ckpt=/mnt/data/js/exp/v1/tft/ckpts/fold=5/tft-epoch=00-val_loss=0.9661.ckpt
