In [3]:
# =========================
# Step Counter for Baseline (epoch-based) training
# - Computes "original" total optimizer steps when epochs=100
# - For BOTH training modes:
#   (1) trial_only        : train samples = trials
#   (2) ours_window_proxy : train samples = windows (win_sec/stride_sec)
# - LOSO per activity, per fold
# =========================

import os, glob, random
import numpy as np
import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader


# ---------------------------------------------------------------------
# 1) Strict Seeding
# ---------------------------------------------------------------------
def set_strict_seed(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# ---------------------------------------------------------------------
# 2) Data Loading
# ---------------------------------------------------------------------
def load_mhealth_dataset(data_dir, target_activities_map, column_names):
    full_dataset = {}
    file_list = sorted(glob.glob(os.path.join(data_dir, "mHealth_subject*.log")))
    if not file_list:
        print(f"[Warning] No mHealth logs found in {data_dir}")
        return {}

    print(f"Loading {len(file_list)} subjects from {data_dir}...")
    for file_path in file_list:
        file_name = os.path.basename(file_path)
        subj_part = file_name.split('.')[0]
        try:
            subj_id_num = int(''.join(filter(str.isdigit, subj_part)))
            subj_key = f"subject{subj_id_num}"
        except:
            subj_key = subj_part

        try:
            df = pd.read_csv(file_path, sep="\t", header=None)
            df = df.iloc[:, :len(column_names)]
            df.columns = column_names

            subj_data = {}
            for label_code, activity_name in target_activities_map.items():
                activity_df = df[df['activity_id'] == label_code].copy()
                if not activity_df.empty:
                    subj_data[activity_name] = activity_df.drop(columns=['activity_id'])

            full_dataset[subj_key] = subj_data
        except Exception as e:
            print(f"Error loading {file_name}: {e}")
            pass

    return full_dataset


def prepare_trial_list(label_config, full_data, target_map, feature_map):
    trial_list = []
    for subj, act_id, gt_count in label_config:
        act_name = target_map.get(act_id)
        feats = feature_map.get(act_id)

        if subj in full_data and act_name in full_data[subj]:
            raw_df = full_data[subj][act_name][feats]
            raw_np = raw_df.values.astype(np.float32)

            mean = raw_np.mean(axis=0)
            std = raw_np.std(axis=0) + 1e-6
            norm_np = (raw_np - mean) / std

            trial_list.append({
                'data': norm_np,              # (T, C)
                'count': float(gt_count),
                'meta': f"{subj}_{act_name}"
            })
        else:
            print(f"[Skip] Missing data for {subj} - {act_name}")

    return trial_list


# ---------------------------------------------------------------------
# 2.5) Windowing
# ---------------------------------------------------------------------
def trial_list_to_windows(trial_list, fs, win_sec=8.0, stride_sec=4.0, drop_last=True):
    win_len = int(round(win_sec * fs))
    stride = int(round(stride_sec * fs))
    assert win_len > 0 and stride > 0

    windows = []
    for item in trial_list:
        x = item["data"]  # (T,C)
        T = x.shape[0]
        total_count = float(item["count"])
        meta = item["meta"]

        total_dur = max(T / float(fs), 1e-6)
        rate_trial = total_count / total_dur  # reps/s

        if T < win_len:
            win_dur = T / float(fs)
            windows.append({
                "data": x,
                "count": rate_trial * win_dur,
                "meta": f"{meta}__win[0:{T}]",
            })
            continue

        last_start = T - win_len
        starts = list(range(0, last_start + 1, stride))

        for st in starts:
            ed = st + win_len
            win_dur = win_len / float(fs)
            windows.append({
                "data": x[st:ed],
                "count": rate_trial * win_dur,
                "meta": f"{meta}__win[{st}:{ed}]",
            })

        if not drop_last:
            last_st = starts[-1] + stride
            if last_st < T:
                ed = T
                win_dur = (ed - last_st) / float(fs)
                windows.append({
                    "data": x[last_st:ed],
                    "count": rate_trial * win_dur,
                    "meta": f"{meta}__win[{last_st}:{ed}]",
                })

    return windows


# ---------------------------------------------------------------------
# 2.8) Dataset / Collate (same as your baseline)
# ---------------------------------------------------------------------
class TrialDataset(Dataset):
    def __init__(self, trial_list):
        self.trials = trial_list

    def __len__(self):
        return len(self.trials)

    def __getitem__(self, idx):
        item = self.trials[idx]
        data = torch.tensor(item['data'], dtype=torch.float32).transpose(0, 1)  # (C,T)
        count = torch.tensor(item['count'], dtype=torch.float32)
        return data, count, item['meta']


def collate_variable_length(batch):
    max_len = max([x[0].shape[1] for x in batch])
    C = batch[0][0].shape[0]

    padded_data, masks, counts, metas, lengths = [], [], [], [], []
    for data, count, meta in batch:
        T = data.shape[1]
        lengths.append(T)

        pad_size = max_len - T
        if pad_size > 0:
            pad = torch.zeros(C, pad_size)
            d_padded = torch.cat([data, pad], dim=1)
            mask = torch.cat([torch.ones(T), torch.zeros(pad_size)], dim=0)
        else:
            d_padded = data
            mask = torch.ones(T)

        padded_data.append(d_padded)
        masks.append(mask)
        counts.append(count)
        metas.append(meta)

    return {
        "data": torch.stack(padded_data),         # (B,C,Tmax)
        "mask": torch.stack(masks),               # (B,Tmax)
        "count": torch.stack(counts),             # (B,)
        "length": torch.tensor(lengths, dtype=torch.float32),  # (B,)
        "meta": metas
    }


# ---------------------------------------------------------------------
# Step counting helpers
# ---------------------------------------------------------------------
def make_loader_and_count_steps(train_data, batch_size, seed=42):
    g = torch.Generator()
    g.manual_seed(seed)
    loader = DataLoader(
        TrialDataset(train_data),
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_variable_length,
        generator=g,
        num_workers=0
    )
    steps_per_epoch = len(loader)          # = ceil(N_samples / batch_size)
    n_samples = len(train_data)
    return n_samples, steps_per_epoch


def summarize_steps(steps_list):
    arr = np.array(steps_list, dtype=np.float32)
    return dict(min=int(arr.min()), mean=float(arr.mean()), max=int(arr.max()), std=float(arr.std()))


# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------
def main():
    CONFIG = {
        "seed": 42,
        "data_dir": "/content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET",

        "COLUMN_NAMES": [
            'acc_chest_x', 'acc_chest_y', 'acc_chest_z',
            'ecg_1', 'ecg_2',
            'acc_ankle_x', 'acc_ankle_y', 'acc_ankle_z',
            'gyro_ankle_x', 'gyro_ankle_y', 'gyro_ankle_z',
            'mag_ankle_x', 'mag_ankle_y', 'mag_ankle_z',
            'acc_arm_x', 'acc_arm_y', 'acc_arm_z',
            'gyro_arm_x', 'gyro_arm_y', 'gyro_arm_z',
            'mag_arm_x', 'mag_arm_y', 'mag_arm_z',
            'activity_id'
        ],

        "TARGET_ACTIVITIES_MAP": {
            6:  'Waist bends forward',
            7:  'Frontal elevation of arms',
            8:  'Knees bending',
            10:  'Jogging',
            11: 'Running',
            12: 'Jump front & back',
        },

        "ACT_FEATURE_MAP": {
            6:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            7:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            8:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            10:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            11: ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            12: ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
        },
        # Baseline training params (what you called "original")
        "epochs": 100,
        "batch_size": 64,
        "fs": 50,

        # window params (ours)
        "win_sec": 8.0,
        "stride_sec": 4.0,
        "drop_last": True,

        # Compare against your fixed budget
        "fixed_updates": 4000,
    }

    set_strict_seed(CONFIG["seed"])

    full_data = load_mhealth_dataset(
        CONFIG["data_dir"],
        CONFIG["TARGET_ACTIVITIES_MAP"],
        CONFIG["COLUMN_NAMES"]
    )
    if not full_data:
        return

    subjects = [f"subject{i}" for i in range(1, 11)]

    # ---- labels per activity ----
    ALL_LABELS_BY_ACT = {
        6: [
            ("subject1", 6, 21), ("subject2", 6, 19), ("subject3", 6, 21), ("subject4", 6, 20), ("subject5", 6, 20),
            ("subject6", 6, 20), ("subject7", 6, 20), ("subject8", 6, 21), ("subject9", 6, 21), ("subject10", 6, 20),
        ],
        7: [
            ("subject1", 7, 20), ("subject2", 7, 20), ("subject3", 7, 20), ("subject4", 7, 20), ("subject5", 7, 20),
            ("subject6", 7, 20), ("subject7", 7, 20), ("subject8", 7, 19), ("subject9", 7, 19), ("subject10", 7, 20),
        ],
        8: [
            ("subject1", 8, 20), ("subject2", 8, 21), ("subject3", 8, 21), ("subject4", 8, 19), ("subject5", 8, 20),
            ("subject6", 8, 20), ("subject7", 8, 21), ("subject8", 8, 21), ("subject9", 8, 21), ("subject10", 8, 21),
        ],
        10: [
            ("subject1", 10, 157), ("subject2", 10, 161), ("subject3", 10, 154), ("subject4", 10, 154), ("subject5", 10, 160),
            ("subject6", 10, 156), ("subject7", 10, 153), ("subject8", 10, 160), ("subject9", 10, 166), ("subject10", 10, 156),
        ],
        11: [
            ("subject1", 11, 165), ("subject2", 11, 158), ("subject3", 11, 174), ("subject4", 11, 163), ("subject5", 11, 157),
            ("subject6", 11, 172), ("subject7", 11, 149), ("subject8", 11, 166), ("subject9", 11, 174), ("subject10", 11, 172),
        ],
        12: [
            ("subject1", 12, 20), ("subject2", 12, 22), ("subject3", 12, 21), ("subject4", 12, 21), ("subject5", 12, 20),
            ("subject6", 12, 21), ("subject7", 12, 19), ("subject8", 12, 20), ("subject9", 12, 20), ("subject10", 12, 20),
        ]
    }

    print("\n" + "=" * 110)
    print("STEP COUNT REPORT (epoch-based baseline)  |  steps = optimizer.step() calls")
    print(f"epochs={CONFIG['epochs']} | batch_size={CONFIG['batch_size']} | fixed_updates={CONFIG['fixed_updates']}")
    print("=" * 110)

    for act_id, label_list in ALL_LABELS_BY_ACT.items():
        act_name = CONFIG["TARGET_ACTIVITIES_MAP"].get(act_id, str(act_id))
        print("\n" + "-" * 110)
        print(f"[Activity {act_id}] {act_name}")
        print("-" * 110)

        trial_steps_per_epoch = []
        ours_steps_per_epoch = []
        trial_total_steps = []
        ours_total_steps = []

        for fold_idx, test_subj in enumerate(subjects, start=1):
            train_labels = [x for x in label_list if x[0] != test_subj]
            test_labels  = [x for x in label_list if x[0] == test_subj]
            if len(test_labels) == 0:
                print(f"[Fold {fold_idx:2d}] skip (no test label for {test_subj})")
                continue

            train_trials = prepare_trial_list(
                train_labels, full_data,
                CONFIG["TARGET_ACTIVITIES_MAP"],
                CONFIG["ACT_FEATURE_MAP"]
            )

            # ------------------------
            # (A) trial_only: train_data = train_trials
            # ------------------------
            train_data_trial = train_trials
            n_trial, steps_ep_trial = make_loader_and_count_steps(
                train_data_trial, batch_size=CONFIG["batch_size"], seed=CONFIG["seed"]
            )
            total_steps_trial = CONFIG["epochs"] * steps_ep_trial
            eq_epochs_trial = CONFIG["fixed_updates"] / max(steps_ep_trial, 1)

            # ------------------------
            # (B) ours_window_proxy: train_data = windows
            # ------------------------
            train_data_ours = trial_list_to_windows(
                train_trials,
                fs=CONFIG["fs"],
                win_sec=CONFIG["win_sec"],
                stride_sec=CONFIG["stride_sec"],
                drop_last=CONFIG["drop_last"]
            )
            n_ours, steps_ep_ours = make_loader_and_count_steps(
                train_data_ours, batch_size=CONFIG["batch_size"], seed=CONFIG["seed"]
            )
            total_steps_ours = CONFIG["epochs"] * steps_ep_ours
            eq_epochs_ours = CONFIG["fixed_updates"] / max(steps_ep_ours, 1)

            trial_steps_per_epoch.append(steps_ep_trial)
            ours_steps_per_epoch.append(steps_ep_ours)
            trial_total_steps.append(total_steps_trial)
            ours_total_steps.append(total_steps_ours)

            print(
                f"[Fold {fold_idx:2d} | Test={test_subj}] "
                f"trial_only: N={n_trial:4d} steps/ep={steps_ep_trial:4d} total(100ep)={total_steps_trial:6d} "
                f"| fixed4000 ~= {eq_epochs_trial:6.2f} ep"
            )
            print(
                f"                     "
                f"ours_win : N={n_ours:4d} steps/ep={steps_ep_ours:4d} total(100ep)={total_steps_ours:6d} "
                f"| fixed4000 ~= {eq_epochs_ours:6.2f} ep"
            )

        # summary
        if len(trial_total_steps) > 0:
            s1 = summarize_steps(trial_total_steps)
            s2 = summarize_steps(ours_total_steps)
            print("\n[Summary: total steps for epochs=100]")
            print(f"  trial_only      : min={s1['min']} | mean={s1['mean']:.1f} | max={s1['max']} | std={s1['std']:.1f}")
            print(f"  ours_windowproxy: min={s2['min']} | mean={s2['mean']:.1f} | max={s2['max']} | std={s2['std']:.1f}")

            sp1 = summarize_steps(trial_steps_per_epoch)
            sp2 = summarize_steps(ours_steps_per_epoch)
            print("[Summary: steps per epoch]")
            print(f"  trial_only      : min={sp1['min']} | mean={sp1['mean']:.2f} | max={sp1['max']} | std={sp1['std']:.2f}")
            print(f"  ours_windowproxy: min={sp2['min']} | mean={sp2['mean']:.2f} | max={sp2['max']} | std={sp2['std']:.2f}")

    print("\nDONE.")


if __name__ == "__main__":
    main()


Loading 10 subjects from /content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET...

STEP COUNT REPORT (epoch-based baseline)  |  steps = optimizer.step() calls
epochs=100 | batch_size=64 | fixed_updates=4000

--------------------------------------------------------------------------------------------------------------
[Activity 6] Waist bends forward
--------------------------------------------------------------------------------------------------------------
[Fold  1 | Test=subject1] trial_only: N=   9 steps/ep=   1 total(100ep)=   100 | fixed4000 ~= 4000.00 ep
                     ours_win : N= 113 steps/ep=   2 total(100ep)=   200 | fixed4000 ~= 2000.00 ep
[Fold  2 | Test=subject2] trial_only: N=   9 steps/ep=   1 total(100ep)=   100 | fixed4000 ~= 4000.00 ep
                     ours_win : N= 113 steps/ep=   2 total(100ep)=   200 | fixed4000 ~= 2000.00 ep
[Fold  3 | Test=subject3] trial_only: N=   9 steps/ep=   1 total(100ep)=   100 | fixed4000 ~= 4000.00 ep
               

In [1]:
# ============================================================
# Effect of Windowing Experiment (mHealth) — FULL CODE
#
# What you get:
# 1) Trial-only (TRAIN: full-trial samples)  VS.  Ours (TRAIN: window-proxy supervision)
# 2) Train step matching: BOTH variants train for the SAME number of optimizer updates (MAX_UPDATES)
# 3) Metrics: MAE, MAPE, STD (std across LOSO folds) — per activity & per variant
# 4) Run LOSO for multiple activities (e.g., 6 activities). You fill GT counts per activity.
#
# Notes:
# - TEST inference is unified: windowing inference (same for both variants) -> fair comparison.
# - You can optionally add multiple seeds via CONFIG["seeds"] (kept simple by default).
# ============================================================

import os
import glob
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from scipy.ndimage import gaussian_filter1d

# ---------------------------------------------------------------------
# 1) Strict Seeding
# ---------------------------------------------------------------------
def set_strict_seed(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# ---------------------------------------------------------------------
# 2) Data Loading
# ---------------------------------------------------------------------
def load_mhealth_dataset(data_dir, target_activities_map, column_names):
    full_dataset = {}
    file_list = sorted(glob.glob(os.path.join(data_dir, "mHealth_subject*.log")))

    if not file_list:
        print(f"[Warning] No mHealth logs found in {data_dir}")
        return {}

    print(f"Loading {len(file_list)} subjects from {data_dir}...")

    for file_path in file_list:
        file_name = os.path.basename(file_path)
        subj_part = file_name.split('.')[0]
        try:
            subj_id_num = int(''.join(filter(str.isdigit, subj_part)))
            subj_key = f"subject{subj_id_num}"
        except:
            subj_key = subj_part

        try:
            df = pd.read_csv(file_path, sep="\t", header=None)
            df = df.iloc[:, :len(column_names)]
            df.columns = column_names

            subj_data = {}
            for label_code, activity_name in target_activities_map.items():
                activity_df = df[df['activity_id'] == label_code].copy()
                if not activity_df.empty:
                    subj_data[activity_name] = activity_df.drop(columns=['activity_id'])

            full_dataset[subj_key] = subj_data
        except Exception as e:
            print(f"Error loading {file_name}: {e}")
            pass

    return full_dataset


def prepare_trial_list(label_config, full_data, target_map, feature_map):
    """
    label_config: list of (subj, act_id, gt_count)
    returns: list of dicts {data:(T,C), count:float, meta:str}
    """
    trial_list = []
    for subj, act_id, gt_count in label_config:
        act_name = target_map.get(act_id)
        feats = feature_map.get(act_id)

        if act_name is None or feats is None:
            print(f"[Skip] Missing mapping for act_id={act_id}")
            continue

        if subj in full_data and act_name in full_data[subj]:
            raw_df = full_data[subj][act_name][feats]
            raw_np = raw_df.values.astype(np.float32)

            # Z-score normalization per trial
            mean = raw_np.mean(axis=0)
            std = raw_np.std(axis=0) + 1e-6
            norm_np = (raw_np - mean) / std

            trial_list.append({
                'data': norm_np,              # (T, C)
                'count': float(gt_count),      # trial total count
                'meta': f"{subj}_{act_name}"
            })
        else:
            print(f"[Skip] Missing data for {subj} - {act_name}")

    return trial_list


# ---------------------------------------------------------------------
# 2.5) Windowing (TRAIN only for Ours)
# ---------------------------------------------------------------------
def trial_list_to_windows(trial_list, fs, win_sec=8.0, stride_sec=4.0, drop_last=True):
    """
    TRAIN-only expansion: trial -> sliding windows
    window proxy label:
      rate_trial = count_total / total_duration
      count_window = rate_trial * window_duration
    """
    win_len = int(round(win_sec * fs))
    stride = int(round(stride_sec * fs))
    assert win_len > 0 and stride > 0

    windows = []
    for item in trial_list:
        x = item["data"]  # (T,C)
        T = x.shape[0]
        total_count = float(item["count"])
        meta = item["meta"]

        total_dur = max(T / float(fs), 1e-6)
        rate_trial = total_count / total_dur  # reps/s

        if T < win_len:
            win_dur = T / float(fs)
            windows.append({
                "data": x,
                "count": rate_trial * win_dur,
                "meta": f"{meta}__win[0:{T}]",
                "parent_meta": meta,
                "parent_T": T,
                "win_start": 0,
                "win_end": T,
            })
            continue

        last_start = T - win_len
        starts = list(range(0, last_start + 1, stride))

        for st in starts:
            ed = st + win_len
            win_dur = win_len / float(fs)
            windows.append({
                "data": x[st:ed],
                "count": rate_trial * win_dur,
                "meta": f"{meta}__win[{st}:{ed}]",
                "parent_meta": meta,
                "parent_T": T,
                "win_start": st,
                "win_end": ed,
            })

        if not drop_last:
            last_st = starts[-1] + stride
            if last_st < T:
                ed = T
                win_dur = (ed - last_st) / float(fs)
                windows.append({
                    "data": x[last_st:ed],
                    "count": rate_trial * win_dur,
                    "meta": f"{meta}__win[{last_st}:{ed}]",
                    "parent_meta": meta,
                    "parent_T": T,
                    "win_start": last_st,
                    "win_end": ed,
                })

    return windows


def predict_count_by_windowing(model, x_np, fs, win_sec, stride_sec, device, tau=1.0, batch_size=64):
    """
    TEST: trial -> sliding windows inference -> window rate 평균 -> total count
    x_np: (T,C) numpy (already normalized)
    return: pred_count(float), window_rates(np.ndarray)
    """
    win_len = int(round(win_sec * fs))
    stride = int(round(stride_sec * fs))
    T = x_np.shape[0]
    total_dur = T / float(fs)

    # short trial -> 1 forward
    if T <= win_len:
        x_tensor = torch.tensor(x_np, dtype=torch.float32).transpose(0, 1).unsqueeze(0).to(device)  # (1,C,T)
        with torch.no_grad():
            rate_hat, _, _, _ = model(x_tensor, mask=None, tau=tau)
        pred_count = float(rate_hat.item() * total_dur)
        return pred_count, np.array([float(rate_hat.item())], dtype=np.float32)

    starts = list(range(0, T - win_len + 1, stride))
    windows = np.stack([x_np[st:st + win_len] for st in starts], axis=0)  # (N, win_len, C)

    xw = torch.tensor(windows, dtype=torch.float32).permute(0, 2, 1).to(device)  # (N, C, win_len)

    rates = []
    model.eval()
    with torch.no_grad():
        for i in range(0, xw.shape[0], batch_size):
            xb = xw[i:i + batch_size]
            r_hat, _, _, _ = model(xb, mask=None, tau=tau)  # (B,)
            rates.append(r_hat.detach().cpu().numpy())

    rates = np.concatenate(rates, axis=0)  # (N,)
    rate_mean = float(rates.mean())
    pred_count = rate_mean * total_dur
    return float(pred_count), rates


# ---------------------------------------------------------------------
# 2.8) Dataset / Collate
# ---------------------------------------------------------------------
class TrialDataset(Dataset):
    def __init__(self, trial_list):
        self.trials = trial_list

    def __len__(self):
        return len(self.trials)

    def __getitem__(self, idx):
        item = self.trials[idx]
        data = torch.tensor(item['data'], dtype=torch.float32).transpose(0, 1)  # (C, T)
        count = torch.tensor(item['count'], dtype=torch.float32)
        return data, count, item['meta']


def collate_variable_length(batch):
    max_len = max([x[0].shape[1] for x in batch])
    C = batch[0][0].shape[0]

    padded_data, masks, counts, metas, lengths = [], [], [], [], []
    for data, count, meta in batch:
        T = data.shape[1]
        lengths.append(T)

        pad_size = max_len - T
        if pad_size > 0:
            pad = torch.zeros(C, pad_size)
            d_padded = torch.cat([data, pad], dim=1)
            mask = torch.cat([torch.ones(T), torch.zeros(pad_size)], dim=0)
        else:
            d_padded = data
            mask = torch.ones(T)

        padded_data.append(d_padded)
        masks.append(mask)
        counts.append(count)
        metas.append(meta)

    return {
        "data": torch.stack(padded_data),         # (B, C, T_max)
        "mask": torch.stack(masks),               # (B, T_max)
        "count": torch.stack(counts),             # (B,)
        "length": torch.tensor(lengths, dtype=torch.float32),  # (B,)
        "meta": metas
    }


# ---------------------------------------------------------------------
# 3) Model
# ---------------------------------------------------------------------
class ManifoldEncoder(nn.Module):
    def __init__(self, input_ch, hidden_dim=128, latent_dim=16):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(input_ch, hidden_dim, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(hidden_dim, hidden_dim, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(hidden_dim, latent_dim, 1)
        )

    def forward(self, x):
        z = self.net(x)            # (B, D, T)
        z = z.transpose(1, 2)      # (B, T, D)
        return z


class ManifoldDecoder(nn.Module):
    def __init__(self, latent_dim, hidden_dim, out_ch):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(latent_dim, hidden_dim, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(hidden_dim, hidden_dim, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(hidden_dim, out_ch, 1)
        )

    def forward(self, z):
        zt = z.transpose(1, 2)     # (B, D, T)
        x_hat = self.net(zt)       # (B, C, T)
        return x_hat


class MultiRateHead(nn.Module):
    def __init__(self, latent_dim=16, hidden=64, K_max=6):
        super().__init__()
        self.K_max = K_max
        self.net = nn.Sequential(
            nn.Linear(latent_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1 + K_max)  # [amp_logit | phase_logits...]
        )

    def forward(self, z, tau=1.0):
        out = self.net(z)                               # (B,T,1+K)
        amp = F.softplus(out[..., 0])                   # (B,T) >=0
        phase_logits = out[..., 1:]                     # (B,T,K)
        phase = F.softmax(phase_logits / tau, dim=-1)   # (B,T,K)
        return amp, phase, phase_logits


class KAutoCountModel(nn.Module):
    """
    - micro_rate_t = amp(t)
    - phase_p(t,k) distributes micro-events across K streams
    - k_hat computed from p_bar (effective K)
    - rep_rate_t = micro_rate_t / k_hat
    - output avg_rep_rate over valid time
    """
    def __init__(self, input_ch, hidden_dim=128, latent_dim=16, K_max=6):
        super().__init__()
        self.encoder = ManifoldEncoder(input_ch, hidden_dim, latent_dim)
        self.decoder = ManifoldDecoder(latent_dim, hidden_dim, input_ch)
        self.rate_head = MultiRateHead(latent_dim, hidden=hidden_dim, K_max=K_max)
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, (nn.Conv1d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
        with torch.no_grad():
            b = self.rate_head.net[-1].bias
            b.zero_()
            b[0].fill_(-2.0)  # amp bias only

    @staticmethod
    def _masked_mean_time(x, mask=None, eps=1e-6):
        if mask is None:
            return x.mean(dim=1)
        if x.dim() == 2:
            m = mask.to(dtype=x.dtype, device=x.device)
            return (x * m).sum(dim=1) / (m.sum(dim=1) + eps)
        elif x.dim() == 3:
            m = mask.to(dtype=x.dtype, device=x.device).unsqueeze(-1)
            return (x * m).sum(dim=1) / (m.sum(dim=1) + eps)
        else:
            raise ValueError(f"Unsupported dim for masked mean: {x.dim()}")

    def forward(self, x, mask=None, tau=1.0):
        z = self.encoder(x)              # (B,T,D)
        x_hat = self.decoder(z)          # (B,C,T)

        amp_t, phase_p, phase_logits = self.rate_head(z, tau=tau)  # amp:(B,T), phase:(B,T,K)
        rates_k_t = amp_t.unsqueeze(-1) * phase_p                   # (B,T,K)

        micro_rate_t = amp_t                                        # (B,T)

        p_bar = self._masked_mean_time(phase_p, mask)               # (B,K)
        k_hat = 1.0 / (p_bar.pow(2).sum(dim=1) + 1e-6)              # (B,) ~ [1,K]

        rep_rate_t = micro_rate_t / (k_hat.unsqueeze(1) + 1e-6)     # (B,T)
        if mask is not None:
            rep_rate_t = rep_rate_t * mask

        if mask is None:
            avg_rep_rate = rep_rate_t.mean(dim=1)
        else:
            avg_rep_rate = (rep_rate_t * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-6)

        aux = {
            "rates_k_t": rates_k_t,
            "phase_p": phase_p,
            "phase_logits": phase_logits,
            "micro_rate_t": micro_rate_t,
            "rep_rate_t": rep_rate_t,
            "k_hat": k_hat,
        }
        return avg_rep_rate, z, x_hat, aux


# ---------------------------------------------------------------------
# 4) Loss utils
# ---------------------------------------------------------------------
def masked_recon_mse(x_hat, x, mask, eps=1e-6):
    mask = mask.to(dtype=x.dtype, device=x.device)
    mask_bc = mask.unsqueeze(1)              # (B,1,T)
    se = (x_hat - x) ** 2                    # (B,C,T)
    se = se * mask_bc
    denom = (mask.sum() * x.shape[1]) + eps  # valid(B*T)*C
    return se.sum() / denom


def temporal_smoothness(v, mask=None, eps=1e-6):
    dv = torch.abs(v[:, 1:] - v[:, :-1])  # (B,T-1)
    if mask is None:
        return dv.mean()
    m = mask[:, 1:] * mask[:, :-1]
    m = m.to(dtype=dv.dtype, device=dv.device)
    return (dv * m).sum() / (m.sum() + eps)


def phase_entropy_loss(phase_p, mask=None, eps=1e-8):
    ent = -(phase_p * (phase_p + eps).log()).sum(dim=-1)  # (B,T)
    if mask is None:
        return ent.mean()
    ent = ent * mask
    return ent.sum() / (mask.sum() + eps)


def effK_usage_loss(phase_p, mask=None, eps=1e-6):
    if mask is None:
        p_bar = phase_p.mean(dim=1)  # (B,K)
    else:
        m = mask.to(dtype=phase_p.dtype, device=phase_p.device).unsqueeze(-1)  # (B,T,1)
        p_bar = (phase_p * m).sum(dim=1) / (m.sum(dim=1) + eps)

    effK = 1.0 / (p_bar.pow(2).sum(dim=1) + eps)
    return effK.mean(), effK.detach()


# ---------------------------------------------------------------------
# 5) Train by FIXED UPDATES (step matching)
# ---------------------------------------------------------------------
def train_one_step(model, batch, optimizer, config, device):
    model.train()

    fs = config["fs"]
    tau = config.get("tau", 1.0)

    lam_recon = config.get("lambda_recon", 1.0)
    lam_smooth = config.get("lambda_smooth", 0.05)
    lam_phase_ent = config.get("lambda_phase_ent", 0.01)
    lam_effk = config.get("lambda_effk", 0.005)

    x = batch["data"].to(device)         # (B,C,T)
    mask = batch["mask"].to(device)      # (B,T)
    y_count = batch["count"].to(device)  # (B,)
    length = batch["length"].to(device)  # (B,)

    duration = torch.clamp(length / fs, min=1e-6)  # sec
    y_rate = y_count / duration                    # reps/s

    optimizer.zero_grad(set_to_none=True)

    rate_hat, z, x_hat, aux = model(x, mask, tau=tau)

    loss_rate = F.mse_loss(rate_hat, y_rate)
    loss_recon = masked_recon_mse(x_hat, x, mask)
    loss_smooth = temporal_smoothness(aux["rep_rate_t"], mask)
    loss_phase_ent = phase_entropy_loss(aux["phase_p"], mask)
    loss_effk, _ = effK_usage_loss(aux["phase_p"], mask)

    loss = (loss_rate
            + lam_recon * loss_recon
            + lam_smooth * loss_smooth
            + lam_phase_ent * loss_phase_ent
            + lam_effk * loss_effk)

    loss.backward()
    optimizer.step()

    with torch.no_grad():
        count_hat = rate_hat * duration
        mae_count = torch.abs(count_hat - y_count).mean().item()

    stats = {
        "loss": float(loss.item()),
        "loss_rate": float(loss_rate.item()),
        "loss_recon": float(loss_recon.item()),
        "loss_smooth": float(loss_smooth.item()),
        "loss_phase_ent": float(loss_phase_ent.item()),
        "loss_effk": float(loss_effk.item()),
        "mae_count": float(mae_count),
    }
    return stats


def train_by_updates(model, loader, optimizer, scheduler, config, device, max_updates=3000, log_every=500):
    """
    핵심: optimizer.update 횟수(=step)를 정확히 max_updates로 고정
    -> Trial-only vs Window-proxy 공정 비교
    """
    it = iter(loader)
    agg = {k: 0.0 for k in ["loss", "loss_rate", "loss_recon", "loss_smooth", "loss_phase_ent", "loss_effk", "mae_count"]}

    for step in range(1, max_updates + 1):
        try:
            batch = next(it)
        except StopIteration:
            it = iter(loader)
            batch = next(it)

        st = train_one_step(model, batch, optimizer, config, device)
        for k in agg:
            agg[k] += st[k]

        if scheduler is not None:
            scheduler.step()

        if (log_every is not None) and (step % log_every == 0):
            avg = {k: agg[k] / step for k in agg}
            print(f"  [train] step {step:5d}/{max_updates} | loss={avg['loss']:.4f} | mae={avg['mae_count']:.3f}")

    avg = {k: agg[k] / max_updates for k in agg}
    return avg


# ---------------------------------------------------------------------
# 6) Eval (MAE / MAPE)
# ---------------------------------------------------------------------
def eval_on_trials_window_infer(model, test_trials, config, device):
    """
    Unified evaluation for BOTH variants:
    - Use windowing inference at test time
    - Return fold-avg MAE, MAPE
    """
    fs = config["fs"]
    win_sec = config["win_sec"]
    stride_sec = config["stride_sec"]
    tau = config.get("tau", 1.0)
    bs = config.get("batch_size", 64)

    mae_list = []
    mape_list = []

    model.eval()
    for item in test_trials:
        x_np = item["data"]
        gt = float(item["count"])

        pred, _ = predict_count_by_windowing(
            model,
            x_np=x_np,
            fs=fs,
            win_sec=win_sec,
            stride_sec=stride_sec,
            device=device,
            tau=tau,
            batch_size=bs
        )

        ae = abs(pred - gt)
        mae_list.append(ae)

        # MAPE (%)
        denom = max(abs(gt), 1e-6)
        mape_list.append((ae / denom) * 100.0)

    fold_mae = float(np.mean(mae_list)) if len(mae_list) > 0 else np.nan
    fold_mape = float(np.mean(mape_list)) if len(mape_list) > 0 else np.nan
    return fold_mae, fold_mape


# ---------------------------------------------------------------------
# 7) Main: multi-activity LOSO, two variants
# ---------------------------------------------------------------------
def main():
    CONFIG = {
        # Repro
        "seeds": [42],   # 필요하면 [1,2,3,4,5] 로 확장 가능

        # Data
        "data_dir": "/content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET",
        "fs": 50,

        "COLUMN_NAMES": [
            'acc_chest_x', 'acc_chest_y', 'acc_chest_z',
            'ecg_1', 'ecg_2',
            'acc_ankle_x', 'acc_ankle_y', 'acc_ankle_z',
            'gyro_ankle_x', 'gyro_ankle_y', 'gyro_ankle_z',
            'mag_ankle_x', 'mag_ankle_y', 'mag_ankle_z',
            'acc_arm_x', 'acc_arm_y', 'acc_arm_z',
            'gyro_arm_x', 'gyro_arm_y', 'gyro_arm_z',
            'mag_arm_x', 'mag_arm_y', 'mag_arm_z',
            'activity_id'
        ],

        # ====== ✅ Put YOUR 6 activities here ======
        # act_id -> name
        "TARGET_ACTIVITIES_MAP": {
            6:  'Waist bends forward',
            7:  'Frontal elevation of arms',
            8:  'Knees bending',
            10:  'Jogging',
            11: 'Running',
            12: 'Jump front & back',
        },

        # act_id -> feature list (you can keep same list for all)
        "ACT_FEATURE_MAP": {
            6:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            7:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            8:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            10:  ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            11: ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
            12: ['acc_chest_x','acc_chest_y','acc_chest_z','acc_ankle_x','acc_ankle_y','acc_ankle_z',
                 'gyro_ankle_x','gyro_ankle_y','gyro_ankle_z','acc_arm_x','acc_arm_y','acc_arm_z',
                 'gyro_arm_x','gyro_arm_y','gyro_arm_z'],
        },

        # Train (step matching 핵심)
        "batch_size": 64,
        "lr": 5e-4,
        "MAX_UPDATES": 200,  # ✅ Trial-only vs Ours 둘 다 정확히 이 step만큼 학습
        "log_every": 50,

        # Windowing (TRAIN for Ours, TEST for both)
        "win_sec": 8.0,
        "stride_sec": 4.0,
        "drop_last": True,

        # Model
        "hidden_dim": 128,
        "latent_dim": 16,
        "K_max": 6,

        # Loss weights
        "lambda_recon": 1.0,
        "lambda_smooth": 0.05,
        "lambda_phase_ent": 0.01,
        "lambda_effk": 0.0075,
        "tau": 1.0,

        # Variants
        "VARIANTS": ["trial_only", "ours_window_proxy"],

        # ====== ✅ YOU FILL THESE GT COUNTS ======
        # act_id -> list of (subjectX, act_id, gt_count)
        "GT_LABELS_BY_ACT": {
            6: [
                ("subject1", 6, 21), ("subject2", 6, 19), ("subject3", 6, 21), ("subject4", 6, 20), ("subject5", 6, 20),
                ("subject6", 6, 20), ("subject7", 6, 20), ("subject8", 6, 21), ("subject9", 6, 21), ("subject10", 6, 20),
            ],
            7: [
                ("subject1", 7, 20), ("subject2", 7, 20), ("subject3", 7, 20), ("subject4", 7, 20), ("subject5", 7, 20),
                ("subject6", 7, 20), ("subject7", 7, 20), ("subject8", 7, 19), ("subject9", 7, 19), ("subject10", 7, 20),
            ],
            8: [
                ("subject1", 8, 20), ("subject2", 8, 21), ("subject3", 8, 21), ("subject4", 8, 19), ("subject5", 8, 20),
                ("subject6", 8, 20), ("subject7", 8, 21), ("subject8", 8, 21), ("subject9", 8, 21), ("subject10", 8, 21),
            ],
            10: [
                ("subject1", 10, 157), ("subject2", 10, 161), ("subject3", 10, 154), ("subject4", 10, 154), ("subject5", 10, 160),
                ("subject6", 10, 156), ("subject7", 10, 153), ("subject8", 10, 160), ("subject9", 10, 166), ("subject10", 10, 156),
            ],
            11: [
                ("subject1", 11, 165), ("subject2", 11, 158), ("subject3", 11, 174), ("subject4", 11, 163), ("subject5", 11, 157),
                ("subject6", 11, 172), ("subject7", 11, 149), ("subject8", 11, 166), ("subject9", 11, 174), ("subject10", 11, 172),
            ],
            12: [
                ("subject1", 12, 20), ("subject2", 12, 22), ("subject3", 12, 21), ("subject4", 12, 21), ("subject5", 12, 20),
                ("subject6", 12, 21), ("subject7", 12, 19), ("subject8", 12, 20), ("subject9", 12, 20), ("subject10", 12, 20),
            ],
        },
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    # Load once (contains only activities in TARGET_ACTIVITIES_MAP)
    full_data = load_mhealth_dataset(CONFIG["data_dir"], CONFIG["TARGET_ACTIVITIES_MAP"], CONFIG["COLUMN_NAMES"])
    if not full_data:
        return

    subjects = [f"subject{i}" for i in range(1, 11)]

    # Results container
    all_records = []

    # Run per seed (optional)
    for seed in CONFIG["seeds"]:
        print("\n" + "=" * 90)
        print(f"SEED = {seed}")
        print("=" * 90)
        set_strict_seed(seed)

        # Run per activity
        for act_id, act_name in CONFIG["TARGET_ACTIVITIES_MAP"].items():
            gt_labels = CONFIG["GT_LABELS_BY_ACT"].get(act_id, [])
            if gt_labels is None or len(gt_labels) == 0:
                print(f"\n[Skip Activity] act_id={act_id} ({act_name}) because GT_LABELS_BY_ACT is empty.")
                continue

            print("\n" + "-" * 90)
            print(f"ACTIVITY {act_id}: {act_name}")
            print("-" * 90)

            # Store fold metrics per variant
            fold_metrics = {v: {"mae": [], "mape": []} for v in CONFIG["VARIANTS"]}

            for fold_idx, test_subj in enumerate(subjects):
                set_strict_seed(seed)

                # Split labels
                train_labels = [x for x in gt_labels if x[0] != test_subj]
                test_labels  = [x for x in gt_labels if x[0] == test_subj]

                # Prepare trials
                train_trials = prepare_trial_list(train_labels, full_data, CONFIG["TARGET_ACTIVITIES_MAP"], CONFIG["ACT_FEATURE_MAP"])
                test_trials  = prepare_trial_list(test_labels,  full_data, CONFIG["TARGET_ACTIVITIES_MAP"], CONFIG["ACT_FEATURE_MAP"])

                if len(test_trials) == 0:
                    print(f"[Skip Fold] act={act_id} Fold {fold_idx+1}: {test_subj} has no data.")
                    continue
                if len(train_trials) == 0:
                    print(f"[Skip Fold] act={act_id} Fold {fold_idx+1}: train_trials empty.")
                    continue

                # For each variant: train -> eval
                for variant in CONFIG["VARIANTS"]:
                    # Build train samples
                    if variant == "trial_only":
                        train_samples = train_trials
                    elif variant == "ours_window_proxy":
                        train_samples = trial_list_to_windows(
                            train_trials,
                            fs=CONFIG["fs"],
                            win_sec=CONFIG["win_sec"],
                            stride_sec=CONFIG["stride_sec"],
                            drop_last=CONFIG["drop_last"],
                        )
                    else:
                        raise ValueError(f"Unknown variant: {variant}")

                    # Dataloader
                    g = torch.Generator()
                    g.manual_seed(seed)

                    train_loader = DataLoader(
                        TrialDataset(train_samples),
                        batch_size=CONFIG["batch_size"],
                        shuffle=True,
                        collate_fn=collate_variable_length,
                        generator=g,
                        num_workers=0,
                        drop_last=False,
                    )

                    # Model init
                    input_ch = train_samples[0]['data'].shape[1]
                    model = KAutoCountModel(
                        input_ch=input_ch,
                        hidden_dim=CONFIG["hidden_dim"],
                        latent_dim=CONFIG["latent_dim"],
                        K_max=CONFIG["K_max"],
                    ).to(device)

                    optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG["lr"])

                    # Scheduler in "updates" domain (optional)
                    # Example: half LR every 2000 updates
                    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=max(1, CONFIG["MAX_UPDATES"] // 2), gamma=0.5)

                    print(f"\n[Fold {fold_idx+1:2d} | Test={test_subj} | {variant}] TRAIN (fixed updates={CONFIG['MAX_UPDATES']})")
                    _train_avg = train_by_updates(
                        model=model,
                        loader=train_loader,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        config=CONFIG,
                        device=device,
                        max_updates=CONFIG["MAX_UPDATES"],
                        log_every=CONFIG.get("log_every", 800)
                    )

                    # Eval (windowing inference for BOTH variants)
                    fold_mae, fold_mape = eval_on_trials_window_infer(model, test_trials, CONFIG, device)

                    fold_metrics[variant]["mae"].append(fold_mae)
                    fold_metrics[variant]["mape"].append(fold_mape)

                    print(f"[Fold {fold_idx+1:2d} | {variant}]  MAE={fold_mae:.3f} | MAPE={fold_mape:.2f}%")

            # Summarize per activity
            for variant in CONFIG["VARIANTS"]:
                maes = np.array(fold_metrics[variant]["mae"], dtype=np.float32)
                mapes = np.array(fold_metrics[variant]["mape"], dtype=np.float32)

                rec = {
                    "seed": seed,
                    "act_id": act_id,
                    "activity": act_name,
                    "variant": variant,
                    "MAE_mean": float(np.nanmean(maes)),
                    "MAE_std": float(np.nanstd(maes)),
                    "MAPE_mean(%)": float(np.nanmean(mapes)),
                    "MAPE_std(%)": float(np.nanstd(mapes)),
                    "n_folds": int(np.sum(~np.isnan(maes))),
                }
                all_records.append(rec)

            # Print quick compare (Trial-only vs Ours) for this activity
            def _get(v, key):
                arr = np.array(fold_metrics[v][key], dtype=np.float32)
                return float(np.nanmean(arr)), float(np.nanstd(arr))

            t_mae_m, t_mae_s = _get("trial_only", "mae")
            o_mae_m, o_mae_s = _get("ours_window_proxy", "mae")
            t_mape_m, t_mape_s = _get("trial_only", "mape")
            o_mape_m, o_mape_s = _get("ours_window_proxy", "mape")

            print("\n" + "-" * 90)
            print(f"[Activity Summary] {act_id} {act_name}")
            print(f"  Trial-only       : MAE {t_mae_m:.3f} ± {t_mae_s:.3f} | MAPE {t_mape_m:.2f}% ± {t_mape_s:.2f}%")
            print(f"  Ours (win-proxy)  : MAE {o_mae_m:.3f} ± {o_mae_s:.3f} | MAPE {o_mape_m:.2f}% ± {o_mape_s:.2f}%")
            print("-" * 90)

    # Final summary table
    df = pd.DataFrame(all_records)
    if len(df) > 0:
        print("\n" + "=" * 90)
        print("FINAL SUMMARY (per seed, per activity, per variant)")
        print("=" * 90)
        with pd.option_context("display.max_rows", 200, "display.max_columns", 50):
            print(df.sort_values(["seed", "act_id", "variant"]).to_string(index=False))
    else:
        print("[No results] Check GT_LABELS_BY_ACT and activity mappings.")


if __name__ == "__main__":
    main()


Device: cuda
Loading 10 subjects from /content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET...

SEED = 42

------------------------------------------------------------------------------------------
ACTIVITY 6: Waist bends forward
------------------------------------------------------------------------------------------

[Fold  1 | Test=subject1 | trial_only] TRAIN (fixed updates=200)
  [train] step    50/200 | loss=0.7587 | mae=2.672
  [train] step   100/200 | loss=0.5067 | mae=1.609
  [train] step   150/200 | loss=0.3993 | mae=1.221
  [train] step   200/200 | loss=0.3380 | mae=1.017
[Fold  1 | trial_only]  MAE=2.070 | MAPE=9.86%

[Fold  1 | Test=subject1 | ours_window_proxy] TRAIN (fixed updates=200)
  [train] step    50/200 | loss=0.5937 | mae=0.783
  [train] step   100/200 | loss=0.4017 | mae=0.535
  [train] step   150/200 | loss=0.3181 | mae=0.435
  [train] step   200/200 | loss=0.2705 | mae=0.380
[Fold  1 | ours_window_proxy]  MAE=2.551 | MAPE=12.15%

[Fold  2 | Test=subj