In [None]:
import os
import glob
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# 1) Data Loading (from your base code)
# ------------------------------------------------------------
def load_mhealth_dataset(data_dir, target_activities_map, column_names):
    """
    Load per-subject mHealth logs and split each one by target activity.

    Args:
        data_dir: directory searched for files matching "mHealth_subject*.log"
        target_activities_map: {activity_id: activity_name}; only these ids are kept
        column_names: names assigned to the first len(column_names) columns of each
            log; must contain 'activity_id'

    Returns:
        {subject_key: {activity_name: DataFrame}} where each DataFrame holds the rows
        of one activity without the 'activity_id' column. Activities with no rows
        are omitted. Returns {} when no log file is found. A file that cannot be
        loaded is reported on stdout and skipped.
    """
    full_dataset = {}
    file_list = sorted(glob.glob(os.path.join(data_dir, "mHealth_subject*.log")))

    if not file_list:
        print(f"[Warning] No mHealth logs found in {data_dir}")
        return {}

    print(f"Loading {len(file_list)} subjects from {data_dir}...")

    for file_path in file_list:
        file_name = os.path.basename(file_path)
        subj_part = file_name.split('.')[0]
        try:
            subj_id_num = int(''.join(filter(str.isdigit, subj_part)))
            subj_key = f"subject{subj_id_num}"
        except ValueError:
            # No digits in the file stem: int('') fails, so fall back to the raw stem.
            subj_key = subj_part

        try:
            df = pd.read_csv(file_path, sep="\t", header=None)
            df = df.iloc[:, :len(column_names)]
            df.columns = column_names

            subj_data = {}
            for label_code, activity_name in target_activities_map.items():
                activity_df = df[df['activity_id'] == label_code].copy()
                if not activity_df.empty:
                    subj_data[activity_name] = activity_df.drop(columns=['activity_id'])

            full_dataset[subj_key] = subj_data
        except Exception as e:
            # Deliberate best-effort: one unreadable log must not abort the other subjects.
            print(f"Error loading {file_name}: {e}")

    return full_dataset


# ------------------------------------------------------------
# 2) Quantification helpers
# ------------------------------------------------------------
def _iqr(s: pd.Series) -> float:
    """Interquartile range (75th minus 25th percentile) of a Series, as a plain float."""
    lower, upper = (s.quantile(q) for q in (0.25, 0.75))
    return float(upper - lower)


def find_largest_gap_threshold(values_1d: np.ndarray):
    """
    Find split threshold by largest adjacent gap (bimodal-friendly).

    Args:
        values_1d: array-like of activity-level representative values (e.g., count_mean).
            The input is flattened, so a scalar or nested sequence is accepted too.

    Returns:
        thr: threshold float (midpoint of the largest gap; the value itself when
            only one value is given)
        v_sorted: sorted values
        j: index of largest gap between v_sorted[j] and v_sorted[j+1]
            (first one on ties; 0 when only one value is given)
        gaps: all adjacent gaps (len N-1)

    Raises:
        ValueError: if no values are given.
    """
    v = np.asarray(values_1d, dtype=np.float64).ravel()
    v_sorted = np.sort(v)

    if v_sorted.size == 0:
        # Previously this surfaced as an opaque IndexError on v_sorted[0].
        raise ValueError("find_largest_gap_threshold needs at least one value")

    if v_sorted.size < 2:
        return float(v_sorted[0]), v_sorted, 0, np.array([])

    gaps = v_sorted[1:] - v_sorted[:-1]
    j = int(np.argmax(gaps))
    thr = float((v_sorted[j] + v_sorted[j + 1]) / 2.0)
    return thr, v_sorted, j, gaps


def compute_tempo_and_count_groups(
    full_data,
    all_labels,             # list of (subj, act_id, gt_count)
    target_map,             # {act_id: act_name}
    feature_map,            # {act_id: [feature cols]}
    fs: float,
    print_trials=True,
    print_activity_table=True,
    save_dir=None
):
    """
    Build trial-level and activity-level tempo/count tables and split activities into groups.

    Per trial:
        tempo_rps = gt_count / duration_sec, with duration_sec = (#rows of the
        activity segment) / fs, floored at 1e-6 s.
    Per activity (grouped by act_id + activity):
        n_trials plus mean / std / median / IQR of tempo_rps and of gt_count.
        A missing std (e.g. a single trial) is reported as 0.0.

    Groups:
      - tempo_group: "Slow" when the activity mean tempo is <= the median of all
        activity mean tempos, otherwise "Fast"
      - count_group: "Low" when the activity mean count is <= the midpoint of the
        largest adjacent gap between sorted activity mean counts, otherwise "High"

    Labels whose activity id, feature list, subject or activity segment cannot be
    resolved are skipped and counted as missing.

    Returns:
        (trial_df, act_df, thresholds). When no trial could be collected the result
        is (empty trial_df, empty DataFrame, {}).
    """
    banner = "=" * 80
    records = []
    n_skipped = 0

    # ---- one record per labelled trial ----
    for subj, act_id, gt_count in all_labels:
        act_name = target_map.get(act_id)
        feat_cols = feature_map.get(act_id)

        resolvable = (
            act_name is not None
            and feat_cols is not None
            and subj in full_data
            and act_name in full_data[subj]
        )
        if not resolvable:
            n_skipped += 1
            continue

        segment = full_data[subj][act_name][feat_cols]   # (T, C)
        n_samples = int(len(segment))
        duration = max(n_samples / float(fs), 1e-6)

        records.append({
            "subj": subj,
            "act_id": int(act_id),
            "activity": act_name,
            "gt_count": float(gt_count),
            "T": n_samples,
            "duration_sec": duration,
            "tempo_rps": float(gt_count) / duration,  # reps/sec
        })

    trial_df = pd.DataFrame(records)
    if trial_df.empty:
        print("[Error] No trials collected. Check data_dir / labels / target_map.")
        return trial_df, pd.DataFrame(), {}

    if print_trials:
        print("\n" + banner)
        print("Trial-level table (first 30)")
        print(banner)
        trial_cols = ["subj", "act_id", "activity", "gt_count", "duration_sec", "tempo_rps", "T"]
        print(trial_df[trial_cols].head(30).to_string(index=False))
        print(f"\n#Trials={len(trial_df)} | #Activities={trial_df['activity'].nunique()} | Missing={n_skipped}")

    # ---- per-activity summary statistics ----
    summary_spec = {
        "n_trials": ("tempo_rps", "count"),
        "tempo_mean_rps": ("tempo_rps", "mean"),
        "tempo_std_rps": ("tempo_rps", "std"),
        "tempo_median_rps": ("tempo_rps", "median"),
        "tempo_iqr_rps": ("tempo_rps", _iqr),
        "count_mean": ("gt_count", "mean"),
        "count_std": ("gt_count", "std"),
        "count_median": ("gt_count", "median"),
        "count_iqr": ("gt_count", _iqr),
    }
    act_df = trial_df.groupby(["act_id", "activity"], as_index=False).agg(**summary_spec)

    for std_col in ("tempo_std_rps", "count_std"):
        act_df[std_col] = act_df[std_col].fillna(0.0)

    # ---- tempo split at the median of the activity mean tempos ----
    tempo_q50 = float(act_df["tempo_mean_rps"].median())
    act_df["tempo_group"] = np.where(act_df["tempo_mean_rps"] <= tempo_q50, "Slow", "Fast")

    # ---- count split at the midpoint of the largest adjacent gap ----
    count_thr, sorted_counts, gap_j, _ = find_largest_gap_threshold(act_df["count_mean"].values)
    act_df["count_group"] = np.where(act_df["count_mean"] <= count_thr, "Low", "High")

    # ordered by tempo, then count, purely for readability
    act_df = act_df.sort_values(["tempo_mean_rps", "count_mean"]).reset_index(drop=True)

    has_gap = sorted_counts.size >= 2
    if has_gap:
        gap_pair = (float(sorted_counts[gap_j]), float(sorted_counts[gap_j + 1]))
    else:
        gap_pair = (None, None)

    thresholds = {
        "tempo_q50_rps": tempo_q50,
        "count_thr_largest_gap": count_thr,
        "count_gap_between": gap_pair,
    }

    if print_activity_table:
        print("\n" + banner)
        print("Activity-level summary + groups")
        print(banner)
        print(f"tempo q50 (median of activity mean tempo): {tempo_q50:.6f} reps/sec")

        if has_gap:
            a, b = gap_pair
            print(f"count threshold (largest adjacent gap midpoint): {count_thr:.6f} reps  "
                  f"(gap between {a:.3f} and {b:.3f})")
        else:
            print(f"count threshold (largest-gap): {count_thr:.6f} reps (only one activity)")

        print()

        act_cols = [
            "act_id", "activity", "n_trials",
            "tempo_mean_rps", "tempo_std_rps", "tempo_median_rps", "tempo_iqr_rps", "tempo_group",
            "count_mean", "count_std", "count_median", "count_iqr", "count_group",
        ]
        print(act_df[act_cols].to_string(index=False))

        print("\nGroup sizes:")
        for group_col in ("tempo_group", "count_group"):
            print(f"{group_col}:", act_df[group_col].value_counts().to_dict())

        # group membership lists
        for title, group_col, labels in (
            ("\nTempo groups:", "tempo_group", ("Slow", "Fast")),
            ("\nCount-scale groups:", "count_group", ("Low", "High")),
        ):
            print(title)
            for label in labels:
                members = act_df.loc[act_df[group_col] == label, "activity"].tolist()
                print(f"  {label}: {members}")

    if save_dir is not None:
        os.makedirs(save_dir, exist_ok=True)
        trial_path = os.path.join(save_dir, "trial_tempo_count_table.csv")
        act_path = os.path.join(save_dir, "activity_tempo_count_summary.csv")
        trial_df.to_csv(trial_path, index=False)
        act_df.to_csv(act_path, index=False)
        print(f"\n[Saved] {trial_path}")
        print(f"[Saved] {act_path}")

    return trial_df, act_df, thresholds


# ------------------------------------------------------------
# 3) Main (only for quantification/logging)
# ------------------------------------------------------------
def main():
    """Load the mHealth logs and print the tempo / count quantification tables."""
    column_names = [
        'acc_chest_x', 'acc_chest_y', 'acc_chest_z',
        'ecg_1', 'ecg_2',
        'acc_ankle_x', 'acc_ankle_y', 'acc_ankle_z',
        'gyro_ankle_x', 'gyro_ankle_y', 'gyro_ankle_z',
        'mag_ankle_x', 'mag_ankle_y', 'mag_ankle_z',
        'acc_arm_x', 'acc_arm_y', 'acc_arm_z',
        'gyro_arm_x', 'gyro_arm_y', 'gyro_arm_z',
        'mag_arm_x', 'mag_arm_y', 'mag_arm_z',
        'activity_id'
    ]

    target_activities = {
        6:  'Waist bends forward',
        7:  'Frontal elevation of arms',
        8:  'Knees bending',
        10: 'Jogging',
        11: 'Running',
        12: 'Jump front & back',
    }

    # Every target activity uses the same accelerometer + gyroscope channels.
    # Features: only used to select the activity segment length (T)
    imu_features = [
        'acc_chest_x', 'acc_chest_y', 'acc_chest_z',
        'acc_ankle_x', 'acc_ankle_y', 'acc_ankle_z',
        'gyro_ankle_x', 'gyro_ankle_y', 'gyro_ankle_z',
        'acc_arm_x', 'acc_arm_y', 'acc_arm_z',
        'gyro_arm_x', 'gyro_arm_y', 'gyro_arm_z',
    ]
    act_feature_map = {act_id: list(imu_features) for act_id in target_activities}

    # Ground-truth repetition counts per activity, listed for subject1 .. subject10.
    gt_counts = {
        6:  [21, 19, 21, 20, 20, 20, 20, 21, 21, 20],
        7:  [20, 20, 20, 20, 20, 20, 20, 19, 19, 20],
        8:  [20, 21, 21, 19, 20, 20, 21, 21, 21, 21],
        10: [157, 161, 154, 154, 160, 156, 153, 160, 166, 156],
        11: [165, 158, 174, 163, 157, 172, 149, 166, 174, 172],
        12: [20, 22, 21, 21, 20, 21, 19, 20, 20, 20],
    }
    all_labels = [
        (f"subject{subj_no}", act_id, count)
        for act_id, counts in gt_counts.items()
        for subj_no, count in enumerate(counts, start=1)
    ]

    CONFIG = {
        "data_dir": "/content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET",
        "fs": 50,
        "COLUMN_NAMES": column_names,
        "TARGET_ACTIVITIES_MAP": target_activities,
        "ACT_FEATURE_MAP": act_feature_map,
        "ALL_LABELS": all_labels,
        "SAVE_DIR": None,  # e.g., "/content/drive/MyDrive/tempo_scale_tables"
    }

    full_data = load_mhealth_dataset(
        CONFIG["data_dir"],
        CONFIG["TARGET_ACTIVITIES_MAP"],
        CONFIG["COLUMN_NAMES"]
    )
    if not full_data:
        # Nothing was loaded (the loader has already printed a warning).
        return

    compute_tempo_and_count_groups(
        full_data=full_data,
        all_labels=CONFIG["ALL_LABELS"],
        target_map=CONFIG["TARGET_ACTIVITIES_MAP"],
        feature_map=CONFIG["ACT_FEATURE_MAP"],
        fs=CONFIG["fs"],
        print_trials=True,
        print_activity_table=True,
        save_dir=CONFIG["SAVE_DIR"]
    )


if __name__ == "__main__":
    main()


Loading 10 subjects from /content/drive/MyDrive/Colab Notebooks/HAR_data/MHEALTHDATASET...

Trial-level table (first 30)
     subj  act_id                  activity  gt_count  duration_sec  tempo_rps    T
 subject1       6       Waist bends forward      21.0         61.44   0.341797 3072
 subject2       6       Waist bends forward      19.0         63.48   0.299307 3174
 subject3       6       Waist bends forward      21.0         64.52   0.325480 3226
 subject4       6       Waist bends forward      20.0         66.56   0.300481 3328
 subject5       6       Waist bends forward      20.0         55.30   0.361664 2765
 subject6       6       Waist bends forward      20.0         44.04   0.454133 2202
 subject7       6       Waist bends forward      20.0         61.44   0.325521 3072
 subject8       6       Waist bends forward      21.0         43.02   0.488145 2151
 subject9       6       Waist bends forward      21.0         57.34   0.366236 2867
subject10       6       Waist bends for

# mm-Fit

In [11]:
import os
import glob
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# 0) Small utils
# ------------------------------------------------------------
def _iqr(s: pd.Series) -> float:
    """Return the interquartile range (Q3 - Q1) of the Series as a plain float."""
    return float(s.quantile(0.75) - s.quantile(0.25))

def find_largest_gap_threshold(values_1d: np.ndarray):
    """
    Find a two-group split threshold at the largest gap between adjacent sorted values.

    Args:
        values_1d: array-like of values; flattened before use.

    Returns:
        thr: midpoint of the largest gap (the value itself when only one is given)
        v_sorted: the values in ascending order
        j: index such that the largest gap lies between v_sorted[j] and v_sorted[j+1]
            (first one on ties; 0 when only one value is given)
        gaps: adjacent differences of v_sorted (length N-1)

    Raises:
        ValueError: if no values are given.
    """
    v = np.asarray(values_1d, dtype=np.float64).ravel()
    v_sorted = np.sort(v)

    if v_sorted.size == 0:
        # Previously this surfaced as an opaque IndexError on v_sorted[0].
        raise ValueError("find_largest_gap_threshold needs at least one value")

    if v_sorted.size < 2:
        return float(v_sorted[0]), v_sorted, 0, np.array([])

    gaps = v_sorted[1:] - v_sorted[:-1]
    j = int(np.argmax(gaps))
    thr = float((v_sorted[j] + v_sorted[j + 1]) / 2.0)
    return thr, v_sorted, j, gaps


# ------------------------------------------------------------
# 1) MM-Fit signal loader (sw_r acc+gyro 6ch)
# ------------------------------------------------------------
def load_mmfit_signal(path: str, select_6ch=True):
    """
    Load one trial signal from disk as a 2-D float32 array.

    Args:
        path: file to read.
        select_6ch: when True, keep only the first 6 columns (the acc + gyro
            channels in the layouts this loader builds).

    Returns:
        X: (T, C) float32, or None when the file is missing, unsupported,
           unreadable, not 2-D, or has fewer than 6 columns with select_6ch=True
        fs: float or None (only .npz files can carry a sampling rate)
    Supported:
        - .npz with keys:
            * X  (T,C)    [preferred]
            * or acc (T,3) + gyro (T,3)
            * or acc_sw_r + gyro_sw_r
            * optionally fs / sampling_rate / sr / hz
        - .npy (T,C)
        - .csv / .txt (assumes numeric columns)

    NaN / Inf samples are replaced with 0.0.
    """
    if not os.path.exists(path):
        return None, None

    ext = os.path.splitext(path)[1].lower()

    fs = None
    X = None

    try:
        if ext == ".npz":
            # NOTE(review): allow_pickle=True lets a crafted archive execute code while
            # unpickling object arrays; only load .npz files from trusted sources.
            # The context manager closes the underlying zip handle, which the previous
            # version leaked for every trial.
            with np.load(path, allow_pickle=True) as d:
                # fs key candidates: first one that parses as a scalar wins
                for k in ["fs", "sampling_rate", "sr", "hz"]:
                    if k in d.files:
                        try:
                            fs = float(np.array(d[k]).squeeze())
                            break
                        except (TypeError, ValueError):
                            # not a scalar number; try the next candidate key
                            pass

                if "X" in d.files:
                    X = d["X"]
                else:
                    # fallback: first acc/gyro key pair that is present in the archive
                    for acc_key, gyro_key in [("acc", "gyro"), ("acc_sw_r", "gyro_sw_r")]:
                        if (acc_key in d.files) and (gyro_key in d.files):
                            acc = np.asarray(d[acc_key], dtype=np.float32)
                            gyro = np.asarray(d[gyro_key], dtype=np.float32)
                            if acc.ndim == 2 and gyro.ndim == 2 and acc.shape[0] == gyro.shape[0]:
                                X = np.concatenate([acc, gyro], axis=1)
                            break

        elif ext == ".npy":
            X = np.load(path).astype(np.float32)

        elif ext in [".csv", ".txt"]:
            df = pd.read_csv(path)
            X = df.values.astype(np.float32)

        else:
            # unknown
            return None, None

    except Exception as e:
        # Best-effort loader: report the failure and let the caller count it as missing.
        print(f"[load_mmfit_signal] failed: {path} | {e}")
        return None, None

    if X is None:
        return None, None

    X = np.asarray(X, dtype=np.float32)
    if X.ndim != 2:
        return None, None

    # keep acc+gyro 6ch by default (first 6 columns)
    if select_6ch:
        if X.shape[1] < 6:
            # not enough channels
            return None, fs
        X = X[:, :6]

    # remove NaN/Inf
    if not np.isfinite(X).all():
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)

    return X, fs


# ------------------------------------------------------------
# 2) MM-Fit meta loader
# ------------------------------------------------------------
def load_mmfit_meta(meta_csv_path, root_dir, target_exercises, reps_col="reps"):
    """
    Read the trial meta CSV and keep only the rows that can be used.

    The meta CSV needs at least these columns:
      - participant (or subject)
      - exercise
      - npz_path (or path)
      - <reps_col> (GT count)
      - fs (optional; coerced to numeric when present)

    Paths:
      - an absolute path is used as is
      - a relative path is joined onto root_dir

    Rows are dropped when the exercise is not in target_exercises, the participant
    or the reps value is not numeric, or the resolved file does not exist.
    Two columns are added: subj_key ("subject<N>") and npz_path_full.

    Raises:
        FileNotFoundError: meta_csv_path does not exist.
        ValueError: a required column is missing.
    """
    if not os.path.exists(meta_csv_path):
        raise FileNotFoundError(f"meta csv not found: {meta_csv_path}")

    meta = pd.read_csv(meta_csv_path)

    # column normalization: apply an alias only when the canonical name is absent
    aliases = {"subject": "participant", "path": "npz_path"}
    renames = {
        alias: canonical
        for alias, canonical in aliases.items()
        if canonical not in meta.columns and alias in meta.columns
    }
    meta = meta.rename(columns=renames)

    for required in ("participant", "exercise", "npz_path", reps_col):
        if required not in meta.columns:
            raise ValueError(f"meta csv missing column: {required} (have={list(meta.columns)})")

    wanted = set(target_exercises)
    meta["exercise"] = meta["exercise"].astype(str).str.strip()
    meta = meta[meta["exercise"].isin(wanted)].copy()

    # participant -> integer -> "subject<N>" key
    # (numeric ids such as 1, 2, 3 become "subject1", ... so keys are uniform)
    def _subject_key(pid):
        return f"subject{pid}"

    meta["participant"] = pd.to_numeric(meta["participant"], errors="coerce")
    meta = meta.dropna(subset=["participant"]).copy()
    meta["participant"] = meta["participant"].astype(int)
    meta["subj_key"] = meta["participant"].apply(_subject_key)

    # build full path and keep only rows whose file exists
    def _resolve(raw):
        raw = str(raw)
        return raw if os.path.isabs(raw) else os.path.join(root_dir, raw)

    meta["npz_path_full"] = meta["npz_path"].apply(_resolve)
    file_exists = meta["npz_path_full"].apply(os.path.exists)
    meta = meta[file_exists].copy()

    # reps must be numeric
    meta[reps_col] = pd.to_numeric(meta[reps_col], errors="coerce")
    meta = meta.dropna(subset=[reps_col]).copy()

    # fs numeric if exists
    if "fs" in meta.columns:
        meta["fs"] = pd.to_numeric(meta["fs"], errors="coerce")

    return meta.reset_index(drop=True)


# ------------------------------------------------------------
# 3) Quantification (MM-Fit trial list -> tempo/count summary + grouping)
# ------------------------------------------------------------
def _first_valid_fs(*candidates):
    """Return the first candidate that converts to a finite, positive float, else None."""
    for cand in candidates:
        if cand is None:
            continue
        try:
            value = float(cand)
        except (TypeError, ValueError):
            continue
        if np.isfinite(value) and value > 0.0:
            return value
    return None


def compute_tempo_and_count_groups_mmfit(
    meta_df: pd.DataFrame,
    fs_fallback: float,
    reps_col="reps",
    print_trials=True,
    print_activity_table=True,
    save_dir=None,
    select_6ch=True,
):
    """
    Trial tempo:
        r_i = C_i / T_i (reps/sec), where T_i = duration_sec = len(X)/fs
        (duration is floored at 1e-6 s)
    Activity tempo summary:
        mean/std/median/IQR over r_i
    Activity count summary:
        mean/std/median/IQR over C_i

    Grouping:
      - tempo_group (Slow/Fast) by q50 of activity-level mean tempo
      - count_group (Low/High) by "largest adjacent gap" threshold on activity-level mean count

    Args:
        meta_df: one row per trial with columns subj_key, exercise, npz_path_full and
            <reps_col>; optional fs and one of set_id / trial / rep_id / segment_id.
        fs_fallback: sampling rate used when neither the signal file nor the meta
            row provides a usable one.
        reps_col: name of the ground-truth repetition-count column.
        print_trials / print_activity_table: print the respective tables to stdout.
        save_dir: when given, both tables are written there as CSV.
        select_6ch: forwarded to load_mmfit_signal.

    Sampling rate per trial: first finite, positive value among file, meta row,
    fs_fallback. A zero, negative or NaN rate is treated as absent rather than being
    divided by.

    Returns:
        (trial_df, act_df, thresholds); (empty trial_df, empty DataFrame, {}) when
        no trial could be loaded.

    Raises:
        ValueError: a trial has no usable sampling rate from any source.
    """
    rows = []
    missing = 0

    # Both lookups depend only on the frame's columns, so resolve them once.
    has_meta_fs = "fs" in meta_df.columns
    trial_col = next(
        (c for c in ("set_id", "trial", "rep_id", "segment_id") if c in meta_df.columns),
        None,
    )

    for i, row in meta_df.iterrows():
        subj = row["subj_key"]
        act = row["exercise"]
        gt = float(row[reps_col])

        path = row["npz_path_full"]

        X, fs_from_file = load_mmfit_signal(path, select_6ch=select_6ch)
        if X is None:
            missing += 1
            continue

        # priority: rate stored in the file, then the meta row, then the fallback
        fs = _first_valid_fs(
            fs_from_file,
            row["fs"] if has_meta_fs else None,
            fs_fallback,
        )
        if fs is None:
            raise ValueError(f"no valid sampling rate (file / meta / fallback) for trial: {path}")

        T = int(X.shape[0])
        dur = max(T / fs, 1e-6)
        tempo = gt / dur

        # trial identifier: use the meta column when there is one, else the row index
        trial_id = row[trial_col] if trial_col is not None else None
        if trial_id is None:
            trial_id = i

        rows.append({
            "subj": subj,
            "activity": act,
            "trial_id": str(trial_id),
            "gt_count": gt,
            "T": T,
            "fs": fs,
            "duration_sec": dur,
            "tempo_rps": float(tempo),
            "path": path,
        })

    trial_df = pd.DataFrame(rows)
    if trial_df.empty:
        print("[Error] No trials collected. Check meta paths / columns / loader.")
        return trial_df, pd.DataFrame(), {}

    if print_trials:
        print("\n" + "=" * 80)
        print("Trial-level table (first 40)")
        print("=" * 80)
        cols = ["subj", "activity", "trial_id", "gt_count", "duration_sec", "tempo_rps", "T", "fs"]
        print(trial_df[cols].head(40).to_string(index=False))
        print(f"\n#Trials={len(trial_df)} | #Activities={trial_df['activity'].nunique()} | Missing={missing}")

    # ---- activity-level summaries ----
    g = trial_df.groupby(["activity"], as_index=False)

    act_df = g.agg(
        n_trials=("tempo_rps", "count"),

        tempo_mean_rps=("tempo_rps", "mean"),
        tempo_std_rps=("tempo_rps", "std"),
        tempo_median_rps=("tempo_rps", "median"),
        tempo_iqr_rps=("tempo_rps", _iqr),

        count_mean=("gt_count", "mean"),
        count_std=("gt_count", "std"),
        count_median=("gt_count", "median"),
        count_iqr=("gt_count", _iqr),
    )

    # std is NaN for an activity with a single trial; report it as 0.0
    act_df["tempo_std_rps"] = act_df["tempo_std_rps"].fillna(0.0)
    act_df["count_std"] = act_df["count_std"].fillna(0.0)

    # tempo split
    tempo_q50 = float(act_df["tempo_mean_rps"].median())
    act_df["tempo_group"] = np.where(act_df["tempo_mean_rps"] <= tempo_q50, "Slow", "Fast")

    # count split (largest gap)
    count_thr, sorted_counts, gap_j, _ = find_largest_gap_threshold(act_df["count_mean"].values)
    act_df["count_group"] = np.where(act_df["count_mean"] <= count_thr, "Low", "High")

    act_df = act_df.sort_values(["tempo_mean_rps", "count_mean"]).reset_index(drop=True)

    thresholds = {
        "tempo_q50_rps": tempo_q50,
        "count_thr_largest_gap": count_thr,
        "count_gap_between": (float(sorted_counts[gap_j]), float(sorted_counts[gap_j + 1])) if sorted_counts.size >= 2 else (None, None),
    }

    if print_activity_table:
        print("\n" + "=" * 80)
        print("Activity-level summary + groups (MM-Fit)")
        print("=" * 80)
        print(f"tempo q50 (median of activity mean tempo): {tempo_q50:.6f} reps/sec")

        if sorted_counts.size >= 2:
            a = float(sorted_counts[gap_j])
            b = float(sorted_counts[gap_j + 1])
            print(f"count threshold (largest adjacent gap midpoint): {count_thr:.6f} reps  "
                  f"(gap between {a:.3f} and {b:.3f})")
        else:
            print(f"count threshold (largest-gap): {count_thr:.6f} reps (only one activity)")

        print()
        cols = [
            "activity", "n_trials",
            "tempo_mean_rps", "tempo_std_rps", "tempo_median_rps", "tempo_iqr_rps", "tempo_group",
            "count_mean", "count_std", "count_median", "count_iqr", "count_group",
        ]
        print(act_df[cols].to_string(index=False))

        print("\nGroup sizes:")
        print("tempo_group:", act_df["tempo_group"].value_counts().to_dict())
        print("count_group:", act_df["count_group"].value_counts().to_dict())

        print("\nTempo groups:")
        for gname in ["Slow", "Fast"]:
            acts = act_df.loc[act_df["tempo_group"] == gname, "activity"].tolist()
            print(f"  {gname}: {acts}")

        print("\nCount-scale groups:")
        for gname in ["Low", "High"]:
            acts = act_df.loc[act_df["count_group"] == gname, "activity"].tolist()
            print(f"  {gname}: {acts}")

    if save_dir is not None:
        os.makedirs(save_dir, exist_ok=True)
        trial_path = os.path.join(save_dir, "mmfit_trial_tempo_count_table.csv")
        act_path = os.path.join(save_dir, "mmfit_activity_tempo_count_summary.csv")
        trial_df.to_csv(trial_path, index=False)
        act_df.to_csv(act_path, index=False)
        print(f"\n[Saved] {trial_path}")
        print(f"[Saved] {act_path}")

    return trial_df, act_df, thresholds


# ------------------------------------------------------------
# 4) Main
# ------------------------------------------------------------
def main():
    """Load the MM-Fit trial meta table and print the tempo / count quantification tables."""
    root_dir = "/content/drive/MyDrive/Colab Notebooks/HAR_data/mmfit_imu_3ex_trials"
    meta_csv = "/content/drive/MyDrive/Colab Notebooks/HAR_data/mmfit_imu_3ex_trials/meta_sw_r_dumbbell_rows_lunges_pushups.csv"

    CONFIG = {
        "ROOT_DIR": root_dir,
        "META_CSV": meta_csv,

        "TARGET_EXERCISES": [
            "pushups",
            "lunges",
            "dumbbell_rows",
        ],

        # The repetition-count column name differs between meta CSVs.
        # NOTE(review): an earlier note said this file uses a 'repetition' column,
        # but the configured value is "reps" - confirm against the CSV header.
        "REPS_COL": "reps",

        # Sampling rate used when neither the signal file nor the meta row has one.
        # NOTE(review): an earlier note said "fps30 -> sampling rate 30Hz", which
        # does not match the configured 100.0 - confirm the sensor rate.
        "FS_FALLBACK": 100.0,
        "SELECT_6CH": True,
        "SAVE_DIR": None,
    }

    reps_col = CONFIG["REPS_COL"]

    meta = load_mmfit_meta(
        meta_csv_path=CONFIG["META_CSV"],
        root_dir=CONFIG["ROOT_DIR"],
        target_exercises=CONFIG["TARGET_EXERCISES"],
        reps_col=reps_col
    )

    if not meta.empty:
        compute_tempo_and_count_groups_mmfit(
            meta_df=meta,
            fs_fallback=CONFIG["FS_FALLBACK"],
            reps_col=reps_col,
            print_trials=True,
            print_activity_table=True,
            save_dir=CONFIG["SAVE_DIR"],
            select_6ch=CONFIG["SELECT_6CH"],
        )
        return

    print("[Error] meta after filtering is empty. Check exercise names / paths / reps col.")


if __name__ == "__main__":
    main()



Trial-level table (first 40)
    subj      activity trial_id  gt_count  duration_sec  tempo_rps    T    fs
subject2       pushups        1      10.0         14.19   0.704722 1419 100.0
subject2       pushups        2      10.0         12.66   0.789889 1266 100.0
subject2        lunges        3      10.0         28.08   0.356125 2808 100.0
subject2        lunges        4      10.0         27.83   0.359324 2783 100.0
subject2        lunges        5      10.0         29.29   0.341413 2929 100.0
subject2 dumbbell_rows        6      11.0         17.50   0.628571 1750 100.0
subject2 dumbbell_rows        7      10.0         15.17   0.659196 1517 100.0
subject2 dumbbell_rows        8      11.0         16.65   0.660661 1665 100.0
subject0       pushups        0      10.0         11.87   0.842460 1187 100.0
subject0       pushups        1      10.0         15.14   0.660502 1514 100.0
subject0       pushups        2      10.0         16.00   0.625000 1600 100.0
subject0        lunges        3   