In [None]:
# -*- coding: utf-8 -*-
"""
処理(1): 各測定器のRAWデータを Time_sec + 値列 に正規化し、CSV/PNG を出力
- ディレクトリ: BASE_DIR/{subject_id}/        ← 入力は ID のみ（人名フォルダなし）
- 出力先: 上記フォルダ内の RAW/
- サンプリング周波数は仕様で固定（ファイル内の時刻は使わない）
    Pulse=1000Hz / Thermo=10Hz / Skinos=0.1Hz(=10s) / Face=15Hz
- MAT: ch1=Pulse, ch2=Sweat（2ch想定）。1D連結(+datastart/dataend)にも対応し、別ファイルに保存
- Skinos は {subject_id}_skinos.* を最優先で検索（拡張子・大小文字ゆれ対応／CSV優先）
- Face は CSV/Excel（.xlsx/.xls）にも対応（ファイル名優先：A/B/C）
- ログ出力はオフセット非表示の [OK]/[SKIP] 形式
"""

import os
import csv
import glob
from typing import Optional, List, Tuple

import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


# ===================== User settings =====================
# Root folder; each subject's files are looked up under BASE_DIR/<subject_id>/.
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# Subjects are addressed by ID only (no per-person name folders).
SUBJECT_IDS: List[str] = [
    "10031","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103"
]

# Sampling frequencies (Hz) -- fixed by specification; timestamps inside the
# input files are not used.
FS_PULSE  = 1000.0  # Pulse and Sweat (both MAT channels are treated as 1000 Hz)
FS_THERMO = 10.0    # Thermo
FS_SKINOS = 0.1     # Skinos (= one sample every 10 s)
FS_FACE   = 15.0    # Face

# MAT channel assignment (0-based index): ch1=Pulse, ch2=Sweat
PULSE_CH_IDX = 0
SWEAT_CH_IDX = 1
# =======================================================


# ===================== 共通ユーティリティ =====================
def ensure_dir(path: str) -> None:
    """Create directory ``path`` (with any missing parents); no-op if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def generate_time(n: int, fs_hz: float) -> np.ndarray:
    """Return ``n`` evenly spaced timestamps in seconds for sampling rate ``fs_hz``.

    The first sample is at t=0 and consecutive samples are ``1 / fs_hz`` apart.

    Raises:
        ValueError: if ``fs_hz`` is zero or negative.
    """
    if fs_hz <= 0:
        raise ValueError(f"fs_hz must be positive, got {fs_hz}")
    sample_index = np.arange(n, dtype=float)
    return sample_index / fs_hz

def _fmt_mmss(x: float, _pos) -> str:
    x = float(x); m = int(x // 60); s = int(x % 60)
    return f"{m:02d}:{s:02d}"

def plot_lines(df: pd.DataFrame, title_ascii: str, out_png: str) -> None:
    """Plot every value column of ``df`` against ``Time_sec`` and save the figure as a PNG.

    The x axis is labelled in mm:ss. A legend is drawn only when there is more
    than one value column.

    Args:
        df: Frame containing a ``Time_sec`` column plus one or more value columns.
        title_ascii: Plot title.
        out_png: Destination path of the PNG file.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for col in df.columns:
            if col != "Time_sec":
                ax.plot(df["Time_sec"], df[col], linewidth=1.5, label=col)
        ax.set_title(title_ascii, fontsize=30)
        ax.set_xlabel("Time (mm:ss)", fontsize=24)
        ax.set_ylabel("Value", fontsize=24)
        ax.tick_params(axis="both", labelsize=20)
        ax.xaxis.set_major_formatter(FuncFormatter(_fmt_mmss))
        if len(df.columns) > 2:
            ax.legend(fontsize=20)
        fig.tight_layout()
        fig.savefig(out_png, dpi=200)
    finally:
        # Callers catch exceptions and keep looping over subjects, so the figure
        # must be released even when plotting or saving fails.
        plt.close(fig)

def finalize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Drop unusable rows and return the frame sorted by ``Time_sec`` with a fresh index.

    Rows are removed when ``Time_sec`` is NaN, or when every value column
    (everything except ``Time_sec``) is NaN.
    """
    cleaned = df.dropna(subset=["Time_sec"])
    payload_cols = [name for name in cleaned.columns if name != "Time_sec"]
    if len(payload_cols) > 0:
        cleaned = cleaned.dropna(subset=payload_cols, how="all")
    ordered = cleaned.sort_values(by="Time_sec")
    return ordered.reset_index(drop=True)

def first_existing(paths: List[str]) -> Optional[str]:
    """Return the first non-empty candidate in ``paths`` that exists on disk, else ``None``."""
    return next(
        (candidate for candidate in paths if candidate and os.path.exists(candidate)),
        None,
    )

def find_skinos_file(subject_dir: str, subject_id: str) -> Optional[str]:
    """Locate the Skinos export for one subject.

    Name patterns are tried in priority order and the first pattern with any
    match wins:

      1. ``{subject_id}_skinos.*``
      2. ``{subject_id}-skinos.*``
      3. ``*skinos*.*`` (fallback)

    Matching ignores case. Excel files (``.xlsx`` / ``.xls``), hidden files and
    directories are never returned. When several files match the same pattern,
    the most recently modified one is chosen.

    Previously all patterns were pooled and the newest file overall won, so a
    newer fallback match could shadow the ID-named file.

    Args:
        subject_dir: Directory that holds the subject's input files.
        subject_id: Subject identifier used as the file-name prefix.

    Returns:
        Path of the selected file, or ``None`` if nothing matches or the
        directory cannot be listed.
    """
    import fnmatch  # local import: only this helper needs it

    try:
        names = os.listdir(subject_dir)
    except OSError:
        return None

    candidates = [
        name for name in names
        if not name.startswith(".")
        and not name.lower().endswith((".xlsx", ".xls"))
        and os.path.isfile(os.path.join(subject_dir, name))
    ]

    patterns = (
        f"{subject_id}_skinos.*",
        f"{subject_id}-skinos.*",
        "*skinos*.*",  # fallback
    )
    for pattern in patterns:
        lowered = pattern.lower()
        hits = [
            os.path.join(subject_dir, name)
            for name in candidates
            if fnmatch.fnmatchcase(name.lower(), lowered)
        ]
        if hits:
            return max(hits, key=os.path.getmtime)
    return None
# ============================================================


# ===================== MAT: Pulse + Sweat（ch1/ch2=1000Hz） =====================
def _to_1d(arr) -> np.ndarray:
    a = np.asarray(arr)
    if a.ndim == 2:
        # サンプル次元が長い向きに揃える
        if a.shape[0] < a.shape[1]:
            a = a.T
    return np.ravel(a).astype(float)

def _split_by_datastart_dataend(data: np.ndarray, m) -> List[np.ndarray]:
    """
    LabChart 由来の 1D 連結データを datastart/dataend で分割し、各chの配列リストを返す。
    datastart/dataend は 1始まりのインデックスであることが多い点に注意。
    """
    if ("datastart" not in m) or ("dataend" not in m):
        return []
    ds = np.asarray(m["datastart"]).astype(int).ravel()
    de = np.asarray(m["dataend"]).astype(int).ravel()
    if ds.ndim != 1 or de.ndim != 1 or len(ds) != len(de):
        return []
    segs = []
    for i in range(len(ds)):
        s = int(ds[i]) - 1  # 1-based → 0-based
        e = int(de[i])      # Python のスライスは終端を含まない
        s = max(s, 0)
        e = min(e, data.shape[0])
        if e > s:
            segs.append(data[s:e])
    return segs

def _save_mat_channel(values, column: str, out_csv: str, out_png: str) -> None:
    """Write one MAT channel as a ``Time_sec`` + ``column`` CSV and a PNG (FS_PULSE time base)."""
    samples = np.ravel(values).astype(float)
    df = pd.DataFrame({"Time_sec": generate_time(len(samples), FS_PULSE), column: samples})
    df = finalize_df(df)
    df.to_csv(out_csv, index=False)
    plot_lines(df, column, out_png)


def process_pulse_sweat_mat(mat_path: str,
                            out_pulse_csv: str, out_pulse_png: str,
                            out_sweat_csv: str, out_sweat_png: str) -> Tuple[bool, bool]:
    """Extract Pulse (ch1) and Sweat (ch2) from a MAT file and save each to its own CSV/PNG.

    Both channels get a time axis generated at FS_PULSE. Channel lookup order:
      1) variables named ``pulse`` / ``sweat`` (case-insensitive)
      2) 2-D ``data``  -> columns PULSE_CH_IDX / SWEAT_CH_IDX (the longer axis
         is taken as the sample axis)
      3) 1-D ``data`` with ``datastart`` / ``dataend`` -> split into channels
      4) 1-D ``data`` alone -> Pulse only

    ``data`` is looked up by its exact name first and then case-insensitively,
    in line with the ``pulse`` / ``sweat`` lookup.

    Args:
        mat_path: Input MAT file.
        out_pulse_csv, out_pulse_png: Output paths for Pulse.
        out_sweat_csv, out_sweat_png: Output paths for Sweat.

    Returns:
        ``(pulse_done, sweat_done)`` -- whether each channel was found and written.

    Raises:
        FileNotFoundError: if ``mat_path`` does not exist.
    """
    if not os.path.exists(mat_path):
        raise FileNotFoundError(mat_path)

    m = loadmat(mat_path, squeeze_me=True, struct_as_record=False)
    # lower-cased variable name -> actual name (loadmat's "__" metadata excluded)
    keys = {k.lower(): k for k in m.keys() if not k.startswith("__")}

    # 1) dedicated variables
    pulse_arr: Optional[np.ndarray] = _to_1d(m[keys["pulse"]]) if "pulse" in keys else None
    sweat_arr: Optional[np.ndarray] = _to_1d(m[keys["sweat"]]) if "sweat" in keys else None

    # 2/3/4) fall back to the generic 'data' variable
    data_key = "data" if "data" in m else keys.get("data")
    if (pulse_arr is None or sweat_arr is None) and data_key is not None:
        data = np.asarray(m[data_key]).astype(float)
        if data.ndim == 2:
            if data.shape[0] < data.shape[1]:
                data = data.T
            n_ch = data.shape[1]
            if pulse_arr is None and (0 <= PULSE_CH_IDX < n_ch):
                pulse_arr = data[:, PULSE_CH_IDX]
            if sweat_arr is None and (0 <= SWEAT_CH_IDX < n_ch):
                sweat_arr = data[:, SWEAT_CH_IDX]
        elif data.ndim == 1:
            segs = _split_by_datastart_dataend(data, m)
            if segs:
                if pulse_arr is None and len(segs) > PULSE_CH_IDX:
                    pulse_arr = segs[PULSE_CH_IDX]
                if sweat_arr is None and len(segs) > SWEAT_CH_IDX:
                    sweat_arr = segs[SWEAT_CH_IDX]
            elif pulse_arr is None:
                pulse_arr = data

    pulse_done = pulse_arr is not None
    if pulse_done:
        _save_mat_channel(pulse_arr, "Pulse", out_pulse_csv, out_pulse_png)

    sweat_done = sweat_arr is not None
    if sweat_done:
        _save_mat_channel(sweat_arr, "Sweat", out_sweat_csv, out_sweat_png)

    return pulse_done, sweat_done

def process_pulse_mat(mat_path: str, out_csv: str, out_png: str) -> None:
    """Backward-compatible wrapper: write Pulse to ``out_csv``/``out_png`` and Sweat alongside.

    The Sweat outputs go to the same directory as ``out_csv`` and are named
    ``<prefix>_Sweat.csv`` / ``<prefix>_Sweat.png``, where ``<prefix>`` is the
    part of the Pulse CSV file name before its first underscore.

    Raises:
        RuntimeError: if no Pulse channel was found in the MAT file.
    """
    target_dir, pulse_name = os.path.split(out_csv)
    subject_id = pulse_name.split("_")[0]

    results = process_pulse_sweat_mat(
        mat_path,
        out_pulse_csv=out_csv,
        out_pulse_png=out_png,
        out_sweat_csv=os.path.join(target_dir, f"{subject_id}_Sweat.csv"),
        out_sweat_png=os.path.join(target_dir, f"{subject_id}_Sweat.png"),
    )
    pulse_written = results[0]
    if pulse_written:
        return
    raise RuntimeError("Pulse channel not found in MAT.")
# ============================================================


# ===================== デバイス別処理（Thermo / Skinos / Face） =====================
def process_thermo_csv(csv_path: str, out_csv: str, out_png: str) -> None:
    """Normalise a Thermo logger CSV to ``Time_sec`` + ``Thermo1`` and save CSV/PNG.

    The header is the first line that starts with ``Time`` or ``"Time"``;
    everything above it is skipped. Column ``U1-1[C]`` is exported as
    ``Thermo1`` and the time axis is generated at FS_THERMO.

    The header line is located by physical line number and passed to pandas via
    ``skiprows``. ``header=<n>`` is counted after blank lines are dropped, so it
    picked the wrong row whenever the preamble contained an empty line. Both the
    scan and the parse now ignore undecodable bytes.

    Args:
        csv_path: Input CSV (cp932).
        out_csv: Output CSV path.
        out_png: Output PNG path.

    Raises:
        FileNotFoundError: if ``csv_path`` does not exist.
        ValueError: if no header line is found.
        KeyError: if the ``U1-1[C]`` column is missing.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)

    header_row = None
    with open(csv_path, "r", encoding="cp932", errors="ignore") as f:
        for idx, line in enumerate(f):
            if line.strip().startswith(('"Time"', "Time")):
                header_row = idx
                break
    if header_row is None:
        raise ValueError("Thermo: ヘッダ(Time, ...)が見つかりません。")

    # skiprows counts raw file lines, matching the index found by the scan above.
    df_raw = pd.read_csv(csv_path, skiprows=header_row, header=0,
                         encoding="cp932", encoding_errors="ignore")
    col_name = "U1-1[C]"
    if col_name not in df_raw.columns:
        raise KeyError(f"Thermo: 必要列 '{col_name}' なし。columns={list(df_raw.columns)}")

    y = pd.to_numeric(df_raw[col_name], errors="coerce").to_numpy()
    out = pd.DataFrame({"Time_sec": generate_time(len(y), FS_THERMO), "Thermo1": y})
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, "Thermo", out_png)

def process_skinos_csv(csv_path: str, out_csv: str, out_png: str) -> None:
    """Normalise a Skinos CSV to ``Time_sec`` + three value columns and save CSV/PNG.

    The file is read as UTF-8 (undecodable bytes ignored) with the second line
    as the header. Column mapping:

        'Instance_Sweat(mg/cm^2/min)' -> 'Sweat_Rate'
        'Heart_Rate(bpm)'             -> 'Heart_Rate'
        'Skin_Temperature(degree C)'  -> 'Skin_Temp'

    The time axis is generated at FS_SKINOS.

    Raises:
        FileNotFoundError: if ``csv_path`` does not exist.
        KeyError: if any required column is missing.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)
    with open(csv_path, "r", encoding="utf-8", errors="ignore") as f:
        df_raw = pd.read_csv(f, header=1)

    # output column -> source column (insertion order defines the output column order)
    column_map = {
        "Sweat_Rate": "Instance_Sweat(mg/cm^2/min)",
        "Heart_Rate": "Heart_Rate(bpm)",
        "Skin_Temp": "Skin_Temperature(degree C)",
    }
    for src in column_map.values():
        if src not in df_raw.columns:
            raise KeyError(f"Skinos: 必要列 '{src}' なし。columns={list(df_raw.columns)}")

    numeric = {
        dst: pd.to_numeric(df_raw[src], errors="coerce").to_numpy()
        for dst, src in column_map.items()
    }
    out = pd.DataFrame({"Time_sec": generate_time(len(df_raw), FS_SKINOS), **numeric})
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, "Skinos", out_png)

# Tokens that must all appear in the header row of a Face export.
_FACE_HEADER_MARKERS = ("番号", "測定日付", "測定時間")


def _read_face_excel(path: str) -> pd.DataFrame:
    """Read the first sheet of an Excel Face export, using the detected header row as column names."""
    df0 = pd.read_excel(path, sheet_name=0, header=None, dtype=str)
    header_row = None
    for i in range(min(len(df0), 80)):  # only the first 80 rows are searched
        row_text = "".join(str(v) for v in df0.iloc[i].tolist() if pd.notna(v))
        if all(marker in row_text for marker in _FACE_HEADER_MARKERS):
            header_row = i
            break
    if header_row is None:
        raise ValueError(f"{path}: ヘッダ行が見つかりません。")
    df_raw = df0.iloc[header_row + 1:].copy()
    df_raw.columns = df0.iloc[header_row].tolist()
    return df_raw


def _read_face_csv(path: str, encoding: str) -> pd.DataFrame:
    """Read a CSV Face export, skipping every physical line above the detected header line."""
    header_row = None
    with open(path, "r", encoding=encoding, errors="ignore") as f:
        for idx, line in enumerate(f):
            if all(marker in line for marker in _FACE_HEADER_MARKERS):
                header_row = idx
                break
    if header_row is None:
        raise ValueError(f"{path}: ヘッダ行が見つかりません。")
    # skiprows counts raw file lines (like the scan above); header=<n> would be
    # counted after blank lines are dropped and could select the wrong row.
    return pd.read_csv(path, skiprows=header_row, header=0,
                       encoding=encoding, encoding_errors="ignore",
                       engine="python", quoting=csv.QUOTE_NONE)


def process_face_table(csv_or_xlsx_path: str, expected_prefix: str, out_csv: str, out_png: str,
                       encoding: str = "cp932") -> None:
    """Normalise a FaceA/FaceB/FaceC table (CSV or Excel) and save CSV/PNG.

    The input has metadata lines followed by a header row containing
    番号 / 測定日付 / 測定時間. The columns ``BOX <letter> MAX.`` and
    ``BOX <letter> AVE.`` are exported as ``{expected_prefix}_BoxMax`` and
    ``{expected_prefix}_BoxAve``. The letter implied by ``expected_prefix`` is
    tried first, then the remaining letters of A/B/C in order; output names
    always follow ``expected_prefix``. The time axis is generated at FS_FACE.

    Args:
        csv_or_xlsx_path: Input file (``.xlsx``/``.xls`` are read as Excel,
            anything else as CSV).
        expected_prefix: ``'FaceA'``, ``'FaceB'`` or ``'FaceC'``.
        out_csv: Output CSV path.
        out_png: Output PNG path.
        encoding: Text encoding used for CSV input.

    Raises:
        FileNotFoundError: if the input file does not exist.
        ValueError: if ``expected_prefix`` is invalid or no header row is found.
        KeyError: if no ``BOX <letter> MAX./AVE.`` column pair is present.
    """
    if not os.path.exists(csv_or_xlsx_path):
        raise FileNotFoundError(csv_or_xlsx_path)
    if expected_prefix not in ("FaceA", "FaceB", "FaceC"):
        raise ValueError("expected_prefix must be 'FaceA', 'FaceB', or 'FaceC'.")

    ext = os.path.splitext(csv_or_xlsx_path)[1].lower()
    if ext in (".xlsx", ".xls"):
        df_raw = _read_face_excel(csv_or_xlsx_path)
    else:
        df_raw = _read_face_csv(csv_or_xlsx_path, encoding)

    # Prefer the letter implied by the file name, then fall back to the others.
    want_letter = expected_prefix[-1]
    search_order = [want_letter] + [c for c in ("A", "B", "C") if c != want_letter]
    src_letter = next(
        (c for c in search_order
         if {f"BOX {c} MAX.", f"BOX {c} AVE."}.issubset(df_raw.columns)),
        None,
    )
    if src_letter is None:
        raise KeyError(
            f"{csv_or_xlsx_path}: Faceの必要列（BOX A/B/C MAX./AVE.）が見つかりません。"
            f"columns={list(df_raw.columns)}"
        )

    max_vals = pd.to_numeric(df_raw[f"BOX {src_letter} MAX."], errors="coerce").to_numpy()
    ave_vals = pd.to_numeric(df_raw[f"BOX {src_letter} AVE."], errors="coerce").to_numpy()

    n = min(len(max_vals), len(ave_vals))
    out = pd.DataFrame({
        "Time_sec": generate_time(n, FS_FACE),
        f"{expected_prefix}_BoxMax": max_vals[:n],
        f"{expected_prefix}_BoxAve": ave_vals[:n],
    })
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, expected_prefix, out_png)
# ============================================================


# =============== ログ出力（要求フォーマット） ===============
def log_subject_header(subject_id: str) -> None:
    """Print the per-subject heading line that precedes the device log lines."""
    heading = "# Subject {}".format(subject_id)
    print(heading)

def log_ok(device: str, out_csv: str) -> None:
    """Print a success line for ``device`` together with the CSV path that was written."""
    message = "[OK]  {} -> {}".format(device, out_csv)
    print(message)

def log_skip(device: str, reason: str) -> None:
    """Print a skip line for ``device`` together with the reason it was skipped."""
    message = "[SKIP] {}: {}".format(device, reason)
    print(message)
# ============================================================


# ===================== メイン処理 =====================
def _raw_output_paths(raw_dir: str, subject_id: str, device: str) -> Tuple[str, str]:
    """Return the ``(csv, png)`` output paths for one device inside ``raw_dir``."""
    stem = os.path.join(raw_dir, f"{subject_id}_{device}")
    return stem + ".csv", stem + ".png"


def main_1() -> None:
    """Normalise every device's raw file for every subject in SUBJECT_IDS.

    For each subject, inputs are read from ``BASE_DIR/<id>/`` and outputs are
    written to ``BASE_DIR/<id>/RAW/``. One ``[OK]`` or ``[SKIP]`` line is
    printed per device, in the order Pulse, Sweat, Thermo, Skinos, FaceA,
    FaceB, FaceC. A failure in one device never stops the others.

    Sweat is reported from the flag returned by ``process_pulse_sweat_mat``
    rather than from the existence of the output file, so a Sweat CSV left over
    from an earlier run is not reported as [OK].
    """
    for subject_id in SUBJECT_IDS:
        subject_dir = os.path.join(BASE_DIR, subject_id)  # ID only, no name folder
        raw_dir = os.path.join(subject_dir, "RAW")
        ensure_dir(raw_dir)

        log_subject_header(subject_id)

        # ---- Pulse (+Sweat) from MAT ----
        path_mat = os.path.join(subject_dir, f"{subject_id}.mat")
        out_pulse_csv, out_pulse_png = _raw_output_paths(raw_dir, subject_id, "Pulse")
        out_sweat_csv, out_sweat_png = _raw_output_paths(raw_dir, subject_id, "Sweat")
        if os.path.exists(path_mat):
            try:
                pulse_ok, sweat_ok = process_pulse_sweat_mat(
                    path_mat,
                    out_pulse_csv=out_pulse_csv, out_pulse_png=out_pulse_png,
                    out_sweat_csv=out_sweat_csv, out_sweat_png=out_sweat_png,
                )
            except Exception as e:
                log_skip("Pulse", str(e))
                log_skip("Sweat", "skipped due to Pulse error")
            else:
                if pulse_ok:
                    log_ok("Pulse", out_pulse_csv)
                else:
                    log_skip("Pulse", "Pulse channel not found in MAT.")
                if sweat_ok:
                    log_ok("Sweat", out_sweat_csv)
                else:
                    log_skip("Sweat", "channel not found in MAT")
        else:
            log_skip("Pulse", f"file not found: {path_mat}")
            log_skip("Sweat", "file not found (MAT required)")

        # ---- Thermo ----
        path_thermo = os.path.join(subject_dir, f"{subject_id}.CSV")
        out_thermo_csv, out_thermo_png = _raw_output_paths(raw_dir, subject_id, "Thermo")
        if os.path.exists(path_thermo):
            try:
                process_thermo_csv(path_thermo, out_thermo_csv, out_thermo_png)
                log_ok("Thermo", out_thermo_csv)
            except Exception as e:
                log_skip("Thermo", str(e))
        else:
            log_skip("Thermo", f"file not found: {path_thermo}")

        # ---- Skinos ({id}_skinos.* preferred) ----
        path_skinos = find_skinos_file(subject_dir, subject_id)
        out_skinos_csv, out_skinos_png = _raw_output_paths(raw_dir, subject_id, "Skinos")
        if path_skinos and os.path.exists(path_skinos):
            try:
                process_skinos_csv(path_skinos, out_skinos_csv, out_skinos_png)
                log_ok("Skinos", out_skinos_csv)
            except Exception as e:
                log_skip("Skinos", str(e))
        else:
            log_skip("Skinos", "file not found ({id}_skinos.* or *skinos*.*)")

        # ---- FaceA / FaceB / FaceC (CSV or Excel) ----
        for letter in ("A", "B", "C"):
            device = f"Face{letter}"
            candidate_names = [
                f"{subject_id}{letter}.CSV",
                f"{subject_id}_{letter}.csv",
                f"{subject_id}_{letter}.xlsx",
                f"{subject_id}{letter}.xlsx",
            ]
            path_face = first_existing(
                [os.path.join(subject_dir, name) for name in candidate_names]
            )
            if not path_face:
                log_skip(device, f"file not found ({letter}: CSV/XLSX)")
                continue
            out_face_csv, out_face_png = _raw_output_paths(raw_dir, subject_id, device)
            try:
                process_face_table(path_face, device, out_face_csv, out_face_png)
                log_ok(device, out_face_csv)
            except Exception as e:
                log_skip(device, str(e))

    print("\nAll done.")


# Entry point: run the RAW normalisation when executed as a script.
if __name__ == "__main__":
    main_1()


In [None]:
"""
RAW -> OFFSET
- Pulse / Thermo / FaceA / FaceB / FaceC / Sweat: 被験者×生体信号のオフセットを Time_sec に加算
    - FaceB/C は FaceA を継承（OFFSETSにFaceB/Cは不要）
    - Sweat は Pulse を継承（OFFSETSにSweatは不要）
- Skinos: 被験者ごとに「最初 x 秒」をカットし、t := t - x（x秒地点を t=0 とする）
- PNG出力（mm:ss 軸 / linewidth=1.5 / ハイライト: HILIGHT_START〜HILIGHT_END）
    - かつ ±PLOT_PAD_SEC の範囲のみ描画（= [HILIGHT_START-PLOT_PAD_SEC, HILIGHT_END+PLOT_PAD_SEC]）
    - 赤ハッチ開始直前30秒を灰色の斜線帯で表示
- scope画像: OFFSET/scope/ に、生体信号ごと1枚（横並び2窓）
    - 左: 灰色帯開始（= HILIGHT_START-30）±15秒
    - 右: 赤帯終了（= HILIGHT_END）±15秒
    - 横軸は「秒」表記（mm:ssではない）
- ログ:
    # Subject {sid}
    [OK]  {Device} -> {output_csv_path}
    [SKIP] {Device}: {reason}
"""

from typing import List, Dict, Any, Tuple
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.transforms import blended_transform_factory
from matplotlib.patches import Rectangle

# ===================== User settings =====================
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# Target subject IDs (personal names are not used)
SUBJECT_IDS: List[str] = [
    "10031","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# Per-subject, per-signal time offsets in seconds (added to Time_sec).
#   - FaceB/C inherit FaceA and Sweat inherits Pulse, so they need no entry here.
#   - NOTE(review): "10031" and "10062" are in SUBJECT_IDS but have no entry
#     here -- confirm whether a zero offset is intended for them.
OFFSETS: Dict[str, Dict[str, float]] = {
    "10061": {"Pulse": 1770.0-5,  "Thermo": 1710.0+5,  "FaceA": 1770.0-10},
    "10063": {"Pulse": 1770.0-5,  "Thermo": 1710.0,    "FaceA": 1740.0+5},
    "10064": {"Pulse": 1770.0-1,  "Thermo": 1710.0,    "FaceA": 1740.0+5},

    "10071": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+1},
    "10072": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10073": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10074": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},

    "10081": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10082": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10083": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10084": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},

    "10091": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10092": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10093": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10094": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},

    "10101": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+10},
    "10102": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
    "10103": {"Pulse": 1590.0,    "Thermo": 1530.0,    "FaceA": 1560.0+5},
}


# Skinos "cut the first x seconds" setting, per subject (0.0 when unspecified).
SKINOS_TRIM_SECS: Dict[str, float] = {
    "10031": 0.0, "10061": 0.0, "10062": 600.0, "10063": 0.0, "10064": 0.0,
    "10071": 0.0, "10072": 0.0, "10073": 0.0, "10074": 0.0,
    "10081": 0.0, "10082": 0.0, "10083": 0.0, "10084": 0.0,
    "10091": 30.0, "10092": 0.0, "10093": 0.0, "10094": 300.0,
    "10101": 0.0, "10102": 0.0, "10103": 0.0,
}
# After the cut, time is always re-based as t := t - x (the x-second mark becomes t=0).

# Output / plotting options
MAKE_PLOTS: bool = True
CLAMP_MIN_ZERO: bool = False          # after the offset, shift time so the minimum becomes 0
HILIGHT_START: float = 1800.0         # start of the highlighted (red) interval, seconds
HILIGHT_END: float = 2400.0           # end of the highlighted (red) interval, seconds
PLOT_PAD_SEC: float = 60.0            # margin (seconds) drawn before/after the highlighted interval
TIME_COL: str = "Time_sec"            # name of the time column in every CSV

# ===================== Sensor settings (order is preserved) =====================
# Each entry: "name" (device label used in logs and offset lookup), "enabled",
# "file" (input file name template, formatted with sid), "y_cols" (value
# columns to plot) and "title" (plot title).
SENSORS: List[Dict[str, Any]] = [
    {"name": "Pulse",  "enabled": True,  "file": "{sid}_Pulse.csv",
     "y_cols": ["Pulse"], "title": "Pulse"},
    {"name": "Thermo", "enabled": True,  "file": "{sid}_Thermo.csv",
     "y_cols": ["Thermo1"], "title": "Thermo"},
    {"name": "Sweat",  "enabled": True,  "file": "{sid}_Sweat.csv",
     "y_cols": ["Sweat"], "title": "Sweat"},
    {"name": "Skinos", "enabled": True,  "file": "{sid}_Skinos.csv",
     "y_cols": ["Sweat_Rate", "Heart_Rate", "Skin_Temp"], "title": "Skinos"},
    {"name": "FaceA",  "enabled": True,  "file": "{sid}_FaceA.csv",
     "y_cols": ["FaceA_BoxMax", "FaceA_BoxAve"], "title": "FaceA"},
    {"name": "FaceB",  "enabled": True,  "file": "{sid}_FaceB.csv",
     "y_cols": ["FaceB_BoxMax", "FaceB_BoxAve"], "title": "FaceB"},
    {"name": "FaceC",  "enabled": True,  "file": "{sid}_FaceC.csv",
     "y_cols": ["FaceC_BoxMax", "FaceC_BoxAve"], "title": "FaceC"},
]

# ===================== ユーティリティ =====================
def read_csv_robust(path: str) -> pd.DataFrame:
    """Read a CSV, trying several text encodings before giving up on strict decoding.

    Encodings are tried in the order utf-8-sig, utf-8, cp932. If all of them
    fail to decode, the file is read once more with undecodable bytes ignored.

    Only decoding failures trigger the next attempt. Any other error (missing
    file, empty file, malformed CSV, ...) is raised immediately instead of
    being swallowed and retried under every encoding.

    Args:
        path: CSV file to read.

    Returns:
        The parsed DataFrame.
    """
    for enc in ("utf-8-sig", "utf-8", "cp932"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            continue
    return pd.read_csv(path, encoding_errors="ignore")

def ensure_dir(path: str) -> None:
    """Create directory ``path`` (with any missing parents); no-op if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def mmss_formatter(x: float, pos: int) -> str:
    """Format seconds as ``mm:ss`` with a leading ``-`` for negative values.

    Seconds are rounded to the nearest integer; a rounded value of 60 is
    carried into the minutes. ``pos`` is the tick index passed by matplotlib
    and is not used.
    """
    prefix = "-" if x < 0 else ""
    magnitude = abs(x)
    minutes = int(magnitude // 60)
    seconds = int(round(magnitude - minutes * 60))
    if seconds == 60:
        minutes, seconds = minutes + 1, 0
    return f"{prefix}{minutes:02d}:{seconds:02d}"

def apply_offset(df: pd.DataFrame, offset_sec: float, clamp_min_zero: bool) -> pd.DataFrame:
    """Return a copy of ``df`` whose TIME_COL is shifted by ``offset_sec`` seconds.

    TIME_COL is coerced to numeric first (unparseable entries become NaN). When
    ``clamp_min_zero`` is true, the shifted times are translated again so that
    their minimum is 0.

    Raises:
        ValueError: if TIME_COL is not a column of ``df``.
    """
    if TIME_COL not in df.columns:
        raise ValueError(f"{TIME_COL} not found: {list(df.columns)}")

    shifted = df.copy()
    times = pd.to_numeric(shifted[TIME_COL], errors="coerce") + float(offset_sec)
    if clamp_min_zero:
        earliest = times.min()
        needs_shift = pd.notna(earliest) and earliest != 0
        if needs_shift:
            times = times - earliest
    shifted[TIME_COL] = times
    return shifted

def add_red_hatched_band(ax, x0: float, x1: float) -> None:
    """Mark the x-range [x0, x1] on ``ax`` with a faint red span plus a red hatch overlay.

    The hatch rectangle uses data coordinates on x and axes coordinates on y,
    so it spans the full height of the axes.
    """
    ax.axvspan(x0, x1, color="red", alpha=0.08, zorder=0)

    x_data_y_axes = blended_transform_factory(ax.transData, ax.transAxes)
    hatch_style = dict(fill=False, hatch="////", edgecolor="red",
                       linewidth=0.0, zorder=1, alpha=0.5)
    overlay = Rectangle((x0, 0), x1 - x0, 1, transform=x_data_y_axes, **hatch_style)
    ax.add_patch(overlay)

def add_gray_hatched_before(ax, start: float, width: float = 30.0) -> None:
    """Mark [start - width, start] on ``ax`` with a faint gray span plus a gray hatch overlay.

    This is the lead-in interval immediately before the red highlighted band.
    The hatch rectangle spans the full height of the axes.
    """
    right = float(start)
    left = right - float(width)
    ax.axvspan(left, right, color="gray", alpha=0.10, zorder=0)

    x_data_y_axes = blended_transform_factory(ax.transData, ax.transAxes)
    hatch_style = dict(fill=False, hatch="////", edgecolor="gray",
                       linewidth=0.0, zorder=1, alpha=0.5)
    overlay = Rectangle((left, 0), right - left, 1, transform=x_data_y_axes, **hatch_style)
    ax.add_patch(overlay)

def crop_for_window(df: pd.DataFrame,
                    x_col: str,
                    center_start: float,
                    center_end: float,
                    pad: float) -> pd.DataFrame:
    """Keep only rows whose ``x_col`` lies in [center_start - pad, center_end + pad].

    Both bounds are inclusive and ``x_col`` is coerced to numeric for the
    comparison. If no row falls inside the window, the original frame is
    returned unchanged (not a copy).
    """
    lower, upper = center_start - pad, center_end + pad
    xs = pd.to_numeric(df[x_col], errors="coerce")
    inside = xs.ge(lower) & xs.le(upper)
    windowed = df.loc[inside].copy()
    if windowed.empty:
        return df
    return windowed

def plot_timeseries(df: pd.DataFrame,
                    y_cols: List[str],
                    out_png: str,
                    title: str,
                    hilight_range: Tuple[float, float],
                    pad_sec: float) -> None:
    """Plot ``y_cols`` around the highlighted interval and save the figure as a PNG.

    Only data within ``pad_sec`` of ``hilight_range`` is drawn and the x limits
    are fixed to that window. The 30 s before the interval is marked gray and
    the interval itself red. The x axis is labelled in mm:ss.

    The legend is drawn only when at least one column was plotted (an
    unconditional ``legend()`` warns on empty axes), and the figure is always
    closed, even if plotting or saving fails.

    Args:
        df: Frame containing TIME_COL and the value columns.
        y_cols: Value columns to plot; names missing from ``df`` are skipped.
        out_png: Destination PNG path.
        title: Plot title.
        hilight_range: ``(start, end)`` of the highlighted interval in seconds.
        pad_sec: Margin in seconds shown before and after the interval.
    """
    dfw = crop_for_window(df, TIME_COL, hilight_range[0], hilight_range[1], pad_sec)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for c in y_cols:
            if c in dfw.columns:
                ax.plot(dfw[TIME_COL], pd.to_numeric(dfw[c], errors="coerce"),
                        label=c, linewidth=1.5)

        # Lead-in (gray, 30 s before the interval) and the interval itself (red).
        add_gray_hatched_before(ax, start=hilight_range[0], width=30.0)
        add_red_hatched_band(ax, *hilight_range)

        # Fixed display range.
        ax.set_xlim(hilight_range[0] - pad_sec, hilight_range[1] + pad_sec)

        ax.set_title(title, fontsize=30)
        ax.set_xlabel("Time (mm:ss)", fontsize=24)
        ax.set_ylabel("Value", fontsize=24)
        ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
        ax.tick_params(axis="both", labelsize=20)

        handles, labels = ax.get_legend_handles_labels()
        if handles:
            ax.legend(handles, labels, fontsize=20)

        fig.tight_layout()
        fig.savefig(out_png, dpi=200)
    finally:
        plt.close(fig)

# === 継承ロジック（Sweat→Pulse, FaceB/C→FaceA） ===
def resolve_sensor_offset(per_sensor_offset: Dict[str, float], sensor_name: str) -> float:
    """Return the offset (seconds) to use for ``sensor_name``.

    Inheritance rules:
      - FaceB / FaceC use the FaceA entry
      - Sweat uses the Pulse entry
      - every other sensor uses its own entry

    A missing entry resolves to 0.0.
    """
    inherits_from = {"FaceB": "FaceA", "FaceC": "FaceA", "Sweat": "Pulse"}
    lookup_key = inherits_from.get(sensor_name, sensor_name)
    return float(per_sensor_offset.get(lookup_key, 0.0))

# === scope 画像（灰色帯の開始前後 / 赤帯の終了前後）を横並びで保存 ===
def plot_scope_two_windows(df: pd.DataFrame,
                           y_cols: List[str],
                           out_png: str,
                           title: str,
                           gray_start: float,
                           red_end: float,
                           half_window: float = 15.0) -> None:
    """Save a two-panel close-up of the signal around two reference times.

    Left panel:  [gray_start - half_window, gray_start + half_window]
    Right panel: [red_end - half_window, red_end + half_window]

    The panels share the y axis, label x in plain seconds (not mm:ss) and draw a
    dashed vertical line at the centre of each window. A legend is added to the
    right panel when it contains labelled lines.

    Only one figure is created and it is always closed. The earlier version
    opened an extra empty figure with ``plt.figure`` before ``plt.subplots``
    and never closed it, leaking one figure per call.

    Args:
        df: Frame containing TIME_COL and the value columns.
        y_cols: Value columns to plot; names missing from ``df`` are skipped.
        out_png: Destination PNG path.
        title: Base title used for both panels.
        gray_start: Centre of the left window, in seconds.
        red_end: Centre of the right window, in seconds.
        half_window: Half-width of each window, in seconds.
    """
    import matplotlib.ticker as mticker

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    try:
        windows = [(gray_start - half_window, gray_start + half_window),
                   (red_end   - half_window, red_end   + half_window)]
        titles = [f"{title}  (gray-start ±{int(half_window)}s)",
                  f"{title}  (red-end   ±{int(half_window)}s)"]

        # Numeric time axis is the same for both panels; compute it once.
        xv = pd.to_numeric(df[TIME_COL], errors="coerce")

        for ax, (x0, x1), st in zip(axes, windows, titles):
            sub = df.loc[(xv >= x0) & (xv <= x1)].copy()

            for c in y_cols:
                if c in sub.columns:
                    ax.plot(sub[TIME_COL], pd.to_numeric(sub[c], errors="coerce"),
                            linewidth=1.5, label=c)

            ax.set_xlim(x0, x1)
            ax.set_title(st, fontsize=16)
            ax.set_xlabel("Time (sec)", fontsize=12)
            ax.xaxis.set_major_locator(mticker.MaxNLocator(6))
            ax.tick_params(axis="both", labelsize=10)
            # Reference line at the window centre.
            ax.axvline((x0 + x1) / 2, linestyle="--", linewidth=1)

        axes[0].set_ylabel("Value", fontsize=12)
        # Legend on the right panel only.
        handles, labels = axes[-1].get_legend_handles_labels()
        if handles:
            axes[-1].legend(handles, labels, fontsize=10, loc="best")

        fig.tight_layout()
        fig.savefig(out_png, dpi=200)
    finally:
        plt.close(fig)

# ===================== コア処理 =====================
def process_sensor_offset(raw_dir: str,
                          out_dir: str,
                          sensor_cfg: Dict[str, Any],
                          sid: str,
                          per_sensor_offset: Dict[str, float]) -> str:
    """Apply the time offset to one sensor's RAW CSV and write the shifted CSV plus plots.

    Used for Pulse / Thermo / FaceA / FaceB / FaceC / Sweat. The shifted CSV
    keeps the input file name and is written to ``out_dir``. When MAKE_PLOTS is
    set, an overview PNG is written next to it and a two-window scope PNG under
    ``out_dir/scope``.

    Returns:
        A log line: ``[OK]  <name> -> <csv>`` on success, or ``[SKIP] <name>:
        <reason>`` when the input is missing or any step raises.
    """
    name = sensor_cfg["name"]
    in_csv = os.path.join(raw_dir, sensor_cfg["file"].format(sid=sid))
    if not os.path.exists(in_csv):
        return f"[SKIP] {name}: not found -> {in_csv}"

    try:
        raw_frame = read_csv_robust(in_csv)
        shift_sec = resolve_sensor_offset(per_sensor_offset, name)
        shifted = apply_offset(raw_frame, shift_sec, CLAMP_MIN_ZERO)

        ensure_dir(out_dir)
        out_csv = os.path.join(out_dir, os.path.basename(in_csv))
        shifted.to_csv(out_csv, index=False)

        if not MAKE_PLOTS:
            return f"[OK]  {name} -> {out_csv}"

        value_cols = sensor_cfg["y_cols"]
        plot_title = sensor_cfg["title"]

        # Overview plot (mm:ss axis), saved next to the CSV.
        overview_png = os.path.splitext(out_csv)[0] + ".png"
        plot_timeseries(df=shifted,
                        y_cols=value_cols,
                        out_png=overview_png,
                        title=plot_title,
                        hilight_range=(HILIGHT_START, HILIGHT_END),
                        pad_sec=PLOT_PAD_SEC)

        # Scope plot (seconds axis, two windows).
        scope_dir = os.path.join(out_dir, "scope")
        ensure_dir(scope_dir)
        plot_scope_two_windows(df=shifted,
                               y_cols=value_cols,
                               out_png=os.path.join(scope_dir, f"{sid}_{name}_scope.png"),
                               title=plot_title,
                               gray_start=HILIGHT_START - 30.0,
                               red_end=HILIGHT_END,
                               half_window=15.0)

        return f"[OK]  {name} -> {out_csv}"
    except Exception as e:
        return f"[SKIP] {name}: {e}"

def process_skinos_trim(raw_dir: str,
                        out_dir: str,
                        sensor_cfg: Dict[str, Any],
                        sid: str) -> str:
    """Trim the head of a Skinos recording and re-zero its time axis.

    Rows with ``Time_sec >= x`` are kept, where ``x`` is
    ``SKINOS_TRIM_SECS[sid]`` (0.0 when the subject is not listed), and ``x``
    is then subtracted from every kept time so the cut point becomes t=0.
    No sensor offset is applied. The result is saved to ``out_dir``; when
    ``MAKE_PLOTS`` is true, the overview and scope PNGs are produced as well.

    Args:
        raw_dir: Directory holding the subject's RAW CSV files.
        out_dir: Directory receiving the trimmed CSV and figures.
        sensor_cfg: Sensor settings; reads "name", "file", "y_cols", "title".
        sid: Subject ID used in file names and for the trim lookup.

    Returns:
        A one-line log entry starting with ``[OK]`` or ``[SKIP]``. Failures
        are reported through this string instead of being raised.
    """
    name = sensor_cfg["name"]
    src_csv = os.path.join(raw_dir, sensor_cfg["file"].format(sid=sid))
    if not os.path.exists(src_csv):
        return f"[SKIP] {name}: not found -> {src_csv}"

    try:
        table = read_csv_robust(src_csv)
        if TIME_COL not in table.columns:
            return f"[SKIP] {name}: {TIME_COL} not found"

        trim_x = float(SKINOS_TRIM_SECS.get(sid, 0.0))
        table[TIME_COL] = pd.to_numeric(table[TIME_COL], errors="coerce")

        # Rows whose time is NaN fail the comparison and are dropped too.
        keep = table[TIME_COL] >= trim_x
        trimmed = table[keep].copy()
        if trimmed.empty:
            return f"[SKIP] {name}: empty after trim ({trim_x}s)"

        # Move the origin to the cut point.
        trimmed[TIME_COL] = trimmed[TIME_COL] - trim_x

        ensure_dir(out_dir)
        out_csv = os.path.join(out_dir, os.path.basename(src_csv))
        trimmed.to_csv(out_csv, index=False)

        if not MAKE_PLOTS:
            return f"[OK]  {name} -> {out_csv}"

        y_cols = sensor_cfg["y_cols"]
        title = sensor_cfg["title"]

        # Overview figure of the whole recording, saved next to the CSV.
        plot_timeseries(df=trimmed,
                        y_cols=y_cols,
                        out_png=os.path.splitext(out_csv)[0] + ".png",
                        title=title,
                        hilight_range=(HILIGHT_START, HILIGHT_END),
                        pad_sec=PLOT_PAD_SEC)

        # Scope figure: two windows, one anchored 30 s before the highlight
        # start and one at the highlight end.
        scope_dir = os.path.join(out_dir, "scope")
        ensure_dir(scope_dir)
        plot_scope_two_windows(df=trimmed,
                               y_cols=y_cols,
                               out_png=os.path.join(scope_dir, f"{sid}_{name}_scope.png"),
                               title=title,
                               gray_start=HILIGHT_START - 30.0,
                               red_end=HILIGHT_END,
                               half_window=15.0)

        return f"[OK]  {name} -> {out_csv}"
    except Exception as e:
        return f"[SKIP] {name}: {e}"

def process_subject(sid: str) -> str:
    """Run every configured sensor for one subject and return the joined log.

    Reads from ``BASE_DIR/{sid}/RAW`` and writes to ``BASE_DIR/{sid}/OFFSET2``.
    Sensors whose config has ``enabled`` set to a false value are logged as
    skipped. "Skinos" goes through the trim path; all other sensors go through
    the offset path.

    Args:
        sid: Subject ID (also the subject's folder name).

    Returns:
        A header line followed by one log line per sensor, newline-joined.
    """
    subject_root = os.path.join(BASE_DIR, sid)
    raw_dir = os.path.join(subject_root, "RAW")
    out_dir = os.path.join(subject_root, "OFFSET2")

    # Subjects missing from OFFSETS get an empty mapping.
    offsets_for_subject = OFFSETS.get(sid, {})

    def _run(cfg):
        if not cfg.get("enabled", True):
            return f"[SKIP] {cfg['name']}: skipped by config"
        if cfg["name"] == "Skinos":
            return process_skinos_trim(raw_dir, out_dir, cfg, sid)
        return process_sensor_offset(raw_dir, out_dir, cfg, sid, offsets_for_subject)

    lines = [f"# Subject {sid}"]
    lines.extend(_run(cfg) for cfg in SENSORS)
    return "\n".join(lines)

def main() -> None:
    """Process every subject in ``SUBJECT_IDS`` and print all logs at the end."""
    # join() drains the generator first, so nothing is printed until every
    # subject has been processed.
    print("\n".join(process_subject(sid) for sid in SUBJECT_IDS))

if __name__ == "__main__":
    main()


In [None]:
from typing import List, Dict, Tuple
import os
import pandas as pd
import numpy as np

# ===== ユーザー設定 =====
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"
SUBJECT_IDS: List[str] = [
    "10061","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]


# 対象デバイス（Skinosはトリム x を推定）
DEVICES = [
    ("Pulse",  "{sid}_Pulse.csv"),
    ("Thermo", "{sid}_Thermo.csv"),
    ("Sweat",  "{sid}_Sweat.csv"),
    ("FaceA",  "{sid}_FaceA.csv"),
    ("FaceB",  "{sid}_FaceB.csv"),
    ("FaceC",  "{sid}_FaceC.csv"),
    ("Skinos", "{sid}_Skinos.csv"),  # 特別扱い：トリム x 推定
]
TIME_COL = "Time_sec"

# ===== ユーティリティ =====
def read_csv_robust(path: str) -> pd.DataFrame:
    """Read a CSV, trying several text encodings in turn.

    Encodings are tried in the order utf-8-sig, utf-8, cp932. Only decoding
    failures trigger the next attempt; any other error (missing file, parse
    error, ...) is raised immediately instead of being retried pointlessly.
    If every encoding fails, the file is read once more with undecodable
    bytes ignored.

    Args:
        path: Path of the CSV file.

    Returns:
        The parsed table.
    """
    for enc in ("utf-8-sig", "utf-8", "cp932"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            # Wrong encoding guess: move on to the next candidate.
            continue
    # Last resort: drop the bytes that cannot be decoded.
    return pd.read_csv(path, encoding_errors="ignore")

def _to_num(s: pd.Series) -> pd.Series:
    return pd.to_numeric(s, errors="coerce")

# ===== 推定ロジック =====
def estimate_offset_general(df_raw: pd.DataFrame, df_off: pd.DataFrame) -> Tuple[float, bool, str]:
    """Estimate the time offset applied between a RAW table and an OFFSET table.

    The two time columns are paired row by row from the top (the longer one
    is truncated) and the offset is the median of ``Time_off - Time_raw``.
    A clamp is flagged when the paired OFFSET times have a minimum of about 0
    while the paired RAW times have a minimum above 0.

    Args:
        df_raw: Table before the offset; must contain ``TIME_COL``.
        df_off: Table after the offset; must contain ``TIME_COL``.

    Returns:
        ``(offset_sec_est, clamp_detected, method_str)``. The offset is NaN
        when no row pair has two numeric times.
    """
    raw_t = _to_num(df_raw[TIME_COL])
    off_t = _to_num(df_off[TIME_COL])

    # Pair rows from the top; surplus rows of the longer table are ignored.
    overlap = min(len(raw_t), len(off_t))
    raw_t = raw_t.iloc[:overlap].reset_index(drop=True)
    off_t = off_t.iloc[:overlap].reset_index(drop=True)

    deltas = off_t.sub(raw_t).dropna()
    if deltas.empty:
        return (float("nan"), False, "NA(empty diffs)")

    # The median keeps the estimate stable against a few odd rows.
    shift = float(np.median(deltas.values))

    # If the output was re-translated so its minimum is 0, the true offset is
    # no longer recoverable from the difference; report that to the caller.
    off_starts_at_zero = abs(float(off_t.min())) < 1e-6
    raw_starts_after_zero = float(raw_t.min()) > 0.0
    clamp = off_starts_at_zero and raw_starts_after_zero

    method = "median(to - tr)" + (" + CLAMP_DETECTED" if clamp else "")
    return (shift, clamp, method)

def estimate_trim_skinos(df_raw: pd.DataFrame, df_off: pd.DataFrame) -> Tuple[float, bool, str]:
    """Estimate how many seconds were trimmed from the head of a Skinos file.

    The trimmed file is expected to hold the tail of RAW (rows with
    ``t >= x``) with ``x`` subtracted from every time. Row i of the trimmed
    file therefore pairs with row ``len(raw) - len(off) + i`` of RAW, and
    ``raw - off`` equals ``x`` for every pair. The estimate is the median of
    those tail-aligned differences.

    The previous estimator took the median of the first m RAW timestamps.
    That value depends on the recording length, not on the trim, so it did
    not recover ``x``.

    Args:
        df_raw: Untrimmed table; must contain ``TIME_COL``.
        df_off: Trimmed table; must contain ``TIME_COL``.

    Returns:
        ``(trim_x_sec_est, clamp_detected, method_str)``. ``clamp_detected``
        is True when the trimmed times have a minimum of about 0. The
        estimate is NaN when either time column has no numeric value.
    """
    tr = _to_num(df_raw[TIME_COL]).dropna().reset_index(drop=True)
    to = _to_num(df_off[TIME_COL]).dropna().reset_index(drop=True)
    if tr.empty or to.empty:
        return (float("nan"), False, "NA(empty series)")

    # After re-zeroing, the trimmed series should start at about 0.
    clamp = (abs(float(to.min())) < 1e-6)

    # Align the LAST m raw rows with the first m trimmed rows; if the trimmed
    # file is unexpectedly longer, this degrades to a plain head alignment.
    m = min(len(tr), len(to))
    raw_tail = tr.iloc[len(tr) - m:].to_numpy(dtype=float)
    off_head = to.iloc[:m].to_numpy(dtype=float)
    trim_x = float(np.median(raw_tail - off_head))

    return (trim_x, clamp, "median(raw[-m:] - off[:m]) with m=len_overlap")

# ===== 本体 =====
def main():
    """Print the estimated offset (or Skinos trim) for every subject and device.

    For each subject, compares ``RAW/{file}`` with ``OFFSET/{file}`` for every
    entry of ``DEVICES`` and prints one ``[OK]`` or ``[SKIP]`` line per device.
    """
    for sid in SUBJECT_IDS:
        raw_dir = os.path.join(BASE_DIR, sid, "RAW")
        off_dir = os.path.join(BASE_DIR, sid, "OFFSET")
        print(f"# Subject {sid}")

        for dev, fname_tmpl in DEVICES:
            file_name = fname_tmpl.format(sid=sid)
            raw_path = os.path.join(raw_dir, file_name)
            off_path = os.path.join(off_dir, file_name)

            has_raw = os.path.exists(raw_path)
            has_off = os.path.exists(off_path)
            if not (has_raw and has_off):
                print(f"[SKIP] {dev}: missing file(s) -> RAW:{has_raw} OFF:{has_off}")
                continue

            try:
                raw_df = read_csv_robust(raw_path)
                off_df = read_csv_robust(off_path)
                if TIME_COL not in raw_df.columns or TIME_COL not in off_df.columns:
                    print(f"[SKIP] {dev}: Time_sec not found")
                    continue

                # Skinos is trimmed rather than shifted, so it has its own
                # estimator and its own label in the log line.
                if dev == "Skinos":
                    label = "trim_x"
                    value, clamp, method = estimate_trim_skinos(raw_df, off_df)
                else:
                    label = "offset"
                    value, clamp, method = estimate_offset_general(raw_df, off_df)

                clamp_str = " CLAMP" if clamp else ""
                print(f"[OK]  {dev}: {label} ≈ {value:.3f} s  ({method}{clamp_str})")

            except Exception as e:
                print(f"[SKIP] {dev}: {e}")

if __name__ == "__main__":
    main()


In [None]:
"""
Skinos（OFFSET内CSV）に任意のオフセット（秒）を加算して **上書き保存** する専用セル。
- 対象: BASE_DIR/{SID}/OFFSET/{SID}_Skinos.csv
- 仕様: Time_sec := Time_sec + OFFSET_SEC[SID]
- 入力: OFFSETS_SKINOS に {SID: offset_sec} を記述（秒, 負値可）
- 出力: 同じCSVを上書き保存（必要に応じて .bak を作成）

ログ形式:
  # Subject {sid}
  [OK]  Skinos -> overwrite {path} (Δ={offset:.3f}s, rows={n})
  [SKIP] Skinos: {reason}
"""

from typing import Dict, List
import os
import shutil
import pandas as pd

# ===================== ユーザー設定 =====================
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# ここに「上書きしたいSIDとオフセット秒」を記入（例：{"10061": +2.5, "10074": -1.0}）
OFFSETS_SKINOS: Dict[str, float] = {
    "10061": -90.0, "10063": -90.0, "10064": -90.0,
    "10071": -90.0, "10072": -90.0, "10073": -90.0, "10074": -90.0,
    "10081": -90.0, "10082": -90.0, "10083": -90.0,
    "10091": -90.0, "10092": -90.0, "10093": -90.0, "10094": -90.0,
    "10101": -90.0, "10102": -90.0, "10103": -90.0,
}


# バックアップを残すなら True（{SID}_Skinos.csv.bak を作成）
MAKE_BACKUP: bool = True

TIME_COL = "Time_sec"

# ===================== ユーティリティ =====================
def read_csv_robust(path: str) -> pd.DataFrame:
    """Read a CSV, trying several text encodings in turn.

    Encodings are tried in the order utf-8-sig, utf-8, cp932. Only decoding
    failures trigger the next attempt; any other error (missing file, parse
    error, ...) is raised immediately instead of being retried pointlessly.
    If every encoding fails, the file is read once more with undecodable
    bytes ignored.

    Args:
        path: Path of the CSV file.

    Returns:
        The parsed table.
    """
    for enc in ("utf-8-sig", "utf-8", "cp932"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            # Wrong encoding guess: move on to the next candidate.
            continue
    # Final fallback: drop the bytes that cannot be decoded.
    return pd.read_csv(path, encoding_errors="ignore")

def process_one_sid(base_dir: str, sid: str, delta_sec: float) -> str:
    """Add ``delta_sec`` to the Skinos time column of one subject, in place.

    The file ``{base_dir}/{sid}/OFFSET/{sid}_Skinos.csv`` is rewritten with
    ``Time_sec := Time_sec + delta_sec`` (UTF-8 with BOM).

    When ``MAKE_BACKUP`` is true, a ``.bak`` copy is made before overwriting,
    but only if no ``.bak`` exists yet. The shift is cumulative, so a second
    run would otherwise replace the pristine backup with already-shifted
    data. A failed backup does not stop the overwrite, but it is mentioned in
    the returned log line.

    Args:
        base_dir: Root directory containing one folder per subject.
        sid: Subject ID.
        delta_sec: Seconds to add (may be negative).

    Returns:
        A one-line log entry starting with ``[OK]`` or ``[SKIP]``.
    """
    subj_dir = os.path.join(base_dir, sid)
    in_csv = os.path.join(subj_dir, "OFFSET", f"{sid}_Skinos.csv")
    if not os.path.exists(in_csv):
        return f"[SKIP] Skinos: not found -> {in_csv}"

    try:
        df = read_csv_robust(in_csv)
        if TIME_COL not in df.columns:
            return f"[SKIP] Skinos: {TIME_COL} not found"

        # Non-numeric times become NaN and stay NaN after the shift.
        df[TIME_COL] = pd.to_numeric(df[TIME_COL], errors="coerce")
        n_rows = len(df)
        df[TIME_COL] = df[TIME_COL] + float(delta_sec)

        backup_note = ""
        if MAKE_BACKUP:
            bak_path = in_csv + ".bak"
            # Keep the earliest backup: it is the only unshifted copy.
            if not os.path.exists(bak_path):
                try:
                    shutil.copy2(in_csv, bak_path)
                except OSError as e:
                    # Best effort: the overwrite still goes ahead.
                    backup_note = f" [backup failed: {e}]"

        df.to_csv(in_csv, index=False, encoding="utf-8-sig")
        return (f"[OK]  Skinos -> overwrite {in_csv} "
                f"(Δ={delta_sec:.3f}s, rows={n_rows}){backup_note}")
    except Exception as e:
        return f"[SKIP] Skinos: {e}"

# ===================== 本体 =====================
def main():
    """Apply the configured Skinos offset to every subject in ``OFFSETS_SKINOS``.

    Prints a subject header and the resulting log line for each entry, or a
    single ``[SKIP]`` line when the mapping is empty.
    """
    if OFFSETS_SKINOS:
        for sid, delta in OFFSETS_SKINOS.items():
            print(f"# Subject {sid}")
            print(process_one_sid(BASE_DIR, sid, delta))
    else:
        print("[SKIP] OFFSETS_SKINOS is empty (nothing to do)")

if __name__ == "__main__":
    main()


In [None]:
"""
scope 画像の集約コピー専用スクリプト（描画なし／元画像は変更しない）

探索場所:
  BASE_DIR/{sid}/OFFSET/scope/
  BASE_DIR/{sid}/OFFSET2/scope/     # 任意

対象ファイル:
  - "{sid}_*_scope.png" をすべて対象
  - 例: "{sid}_{Sensor}_scope.png", "{sid}_各生体信号_scope.png" など

出力先:
  BASE_DIR/SCOPE/ に複製保存（shutil.copy2 でタイムスタンプも維持）

重名対策:
  同名ファイルが既に存在する場合は、自動で "_1", "_2", … を末尾に付けて保存。

ログ:
  # Subject {sid}
  [OK]   copy -> {src_rel}  =>  {dst_rel}
  [SKIP] {reason}

※ 元ファイルの上書き・変更は行わない。
"""

from typing import List, Tuple
import os, glob, shutil

# ===================== ユーザー設定 =====================
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# 対象SID（必要な分だけ）
SUBJECT_IDS: List[str] = [
    "10031","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# 画像を探すサブディレクトリ
SOURCE_OFFSET_DIRNAMES: List[str] = ["OFFSET"]  # 片方だけでもOK
SCOPE_SUBDIR = "scope"

# 複製保存先
CENTRAL_SCOPE_DIR = os.path.join(BASE_DIR, "SCOPE")

# 既存ファイルがあっても強制上書きするなら True（既定: False = 自動リネーム）
OVERWRITE = False

# ===================== ユーティリティ =====================
def ensure_dir(path: str) -> None:
    """Create directory ``path`` with its parents; do nothing if it exists."""
    os.makedirs(name=path, exist_ok=True)

def _safe_dest_path(dst_dir: str, base_filename: str, overwrite: bool=False) -> str:
    """
    保存先に同名がある場合、_1, _2, ... を付けて衝突回避。
    overwrite=True のときはそのまま上書きパスを返す。
    """
    dst = os.path.join(dst_dir, base_filename)
    if overwrite or not os.path.exists(dst):
        return dst
    root, ext = os.path.splitext(base_filename)
    k = 1
    while True:
        cand = os.path.join(dst_dir, f"{root}_{k}{ext}")
        if not os.path.exists(cand):
            return cand
        k += 1

def _rel(path: str, base: str) -> str:
    try:
        return os.path.relpath(path, base)
    except Exception:
        return path

# ===================== 本体 =====================
def main():
    """Copy every subject's scope PNGs into ``CENTRAL_SCOPE_DIR``.

    For each subject and each directory name in ``SOURCE_OFFSET_DIRNAMES``,
    files matching ``{sid}_*_scope.png`` in the scope sub-directory are copied
    with ``shutil.copy2``. The destination name comes from ``_safe_dest_path``.
    One log line is printed per action and a summary is printed at the end.
    """
    ensure_dir(CENTRAL_SCOPE_DIR)
    grand_found = 0
    grand_copied = 0

    for sid in SUBJECT_IDS:
        print(f"# Subject {sid}")
        subject_root = os.path.join(BASE_DIR, sid)
        if not os.path.isdir(subject_root):
            print(f"[SKIP] subject root not found -> {subject_root}")
            continue

        n_found = 0
        n_copied = 0

        for dirname in SOURCE_OFFSET_DIRNAMES:
            scope_dir = os.path.join(subject_root, dirname, SCOPE_SUBDIR)
            if not os.path.isdir(scope_dir):
                print(f"[SKIP] scope dir not found -> {_rel(scope_dir, BASE_DIR)}")
                continue

            # Deliberately broad pattern: anything ending in "_scope.png".
            pattern = os.path.join(scope_dir, f"{sid}_*_scope.png")
            matches = sorted(glob.glob(pattern))
            if not matches:
                print(f"[SKIP] no files -> {_rel(pattern, BASE_DIR)}")
                continue

            n_found += len(matches)
            for src in matches:
                dst = _safe_dest_path(CENTRAL_SCOPE_DIR, os.path.basename(src),
                                      overwrite=OVERWRITE)
                try:
                    shutil.copy2(src, dst)  # copy2 also carries file metadata
                    n_copied += 1
                    print(f"[OK]   copy -> {_rel(src, BASE_DIR)}  =>  {_rel(dst, BASE_DIR)}")
                except Exception as e:
                    print(f"[SKIP] copy failed -> {_rel(src, BASE_DIR)}: {e}")

        grand_found += n_found
        grand_copied += n_copied
        if n_found == 0:
            print("[SKIP] no scope images for this subject")

    print("\n===== SUMMARY =====")
    print(f"found : {grand_found}")
    print(f"copied: {grand_copied}")
    print(f"dest  : {_rel(CENTRAL_SCOPE_DIR, BASE_DIR)}")

if __name__ == "__main__":
    main()


In [None]:
# -*- coding: utf-8 -*-
"""
(3a) Feature Engineering - FaceTemp_raw（LP=1 Hz、サンプル時刻そのまま）
----------------------------------------------------------------
入力:
  {BASE_DIR}/{sid}/OFFSET/{sid}_FaceA.csv
  {BASE_DIR}/{sid}/OFFSET/{sid}_FaceB.csv
  {BASE_DIR}/{sid}/OFFSET/{sid}_FaceC.csv
出力:
  {BASE_DIR}/{sid}/FEATURE/{sid}_FaceTemp_raw.csv (UTF-8-SIG)

ログ:
  # Subject {sid}
  [OK]  FaceTemp_raw -> {output_csv_path}
  または
  [SKIP] FaceTemp_raw: {reason}

処理概要:
  1) FaceA/B/C の Max/Mean 列を自動検出（'box a/b/c' などを優先）
  2) 1 Hz ローパス（Butterworth, order=2, filtfilt）。不均一サンプリングは等間隔へ一旦補間→filt→元時刻に逆補間
  3) Time_sec 内積で inner join（重複時刻は平均化）
  4) ご指定の出力9列を作成（ABはA/Bの平均、CはC単独、ABCは3点平均）
     - AB_max      = (Amax + Bmax)/2
     - C_max       = Cmax
     - ABC_max     = (Amax + Bmax + Cmax)/3
     - AB_mean     = (Amean + Bmean)/2
     - C_mean      = Cmean
     - ABC_mean    = (Amean + Bmean + Cmean)/3
     - AB_maxdiff  = |Amax - Bmax|/2
     - AB_meandiff = |Amean - Bmean|/2
備考:
  - ハイパスは既定オフ（HP_CUTOFF_HZ=None）
  - 欠損はフィルタ前に線形補間（端は最近傍延長）、フィルタ後に欠損位置へNaNを戻す
"""

import os
import re
from typing import List, Tuple
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt

# ===== ユーザー指定 =====
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# 対象ID（氏名なし）
SUBJECT_IDS: List[str] = [
    "10041","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# ===== フィルタ設定（温度は 1 Hz ローパス）=====
LP_CUTOFF_HZ: float = 1.0                  # ローパス
HP_CUTOFF_HZ: float | None = None          # 例: 0.01。既定は None（Face温度では通常HPしない）
BUTTER_ORDER: int = 2
UNIFORMITY_TOL: float = 0.02               # サンプリング間隔の変動係数が2%以内なら等間隔とみなす


# ---------- ユーティリティ ----------
def _normalize_colname(col: str) -> str:
    s = col.lower()
    s = re.sub(r"[．。]", ".", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

def _select_face_columns(df: pd.DataFrame, box_hint: str) -> Tuple[str, str]:
    """
    Face系CSVから Max/Mean 列を自動検出。
    優先: 'box a/b/c' を含み、かつ 'max' / 'ave|mean' を含む列。
    """
    norm = {c: _normalize_colname(c) for c in df.columns}

    def pick(words):
        return [c for c in df.columns if all(w in norm[c] for w in words)]

    # Max 列
    max_c = pick([f"box {box_hint}", "max"]) or pick(["max"])
    # Mean 列（'ave' 'mean' あたりを広めに拾う）
    mean_c = (pick([f"box {box_hint}", "ave"]) or
              pick([f"box {box_hint}", "mean"]) or
              pick(["ave"]) or pick(["mean"]))
    if not max_c or not mean_c:
        raise ValueError(f"required columns not found for box {box_hint}. columns={list(df.columns)}")
    return max_c[0], mean_c[0]

def _estimate_fs(times: np.ndarray) -> tuple[float, bool]:
    """Estimate the sampling rate and whether sampling is nearly uniform.

    The rate is the reciprocal of the median finite step between consecutive
    times. Sampling counts as uniform when the standard deviation of the
    steps divided by that median is at most ``UNIFORMITY_TOL``.

    Returns:
        ``(fs_hz, is_uniform)``; ``(0.0, False)`` when there is no finite
        step or the median step is not positive.
    """
    steps = np.diff(times)
    steps = steps[np.isfinite(steps)]
    if steps.size == 0:
        return 0.0, False

    typical_step = float(np.median(steps))
    if typical_step <= 0:
        return 0.0, False

    spread = float(np.std(steps) / typical_step)
    return 1.0 / typical_step, (spread <= UNIFORMITY_TOL)

def _make_filter(fs: float, lp: float | None, hp: float | None):
    """Design the Butterworth filter selected by the given cut-offs.

    Both cut-offs set gives a band-pass, only ``lp`` a low-pass, only ``hp``
    a high-pass. A cut-off of ``None`` or 0 counts as "not set". Normalised
    frequencies are clipped to [1e-6, 0.999999].

    Args:
        fs: Sampling rate in Hz.
        lp: Low-pass cut-off in Hz, or None.
        hp: High-pass cut-off in Hz, or None.

    Returns:
        ``(b, a)`` coefficients of order ``BUTTER_ORDER``, or ``(None, None)``
        when neither cut-off is set.
    """
    nyq = 0.5 * fs
    if not lp and not hp:
        return None, None

    if lp and hp:
        kind = "bandpass"
        wn = [max(1e-6, hp/nyq), min(0.999999, lp/nyq)]
    elif lp:
        kind = "lowpass"
        wn = min(0.999999, lp/nyq)
    else:
        kind = "highpass"
        wn = max(1e-6, hp/nyq)

    b, a = butter(BUTTER_ORDER, wn, btype=kind)
    return b, a

def _filter_series_with_resample(times: np.ndarray, values: np.ndarray,
                                 lp: float | None, hp: float | None) -> np.ndarray:
    """Zero-phase filter a series that may be unevenly sampled or have gaps.

    * Nearly uniform sampling: gaps (NaN) are filled by linear interpolation,
      the series is filtered with ``filtfilt``, and NaN is put back at the
      originally missing positions.
    * Non-uniform sampling: the valid samples are interpolated onto a regular
      grid, filtered there, and interpolated back onto the original times.
      NaN is then put back at the originally missing positions. Previously
      this branch returned interpolated values there, unlike the uniform
      branch.

    Args:
        times: Sample times in seconds; assumed increasing, as required by
            ``np.interp``.
        values: Sample values; may contain NaN.
        lp: Low-pass cut-off in Hz, or None.
        hp: High-pass cut-off in Hz, or None.

    Returns:
        Filtered values aligned with ``times``. The input values are returned
        (as a float array) when fewer than two samples are finite, or when
        sampling is uniform and no filter is requested.
    """
    fs, is_uniform = _estimate_fs(times)
    vals = np.asarray(values, float)

    valid = np.isfinite(vals)
    # At least two finite samples are needed to interpolate across gaps.
    if valid.sum() < 2:
        return vals

    if not is_uniform:
        t0, t1 = float(times[0]), float(times[-1])
        # Grid rate: the estimated rate but at least 1 Hz; 10 Hz when the
        # rate could not be estimated.
        dt = 1.0 / max(1.0, fs if fs > 0 else 10.0)
        tg = np.arange(t0, t1 + 1e-9, dt)
        vg = np.interp(tg, times[valid], vals[valid])
        b, a = _make_filter(1.0/dt, lp, hp)
        vf = vg if b is None else filtfilt(b, a, vg, method="gust")
        out = np.interp(times, tg, vf)
        # Do not report values where the input had none.
        out[~valid] = np.nan
        return out

    b, a = _make_filter(fs, lp, hp)
    if b is None:
        return vals
    vlin = np.interp(times, times[valid], vals[valid])
    out = filtfilt(b, a, vlin, method="gust").copy()
    out[~valid] = np.nan
    return out

def _prepare_face_df(df: pd.DataFrame, label: str, box_hint: str) -> pd.DataFrame:
    """Build a filtered ``Time_sec / Face{label}_Max / Face{label}_Mean`` table.

    Steps: detect the Max/Mean columns, convert them and ``Time_sec`` to
    numeric, average rows sharing the same time, filter both value columns
    with ``LP_CUTOFF_HZ`` / ``HP_CUTOFF_HZ``, then rename the columns.

    Args:
        df: One Face CSV as a DataFrame; must contain ``Time_sec``.
        label: Suffix used in the output column names (e.g. "A").
        box_hint: Box letter passed to the column detection (e.g. "a").

    Returns:
        A three-column DataFrame with one row per distinct time.

    Raises:
        ValueError: If ``Time_sec`` or the Max/Mean columns are missing.
    """
    if "Time_sec" not in df.columns:
        raise ValueError("Time_sec column missing")

    max_col, mean_col = _select_face_columns(df, box_hint=box_hint)

    work = df.copy()
    for col in ("Time_sec", max_col, mean_col):
        work[col] = pd.to_numeric(work[col], errors="coerce")

    # Collapse duplicate timestamps to their mean.
    grouped = (work[["Time_sec", max_col, mean_col]]
               .groupby("Time_sec", as_index=False)
               .mean(numeric_only=True))

    # Filter both series from the unfiltered data before writing either back.
    t = grouped["Time_sec"].to_numpy(float)
    filtered = {
        col: _filter_series_with_resample(t, grouped[col].to_numpy(float),
                                          lp=LP_CUTOFF_HZ, hp=HP_CUTOFF_HZ)
        for col in (max_col, mean_col)
    }
    for col, series in filtered.items():
        grouped[col] = series

    out_max = f"Face{label}_Max"
    out_mean = f"Face{label}_Mean"
    renamed = grouped.rename(columns={max_col: out_max, mean_col: out_mean})
    return renamed[["Time_sec", out_max, out_mean]]

def merge_and_compute_face_features(dfA: pd.DataFrame, dfB: pd.DataFrame, dfC: pd.DataFrame) -> pd.DataFrame:
    """Inner-join FaceA/B/C on ``Time_sec`` and derive the combined features.

    Output columns, in this order:
        Time_sec,
        AB_max, C_max, ABC_max,
        AB_mean, C_mean, ABC_mean,
        AB_maxdiff, AB_meandiff

    AB_* is the A/B average, C_* is C alone, ABC_* is the three-way average,
    and the *diff columns are half the absolute A-B difference.

    Raises:
        ValueError: If the three tables share no ``Time_sec`` value.
        pandas.errors.MergeError: If ``Time_sec`` is duplicated within a
            table (the joins are validated as one-to-one).
    """
    joined = dfA.merge(dfB, on="Time_sec", how="inner", validate="one_to_one")
    joined = joined.merge(dfC, on="Time_sec", how="inner", validate="one_to_one")
    if joined.empty:
        raise ValueError("no overlapping Time_sec among FaceA/B/C")

    def column(name):
        return joined[name].to_numpy(float)

    a_max, b_max, c_max = column("FaceA_Max"), column("FaceB_Max"), column("FaceC_Max")
    a_mean, b_mean, c_mean = column("FaceA_Mean"), column("FaceB_Mean"), column("FaceC_Mean")

    features = {
        "Time_sec": column("Time_sec"),
        "AB_max": (a_max + b_max) / 2.0,
        "C_max": c_max,
        "ABC_max": (a_max + b_max + c_max) / 3.0,
        "AB_mean": (a_mean + b_mean) / 2.0,
        "C_mean": c_mean,
        "ABC_mean": (a_mean + b_mean + c_mean) / 3.0,
        "AB_maxdiff": np.abs(a_max - b_max) * 0.5,
        "AB_meandiff": np.abs(a_mean - b_mean) * 0.5,
    }

    # Pin the column order explicitly.
    ordered = [
        "Time_sec",
        "AB_max", "C_max", "ABC_max",
        "AB_mean", "C_mean", "ABC_mean",
        "AB_maxdiff", "AB_meandiff",
    ]
    return pd.DataFrame(features)[ordered]

def process_face_temp_raw_for_subject(base_root: str, sid: str) -> None:
    """Create ``FEATURE/{sid}_FaceTemp_raw.csv`` for one subject.

    Reads ``OFFSET/{sid}_FaceA.csv``, ``_FaceB.csv`` and ``_FaceC.csv``,
    prepares each one, merges them on ``Time_sec`` and writes the combined
    features as UTF-8 with BOM. Progress is printed as ``[OK]`` / ``[SKIP]``
    lines; errors are reported there instead of being raised.

    The ``FEATURE`` directory is created only once there is something to
    write, so a missing or invalid subject no longer leaves an empty folder
    behind.

    Args:
        base_root: Root directory containing one folder per subject.
        sid: Subject ID.
    """
    subject_dir = os.path.join(base_root, f"{sid}")
    offset_dir  = os.path.join(subject_dir, "OFFSET")
    feature_dir = os.path.join(subject_dir, "FEATURE")

    path_A = os.path.join(offset_dir, f"{sid}_FaceA.csv")
    path_B = os.path.join(offset_dir, f"{sid}_FaceB.csv")
    path_C = os.path.join(offset_dir, f"{sid}_FaceC.csv")
    out_path = os.path.join(feature_dir, f"{sid}_FaceTemp_raw.csv")

    print(f"# Subject {sid}")
    for p in (path_A, path_B, path_C):
        if not os.path.exists(p):
            print(f"[SKIP] FaceTemp_raw: missing -> {p}")
            return

    try:
        dfA = pd.read_csv(path_A, encoding="utf-8-sig")
        dfB = pd.read_csv(path_B, encoding="utf-8-sig")
        dfC = pd.read_csv(path_C, encoding="utf-8-sig")
    except Exception as e:
        print(f"[SKIP] FaceTemp_raw: failed to read CSVs ({e})")
        return

    try:
        dfA_prep = _prepare_face_df(dfA, label="A", box_hint="a")
        dfB_prep = _prepare_face_df(dfB, label="B", box_hint="b")
        dfC_prep = _prepare_face_df(dfC, label="C", box_hint="c")
    except Exception as e:
        print(f"[SKIP] FaceTemp_raw: {e}")
        return

    try:
        out = merge_and_compute_face_features(dfA_prep, dfB_prep, dfC_prep)
        # Create the output folder only now that a result exists.
        os.makedirs(feature_dir, exist_ok=True)
        out.to_csv(out_path, index=False, encoding="utf-8-sig")
        print(f"[OK]  FaceTemp_raw -> {out_path}")
    except Exception as e:
        print(f"[SKIP] FaceTemp_raw: {e}")

def main_3a_facetemp_raw():
    """Step (3a): build FaceTemp_raw for every subject in ``SUBJECT_IDS``."""
    for subject_id in SUBJECT_IDS:
        process_face_temp_raw_for_subject(base_root=BASE_DIR, sid=subject_id)

# 実行
if __name__ == "__main__":
    main_3a_facetemp_raw()


In [None]:
# -*- coding: utf-8 -*-
"""
(3a') FaceTemp (30s Epoch Aggregates) — 新FaceTemp_raw仕様に対応
----------------------------------------------------------------
入力 : FEATURE/{sid}_FaceTemp_raw.csv
      （列: Time_sec, AB_max, C_max, ABC_max, AB_mean, C_mean, ABC_mean, AB_maxdiff, AB_meandiff）
出力 : FEATURE/{sid}_FaceTemp.csv
      （列: Epoch_start, Epoch_end, <各入力列>_mean/_std/_slope）
仕様 :
- 解析区間は 1800〜2400 秒
- エポックは 30 秒幅、右端アンカーは 1800+30, 1800+60, …, 2400
- 補間は行わない（該当エポックにサンプルが無ければ NaN）
- slope はエポック内で y ≈ beta * t + alpha の beta（単位/秒）
"""

import os
from typing import List
import numpy as np
import pandas as pd

# ===== パスと対象ID =====
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

SUBJECT_IDS: List[str] = [
    "10041","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# ===== エポック設定 =====
EPOCH_WIN = 30
START_SEC = 1800
END_SEC   = 2400

# ===== 入力で期待する FaceTemp_raw の列 =====
EXPECTED_COLS = [
    "AB_max", "C_max", "ABC_max",
    "AB_mean", "C_mean", "ABC_mean",
    "AB_maxdiff", "AB_meandiff",
]

def _epoch_stats_from_raw(df_raw: pd.DataFrame,
                          start_sec: int,
                          end_sec: int,
                          win_sec: int) -> pd.DataFrame:
    """Aggregate FaceTemp_raw samples into per-epoch mean / std / slope.

    Epochs are trailing windows ``[t_end - win_sec, t_end]`` (both ends
    inclusive) with ``t_end`` stepping from ``start_sec + win_sec`` up to
    ``end_sec``. No interpolation is done: an epoch without finite samples
    keeps NaN. With a single finite sample the std is reported as 0.0. The
    slope is the first-order ``polyfit`` coefficient (value per second) and
    needs at least two finite samples.

    Args:
        df_raw: Table with ``Time_sec`` and any of the ``EXPECTED_COLS``.
        start_sec: Start of the analysed interval in seconds.
        end_sec: End of the analysed interval in seconds.
        win_sec: Epoch length in seconds.

    Returns:
        One row per epoch: ``Epoch_start``, ``Epoch_end``, then
        ``<col>_mean``, ``<col>_std``, ``<col>_slope`` for each column found.

    Raises:
        ValueError: If ``Time_sec`` or all of the expected columns are missing.
    """
    if "Time_sec" not in df_raw.columns:
        raise ValueError("required column missing: Time_sec")

    # Only the expected columns that are actually present are aggregated.
    value_cols = [c for c in EXPECTED_COLS if c in df_raw.columns]
    if not value_cols:
        raise ValueError(f"no expected FaceTemp columns found. need any of {EXPECTED_COLS}")

    work = df_raw.copy()
    for name in ["Time_sec", *value_cols]:
        work[name] = pd.to_numeric(work[name], errors="coerce")

    # Keep the analysed interval (rows with NaN time drop out) and sort by time.
    in_range = work["Time_sec"].between(start_sec, end_sec)
    work = work[in_range].sort_values("Time_sec")
    times = work["Time_sec"].to_numpy(dtype=float)

    epoch_ends = np.arange(start_sec + win_sec, end_sec + 1, win_sec, dtype=float)
    epoch_starts = epoch_ends - win_sec
    n_epochs = epoch_ends.size

    result = {"Epoch_start": epoch_starts.copy(), "Epoch_end": epoch_ends.copy()}
    for col in value_cols:
        for kind in ("mean", "std", "slope"):
            result[f"{col}_{kind}"] = np.full(n_epochs, np.nan, dtype=float)

    # times is sorted, so each window maps to a half-open slice [lo, hi):
    # lo is the first sample >= window start, hi the first sample > window end.
    lows = np.searchsorted(times, epoch_starts, side="left")
    highs = np.searchsorted(times, epoch_ends, side="right")

    for i in range(n_epochs):
        lo, hi = int(lows[i]), int(highs[i])
        if hi <= lo:
            continue

        seg = work.iloc[lo:hi]
        t = seg["Time_sec"].to_numpy(dtype=float)
        t_finite = np.isfinite(t)

        for col in value_cols:
            y = seg[col].to_numpy(dtype=float)
            y_finite = np.isfinite(y)
            if not y_finite.any():
                continue

            good = y[y_finite]
            result[f"{col}_mean"][i] = float(np.mean(good))
            result[f"{col}_std"][i] = float(np.std(good, ddof=1)) if good.size >= 2 else 0.0

            usable = t_finite & y_finite
            if usable.sum() >= 2:
                beta, _ = np.polyfit(t[usable], y[usable], 1)  # y ~ beta*t + alpha
                result[f"{col}_slope"][i] = float(beta)

    ordered = ["Epoch_start", "Epoch_end"]
    for col in value_cols:
        ordered += [f"{col}_mean", f"{col}_std", f"{col}_slope"]
    return pd.DataFrame(result, columns=ordered)

def process_face_temp_epoch_for_subject(base_root: str, sid: str) -> None:
    """Turn ``{sid}_FaceTemp_raw.csv`` into the epoch table ``{sid}_FaceTemp.csv``.

    Both files live in ``{base_root}/{sid}/FEATURE``. Epochs are computed with
    ``START_SEC``, ``END_SEC`` and ``EPOCH_WIN``. Progress is printed as
    ``[OK]`` / ``[SKIP]`` lines; errors are reported there, not raised.
    """
    feature_dir = os.path.join(base_root, f"{sid}", "FEATURE")
    src = os.path.join(feature_dir, f"{sid}_FaceTemp_raw.csv")
    dst = os.path.join(feature_dir, f"{sid}_FaceTemp.csv")

    print(f"# Subject {sid} (FaceTemp epoch)")
    if not os.path.exists(src):
        print(f"[SKIP] FaceTemp: FaceTemp_raw not found -> {src}")
        return

    try:
        samples = pd.read_csv(src, encoding="utf-8-sig")
        epochs = _epoch_stats_from_raw(samples, START_SEC, END_SEC, EPOCH_WIN)
        epochs.to_csv(dst, index=False, encoding="utf-8-sig")
        print(f"[OK]  FaceTemp -> {dst}")
    except Exception as e:
        print(f"[SKIP] FaceTemp: {e}")

def main_3a_facetemp_epoch(base_root: str, subject_ids: List[str]):
    """Build the FaceTemp epoch table for each subject ID, in the given order."""
    for subject_id in subject_ids:
        process_face_temp_epoch_for_subject(base_root, subject_id)

# 実行例:
if __name__ == "__main__":
    main_3a_facetemp_epoch(BASE_DIR, SUBJECT_IDS)


In [None]:
# -*- coding: utf-8 -*-
"""
(3b) RR extraction (RAW) & corrected (RAW2) & 30s-epoch RR export
+ 30s Pulse window plots with beat markers (○=normal, ✕=anomaly)
-----------------------------------------------------------------
入力 : {BASE_DIR}/{sid}/OFFSET/{sid}_Pulse.csv
出力 : {BASE_DIR}/{sid}/FEATURE/
  - {sid}_RR_raw.csv     …… 検出そのまま（Time_sec, RR_interval_sec, HeartRate_BPM）
  - {sid}_RR_raw.png     …… RR_raw の時系列（IQR±k逸脱は赤丸）
  - {sid}_RR_raw2.csv    …… “中間ピーク削除”で修正後のビート系列（同3列）
  - {sid}_RR.png         …… 修正後 RR の時系列プロット
  - {sid}_RR.csv         …… 修正後（raw2）を用いた 30秒エポック平均RR
                             （Epoch_start, Epoch_end, RR_interval）
  - Pulse/{sid}_Pulse_{start}-{end}.png …… 30秒ごとの脈拍画像（filt後の波形に検出位置を重畳）
       ・正常: 赤丸 ○ / 異常: 赤い ✕（IQR±k逸脱 or NaN RR）
仕様:
- 解析区間: 1800–2400秒（変更は START_TIME, END_TIME を編集）
- 検出: bandpass(0.5–8Hz) → find_peaks(distance≥0.30s) → SPKI/NPKI適応閾値
- RR妥当域: 0.15–2.0 s
- 修正: 「短すぎるRRのみ」中間ピーク削除（長すぎるRRは図示のみで未補間）
- 30秒エポックは右端アンカー（1800+30,…,2400）の trailing 30秒
- 補間なし。窓内に有効RRが無ければ NaN
"""

import os
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy.signal import butter, filtfilt, find_peaks

# ===== ユーザー環境 =====
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

SUBJECT_IDS: List[str] = [
    "10041","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# ===== 時間・図の設定 =====
START_TIME: int = 1800
END_TIME:   int = 2400
WIN_30:     int = 30

DPI = 300
FIGSIZE = (12, 4)
LINEWIDTH = 1.5
TITLE_FONTSIZE = 30
LABEL_FONTSIZE = 24
TICK_FONTSIZE  = 20

# ===== フィルタ設定（PPG想定） =====
LOWCUT_HZ, HIGHCUT_HZ = 0.5, 8.0
FILTER_ORDER = 2

# ===== 検出・RR判定 =====
RR_MIN_SEC: float = 0.15
RR_MAX_SEC: float = 2.0
PEAK_MIN_DISTANCE_SEC: float = 0.30
TH_K: float = 0.18       # 適応閾値係数（SPKI/NPKI から決定）

# ===== IQR 閾値 =====
IQR_K: float = 3.0


# ---------- ヘルパ ----------
def mmss_formatter(x: float, _pos=None) -> str:
    """Format seconds as ``m:ss`` for use as a matplotlib tick formatter.

    The value is truncated to whole seconds. Negative values are formatted
    from their magnitude with a leading "-", so -30 gives "-0:30". The former
    floor-division form gave "-1:30" for that input.

    Args:
        x: Time in seconds.
        _pos: Tick position supplied by matplotlib; unused.

    Returns:
        The label, e.g. "30:05".
    """
    total = int(x)
    sign = "-" if total < 0 else ""
    minutes, seconds = divmod(abs(total), 60)
    return f"{sign}{minutes}:{seconds:02d}"

def bandpass_filter(signal: np.ndarray, fs: int,
                    lowcut: float = LOWCUT_HZ, highcut: float = HIGHCUT_HZ,
                    order: int = FILTER_ORDER) -> np.ndarray:
    """Apply a zero-phase Butterworth band-pass filter to ``signal``.

    Args:
        signal: Samples to filter.
        fs: Sampling rate in Hz.
        lowcut: Lower cut-off in Hz.
        highcut: Upper cut-off in Hz.
        order: Butterworth order passed to ``butter``.

    Returns:
        The filtered signal (``filtfilt`` output).
    """
    nyquist = 0.5 * fs
    # Clip the normalised band edges to [1e-6, 0.999999].
    band = [max(1e-6, lowcut / nyquist), min(0.999999, highcut / nyquist)]
    numerator, denominator = butter(order, band, btype='band')
    return filtfilt(numerator, denominator, signal)

def detect_waveform_column(df: pd.DataFrame):
    """Pick the column holding the pulse waveform.

    Among the columns other than ``Time_sec``, the first of "Pulse", "PPG",
    "pulse", "ppg" that is present is returned; otherwise the first remaining
    column; ``None`` when there is no other column.
    """
    candidates = [name for name in df.columns if name != "Time_sec"]
    if not candidates:
        return None

    preferred = next(
        (key for key in ("Pulse", "PPG", "pulse", "ppg") if key in candidates),
        None,
    )
    return candidates[0] if preferred is None else preferred

def rolling_trailing_ranges(times: np.ndarray, anchors: np.ndarray, win_sec: int):
    """Yield, per anchor ``t``, the slice ``(l, r)`` of ``times`` in ``[t - win_sec, t]``.

    ``r`` is exclusive. The left index is carried over from one anchor to the
    next and never moves backwards.
    """
    total = len(times)
    lo = 0
    for anchor in anchors:
        window_start = anchor - win_sec

        # Move the left edge to the first sample not before the window start.
        first_inside = total
        for j in range(lo, total):
            if not (times[j] < window_start):
                first_inside = j
                break
        lo = first_inside

        # The right edge is the first sample after the anchor.
        hi = total
        for j in range(lo, total):
            if not (times[j] <= anchor):
                hi = j
                break

        yield lo, hi

def iqr_bounds(x: pd.Series, k: float = IQR_K):
    """Return ``(Q1 - k*IQR, Q3 + k*IQR)`` computed over the non-NaN values of ``x``.

    When ``x`` has no non-NaN value the bounds are ``(-inf, +inf)``.
    """
    observed = x.dropna()
    if observed.empty:
        return -np.inf, np.inf
    quartiles = observed.quantile([0.25, 0.75])
    first_q, third_q = quartiles.iloc[0], quartiles.iloc[1]
    spread = third_q - first_q
    lower = first_q - k * spread
    upper = third_q + k * spread
    return float(lower), float(upper)

def plot_rr(time_sec: np.ndarray, rr_sec: np.ndarray, title: str, out_png: str,
            highlight_mask: np.ndarray | None = None):
    """Plot an RR-interval series against time and save it as a PNG.

    Args:
        time_sec: x values (seconds); tick labels are rendered with ``mmss_formatter``.
        rr_sec: RR intervals (seconds), same length as ``time_sec``.
        title: figure title.
        out_png: path of the PNG file to write.
        highlight_mask: optional boolean mask; selected points are additionally
            drawn as hollow red circles.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.plot(time_sec, rr_sec, marker='o', linestyle='-', linewidth=LINEWIDTH, label='RR')

    has_highlight = highlight_mask is not None and np.any(highlight_mask)
    if has_highlight:
        ax.plot(
            time_sec[highlight_mask], rr_sec[highlight_mask], 'o',
            color='red', markerfacecolor='none', markersize=8, markeredgewidth=2,
            label='anomaly (IQR±k)',
        )

    ax.set_title(title, fontsize=TITLE_FONTSIZE)
    ax.set_xlabel('Time (mm:ss)', fontsize=LABEL_FONTSIZE)
    ax.set_ylabel('RR Interval (sec)', fontsize=LABEL_FONTSIZE)
    ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
    ax.tick_params(labelsize=TICK_FONTSIZE)
    ax.grid(True)
    ax.legend(fontsize=TICK_FONTSIZE)

    # ``fig`` is the current figure here, so these match the pyplot-level calls.
    fig.tight_layout()
    fig.savefig(out_png, dpi=DPI)
    plt.close(fig)

def plot_pulse_window(times: np.ndarray, signal: np.ndarray,
                      beat_times: np.ndarray,
                      rr_raw: np.ndarray, outlier_mask: np.ndarray,
                      t0: float, t1: float, out_png: str):
    """
    Draw the pulse waveform of one window ``[t0, t1]`` and mark the detected beats.

    Normal beats are drawn as hollow red circles, anomalous beats as red crosses.
    A beat is anomalous when ``outlier_mask`` is set for it, or when its RR value
    is NaN; the NaN of the very first beat (index 0) is tolerated.

    Args:
        times: sample times of ``signal`` (seconds).
        signal: waveform samples aligned with ``times``.
        beat_times: times of the detected beats (seconds).
        rr_raw: RR value per beat, aligned with ``beat_times``.
        outlier_mask: truthy per beat where the RR value is an outlier.
        t0: window start (seconds, inclusive).
        t1: window end (seconds, inclusive).
        out_png: path of the PNG file to write.

    Nothing is written when fewer than two waveform samples fall in the window.
    """
    win_mask = (times >= t0) & (times <= t1)
    if np.count_nonzero(win_mask) < 2:
        return  # too few samples in this window

    # Slice once; these do not change from beat to beat.
    win_t = times[win_mask]
    win_y = signal[win_mask]

    fig, ax = plt.subplots(figsize=FIGSIZE)
    try:
        ax.plot(win_t, win_y, linewidth=LINEWIDTH, label='Pulse (bandpass)')

        # Detected beats that fall inside this window.
        idx = np.flatnonzero((beat_times >= t0) & (beat_times <= t1))
        if idx.size:
            beat_x = beat_times[idx]
            beat_y = np.interp(beat_x, win_t, win_y)
            anomalous = (np.asarray(outlier_mask)[idx].astype(bool)
                         | (np.isnan(rr_raw[idx]) & (idx != 0)))
            normal = ~anomalous
            # One artist per marker class instead of one per beat.
            if np.any(anomalous):
                ax.plot(beat_x[anomalous], beat_y[anomalous], marker='x', color='red',
                        markersize=10, linestyle='None')
            if np.any(normal):
                ax.plot(beat_x[normal], beat_y[normal], marker='o', markerfacecolor='none',
                        markeredgecolor='red', markersize=8, linestyle='None')

        ax.set_xlim(t0, t1)
        ax.set_xlabel('Time (mm:ss)', fontsize=LABEL_FONTSIZE)
        ax.set_ylabel('Pulse (a.u.)', fontsize=LABEL_FONTSIZE)
        ax.set_title(f"Pulse {int(t0)}–{int(t1)} sec", fontsize=TITLE_FONTSIZE)
        ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
        ax.tick_params(labelsize=TICK_FONTSIZE)
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(out_png, dpi=DPI)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)


# ---------- メイン処理 ----------
def _select_beats_adaptive(filt: np.ndarray, times: np.ndarray, peaks: np.ndarray) -> List[float]:
    """Select beat times among candidate ``peaks`` with a running SPKI/NPKI threshold.

    SPKI/NPKI start from the 90th/10th percentile of the candidate peak heights
    and are updated as exponential moving averages (weight 0.125).  A candidate
    above the threshold is rejected as noise only when it follows the previous
    accepted beat by less than ``RR_MIN_SEC``.  A candidate after a gap longer
    than ``RR_MAX_SEC`` is still accepted; otherwise the reference beat would
    never advance and every later candidate would be rejected as well.
    """
    peak_heights = filt[peaks]
    spki = float(np.percentile(peak_heights, 90))
    npki = float(np.percentile(peak_heights, 10))
    thr = npki + TH_K * (spki - npki)

    beat_t: List[float] = []
    for idx in peaks:
        val = float(filt[idx])
        t = float(times[idx])
        if val > thr:
            if beat_t and (t - beat_t[-1]) < RR_MIN_SEC:
                # Too close to the previous beat: count it as noise.
                npki = 0.125 * val + 0.875 * npki
                thr = npki + TH_K * (spki - npki)
                continue
            spki = 0.125 * val + 0.875 * spki
            beat_t.append(t)
        else:
            npki = 0.125 * val + 0.875 * npki
        thr = npki + TH_K * (spki - npki)
    return beat_t


def _delete_middle_peaks(beat_times, lower: float, upper: float) -> List[float]:
    """Delete a beat that creates a too-short RR when merging restores an in-range RR.

    For a beat ``i`` with ``RR < lower`` the beat is removed if the interval
    between its neighbours lies in ``[lower, upper]``; the same index is then
    re-evaluated.  The last beat is never removed.
    """
    beats = [float(t) for t in beat_times]
    i = 1
    while i < len(beats):
        rr = beats[i] - beats[i - 1]
        if np.isfinite(rr) and rr < lower and i + 1 < len(beats):
            merged_rr = beats[i + 1] - beats[i - 1]
            if lower <= merged_rr <= upper:
                del beats[i]
                continue  # re-evaluate the same index
        i += 1
    return beats


def process_rr_extraction_for_subject(base_root: str, sid: str) -> None:
    """(3b) RR extraction for one subject.

    Reads ``OFFSET/{sid}_Pulse.csv``, keeps ``START_TIME <= Time_sec <= END_TIME``,
    band-pass filters the waveform, detects beats and writes into ``FEATURE/``:

    - ``{sid}_RR_raw.csv`` / ``{sid}_RR_raw.png``: beat-to-beat RR (values outside
      ``[RR_MIN_SEC, RR_MAX_SEC]`` become NaN), IQR outliers highlighted.
    - ``{sid}_RR_raw2.csv`` / ``{sid}_RR.png``: RR after middle-peak deletion.
    - ``Pulse/{sid}_Pulse_{start}-{end}.png``: waveform picture per 30 s window.
    - ``{sid}_RR.csv``: mean RR per 30 s epoch, computed from the corrected beats.

    The subject is skipped with a ``[SKIP]`` message when the input is missing or
    unusable.
    """
    subj_dir = os.path.join(base_root, f"{sid}")
    in_csv  = os.path.join(subj_dir, 'OFFSET', f'{sid}_Pulse.csv')
    out_dir = os.path.join(subj_dir, 'FEATURE')
    pulse_plot_dir = os.path.join(out_dir, 'Pulse')  # per-window pulse pictures
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(pulse_plot_dir, exist_ok=True)

    print(f"# Subject {sid}")

    if not os.path.exists(in_csv):
        print(f"[SKIP] {sid}: input not found -> {in_csv}")
        return

    df = pd.read_csv(in_csv)
    if "Time_sec" not in df.columns:
        print(f"[SKIP] {sid}: Time_sec missing")
        return

    # Restrict to the analysis range.
    df = df.sort_values('Time_sec').reset_index(drop=True)
    df = df[(df['Time_sec'] >= START_TIME) & (df['Time_sec'] <= END_TIME)].reset_index(drop=True)
    times = df['Time_sec'].to_numpy(float)

    wave_col = detect_waveform_column(df)
    if wave_col is None:
        print(f"[SKIP] {sid}: waveform column not found")
        return
    sig = pd.to_numeric(df[wave_col], errors="coerce").to_numpy(float)

    if times.size < 2:
        print(f"[SKIP] {sid}: not enough samples")
        return

    # filtfilt spreads a single NaN over the whole output, so bridge
    # non-finite samples by linear interpolation before filtering.
    finite = np.isfinite(sig)
    if not finite.all():
        if np.count_nonzero(finite) < 2:
            print(f"[SKIP] {sid}: no valid waveform samples")
            return
        sig = np.interp(times, times[finite], sig[finite])

    # Estimate the sampling frequency from the mean sample spacing.
    dt = float(np.mean(np.diff(times)))
    if not np.isfinite(dt) or dt <= 0:
        print(f"[SKIP] {sid}: invalid Time_sec sequence")
        return
    fs = int(round(1.0 / dt))
    if fs <= 0:
        # Would otherwise divide by a zero Nyquist frequency in the filter design.
        print(f"[SKIP] {sid}: invalid sampling rate")
        return

    # Band-pass, then candidate peak detection.
    try:
        filt = bandpass_filter(sig, fs)
    except ValueError as e:
        # e.g. the signal is shorter than filtfilt's padding length
        print(f"[SKIP] {sid}: filtering failed ({e})")
        return
    distance = max(1, int(PEAK_MIN_DISTANCE_SEC * fs))
    peaks, _ = find_peaks(filt, distance=distance)
    if peaks.size < 3:
        print(f"[SKIP] {sid}: insufficient peaks")
        return

    beat_t = _select_beats_adaptive(filt, times, peaks)
    if len(beat_t) < 3:
        print(f"[SKIP] {sid}: insufficient beats after thresholding")
        return

    # ===== RR_raw.csv =====
    rr = np.insert(np.diff(beat_t), 0, np.nan)
    valid = np.isfinite(rr) & (rr >= RR_MIN_SEC) & (rr <= RR_MAX_SEC)
    rr_raw = rr.copy()
    rr_raw[~valid] = np.nan

    df_rr_raw = pd.DataFrame({
        "Time_sec": beat_t,
        "RR_interval_sec": rr_raw,
        "HeartRate_BPM": [np.nan if not np.isfinite(x) else 60.0/x for x in rr_raw],
    })
    raw_csv = os.path.join(out_dir, f"{sid}_RR_raw.csv")
    df_rr_raw.to_csv(raw_csv, index=False, encoding='utf-8-sig')
    print(f"[OK]  RR_raw -> {raw_csv}")

    # Outlier flagging: outside Q1 - k*IQR .. Q3 + k*IQR.
    low_iqr, up_iqr = iqr_bounds(df_rr_raw['RR_interval_sec'])
    out_mask = df_rr_raw['RR_interval_sec'].lt(low_iqr) | df_rr_raw['RR_interval_sec'].gt(up_iqr)

    # Figure: RR_raw.png (IQR outliers circled in red).
    raw_png = os.path.join(out_dir, f"{sid}_RR_raw.png")
    plot_rr(df_rr_raw["Time_sec"].to_numpy(float),
            df_rr_raw["RR_interval_sec"].to_numpy(float),
            title=f"{sid} RR Interval (RAW)",
            out_png=raw_png,
            highlight_mask=out_mask.to_numpy())
    print(f"[IQR] k={IQR_K}, lower={low_iqr:.3f}, upper={up_iqr:.3f}")

    # ===== RR_raw2.csv (correction: delete the middle peak of too-short RR only) =====
    beat_t_corr = _delete_middle_peaks(beat_t, low_iqr, up_iqr)
    rr_corr = np.insert(np.diff(beat_t_corr), 0, np.nan)
    df_rr_corr = pd.DataFrame({
        "Time_sec": beat_t_corr,
        "RR_interval_sec": rr_corr,
        "HeartRate_BPM": [np.nan if not np.isfinite(x) else 60.0/x for x in rr_corr],
    })
    raw2_csv = os.path.join(out_dir, f"{sid}_RR_raw2.csv")
    df_rr_corr.to_csv(raw2_csv, index=False, encoding='utf-8-sig')
    print(f"[OK]  RR_raw2 -> {raw2_csv}")

    # Figure: corrected RR.png
    corr_png = os.path.join(out_dir, f"{sid}_RR.png")
    plot_rr(np.asarray(beat_t_corr, float),
            rr_corr,
            title=f"{sid} RR Interval (corrected by deletion)",
            out_png=corr_png)

    # ===== Pulse picture per 30 s window (Pulse/{sid}_Pulse_{start}-{end}.png) =====
    anchors_30 = np.arange(START_TIME + WIN_30, END_TIME + 1, WIN_30, dtype=float)
    ep_st = anchors_30 - WIN_30
    ep_ed = anchors_30.copy()

    # The pulse pictures use the RAW detection result (beat_t, rr_raw, out_mask).
    beat_times_raw = np.asarray(beat_t, float)
    rr_raw_arr     = df_rr_raw["RR_interval_sec"].to_numpy(float)
    outlier_arr    = out_mask.to_numpy()

    for s, e in zip(ep_st, ep_ed):
        out_png = os.path.join(pulse_plot_dir, f"{sid}_Pulse_{int(s)}-{int(e)}.png")
        plot_pulse_window(times, filt, beat_times_raw, rr_raw_arr, outlier_arr, float(s), float(e), out_png)

    # ===== RR.csv (mean RR per 30 s epoch, based on the corrected beats) =====
    times_beats = df_rr_corr["Time_sec"].to_numpy(float)
    rr_beats    = df_rr_corr["RR_interval_sec"].to_numpy(float)
    rr_epoch = np.full_like(anchors_30, np.nan, dtype=float)
    for i, (l, r) in enumerate(rolling_trailing_ranges(times_beats, anchors_30, WIN_30)):
        if r - l <= 0:
            continue
        w = rr_beats[l:r]
        # Only RR values inside the validity range contribute to the epoch mean.
        m = np.isfinite(w) & (w >= RR_MIN_SEC) & (w <= RR_MAX_SEC)
        if np.any(m):
            rr_epoch[i] = float(np.mean(w[m]))

    df_epoch = pd.DataFrame({
        "Epoch_start": ep_st,
        "Epoch_end":   ep_ed,
        "RR_interval": rr_epoch,
    })
    rr_csv = os.path.join(out_dir, f"{sid}_RR.csv")
    df_epoch.to_csv(rr_csv, index=False, encoding='utf-8-sig')
    print(f"[OK]  RR (30s epoch from RAW2) -> {rr_csv}")


def main_3b_rr_extraction(base_root: str, subject_ids: List[str]):
    """Run the (3b) RR extraction for every subject ID, one after another."""
    for sid in subject_ids:
        process_rr_extraction_for_subject(base_root, sid)

# Run:
if __name__ == "__main__":
    main_3b_rr_extraction(BASE_DIR, SUBJECT_IDS)


In [None]:
# -*- coding: utf-8 -*-
"""
(3c) RR-derived features from RRtime（補間なし, 30s/120sエポック出力）
--------------------------------------------------------------------
入力 : FEATURE/{sid}_RRtime.csv   （列: Time_sec ← R波検出時刻 [sec]）
出力 : FEATURE/{sid}_<FeatureName>.csv
       （各CSVは 3列: Epoch_start, Epoch_end, <Feature>）

要件:
- RRは Time_sec の差分から算出（= 連続R間隔）
- 有効RRは生理境界 0.25〜2.0 s のみ
- 30秒窓（1770〜2400）: HeartRate, RMSSD, SDSD, SD1, SD2, CSI, CVI, pNN50, RR_interval
- 120秒窓（1800〜2400）: LF_power, HF_power, LF_HF_ratio（Lomb-Scargleで不等間隔のまま解析）
- アンカー:
  * 30s:  START_30+30, …, 2400  （START_30=1770）
  * 120s: START_120+120, …, 2400（START_120=1800）
- 各CSVの列名は Epoch_start, Epoch_end, <Feature> とする
"""

import os
from typing import List
import numpy as np
import pandas as pd
from scipy.signal import lombscargle

# ===== User environment (values already defined by another cell are reused) =====
try:
    BASE_DIR
except NameError:
    BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

try:
    SUBJECT_IDS
except NameError:
    SUBJECT_IDS: List[str] = [
        "10041","10061","10062","10063","10064",
        "10071","10072","10073","10074",
        "10081","10082","10083","10084",
        "10091","10092","10093","10094",
        "10101","10102","10103",
    ]

# ===== Time settings =====
START_30: int  = 1770   # start of the 30 s windows (right-edge anchors: 1800, 1830, ..., 2400)
START_120: int = 1800   # start of the 120 s windows (right-edge anchors: 1920, 2040, ..., 2400)
END_TIME: int   = 2400

WIN_30:  int = 30
WIN_120: int = 120

# ===== Physiological RR bounds =====
RR_MIN: float = 0.25
RR_MAX: float = 2.0

# ===== Frequency domain (Hz) =====
LF_BAND = (0.04, 0.15)
HF_BAND = (0.15, 0.40)
F_MAX   = 0.5
N_FREQ  = 512


# ---------- ヘルパ ----------
def rolling_trailing_ranges(times: np.ndarray, anchors: np.ndarray, win_sec: int):
    """For every anchor ``t`` yield ``(l, r)`` such that ``times[l:r]`` lies in ``[t - win_sec, t]``.

    ``r`` is exclusive.  ``l`` is remembered between anchors and only ever
    advances; ``r`` is searched forward from ``l`` for each anchor.
    """
    count = len(times)

    def _skip_while(pos, keep_going):
        # Advance ``pos`` while the sample at ``pos`` satisfies ``keep_going``.
        while pos < count and keep_going(times[pos]):
            pos += 1
        return pos

    lo = 0
    for anchor in anchors:
        floor = anchor - win_sec
        lo = _skip_while(lo, lambda value: value < floor)
        hi = _skip_while(lo, lambda value: value <= anchor)
        yield lo, hi

def valid_rr_mask(rr: np.ndarray) -> np.ndarray:
    """Return a boolean mask of the RR values that are finite and within ``[RR_MIN, RR_MAX]``."""
    finite = np.isfinite(rr)
    not_too_short = rr >= RR_MIN
    not_too_long = rr <= RR_MAX
    return finite & not_too_short & not_too_long

def successive_diffs(rr: np.ndarray) -> np.ndarray:
    """Return the differences between consecutive RR values (empty float array for fewer than two values)."""
    return np.diff(rr) if rr.size >= 2 else np.array([], dtype=float)

def pnn50(drr: np.ndarray) -> float:
    """Return the share of successive differences whose magnitude exceeds 0.05.

    The result is a fraction in [0, 1] (not a percentage); NaN for empty input.
    """
    if drr.size == 0:
        return np.nan
    exceeds = np.abs(drr) > 0.05
    return float(exceeds.mean())

def sd1_sd2(rr: np.ndarray):
    """Return the Poincaré descriptors ``(SD1, SD2)`` using sample statistics (ddof=1).

    ``SD1 = sqrt(0.5) * std(diff)`` and ``SD2 = sqrt(2*std(rr)**2 - 0.5*std(diff)**2)``.
    ``(NaN, NaN)`` is returned when fewer than two successive differences are
    available or a standard deviation is not finite; SD2 alone is NaN when the
    value under the square root is not positive.
    """
    undefined = (np.nan, np.nan)
    if rr.size < 2:
        return undefined

    diffs = successive_diffs(rr)
    if diffs.size < 2:
        return undefined

    sd_rr = np.std(rr, ddof=1)
    sd_diff = np.std(diffs, ddof=1)
    if not np.isfinite(sd_rr) or not np.isfinite(sd_diff):
        return undefined

    sd1 = np.sqrt(0.5) * sd_diff
    radicand = 2.0 * (sd_rr ** 2) - 0.5 * (sd_diff ** 2)
    if radicand > 0:
        sd2 = np.sqrt(radicand)
    else:
        sd2 = np.nan
    return sd1, sd2

def lomb_band_power(t_sec: np.ndarray, rr_sec: np.ndarray, band, f_max=None, n_freq=None):
    """Integrate the Lomb-Scargle periodogram of an unevenly sampled RR series over ``band``.

    Args:
        t_sec: sample times in seconds (one per RR value).
        rr_sec: RR values in seconds.
        band: ``(f_low, f_high)`` in Hz, both ends inclusive.
        f_max: highest analysed frequency in Hz; defaults to ``F_MAX``.
        n_freq: number of frequency samples; defaults to ``N_FREQ``.

    Returns:
        The trapezoidal integral of the normalised periodogram over the band
        (a relative quantity), or NaN when fewer than four samples are given or
        no frequency sample falls inside the band.
    """
    if t_sec.size < 4 or rr_sec.size < 4:
        return np.nan
    f_max = F_MAX if f_max is None else f_max
    n_freq = N_FREQ if n_freq is None else n_freq

    tt = t_sec - t_sec[0]
    x  = rr_sec - np.nanmean(rr_sec)  # centred here, so no pre-centering is requested below
    f = np.linspace(0.0001, f_max, n_freq)
    w = 2.0 * np.pi * f  # lombscargle expects angular frequencies
    # ``precenter`` is left at its default (False) rather than passed explicitly:
    # the argument is deprecated in recent SciPy releases, and passing it in both
    # branches would make the fallback fail with the same TypeError once it is removed.
    try:
        p = lombscargle(tt, x, w, normalize=True)
    except TypeError:
        # SciPy without ``normalize``: scale by the variance instead.
        p = lombscargle(tt, x, w)
        var = np.nanvar(x)
        if var > 0:
            p = p / var
    m = (f >= band[0]) & (f <= band[1])
    if not np.any(m):
        return np.nan
    # NumPy 2 names the routine ``trapezoid``; older releases only have ``trapz``.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return float(integrate(p[m], f[m]))

def save_epoch_csv(feature_dir: str, sid: str, name: str,
                   ep_start: np.ndarray, ep_end: np.ndarray, values: np.ndarray) -> str:
    """Write one feature as ``{feature_dir}/{sid}_{name}.csv`` and return that path.

    The CSV has three columns: ``Epoch_start``, ``Epoch_end`` and ``name``; it is
    written without an index column, encoded as UTF-8 with BOM.
    """
    out_path = os.path.join(feature_dir, f"{sid}_{name}.csv")
    table = pd.DataFrame({"Epoch_start": ep_start, "Epoch_end": ep_end, name: values})
    table.to_csv(out_path, index=False, encoding="utf-8-sig")
    return out_path


# ---------- メイン ----------
def compute_rr_features_from_rrtime_for_subject(base_root: str, sid: str) -> None:
    """
    Compute RR-derived features for one subject from its beat times.

    Input : ``FEATURE/{sid}_RRtime.csv`` with a ``Time_sec`` column of beat times [s].
    Output: one CSV per feature in ``FEATURE/`` (columns Epoch_start, Epoch_end, <feature>):
      - 30 s windows : HeartRate, RMSSD, SDSD, SD1, SD2, CSI, CVI, pNN50, RR_interval
      - 120 s windows: LF_power, HF_power, LF_HF_ratio (Lomb-Scargle, no resampling)

    Missing or unreadable input is reported with a ``[SKIP]`` line and nothing is written.
    """
    feature_dir = os.path.join(base_root, f"{sid}", "FEATURE")
    os.makedirs(feature_dir, exist_ok=True)

    rrtime_csv = os.path.join(feature_dir, f"{sid}_RRtime.csv")
    print(f"# Subject {sid} (RR-features from RRtime)")

    if not os.path.exists(rrtime_csv):
        print(f"[SKIP] RR-features: RRtime CSV not found -> {rrtime_csv}")
        return

    try:
        df = pd.read_csv(rrtime_csv, encoding="utf-8-sig")
    except Exception as e:
        print(f"[SKIP] RR-features: failed to read RRtime ({e})")
        return

    if "Time_sec" not in df.columns:
        print("[SKIP] RR-features: Time_sec column missing")
        return

    # Beat times: numeric, finite, strictly increasing (np.unique sorts and de-duplicates).
    beat_times = pd.to_numeric(df["Time_sec"], errors="coerce").to_numpy(dtype=float)
    beat_times = beat_times[np.isfinite(beat_times)]
    if beat_times.size < 3:
        print("[SKIP] RR-features: too few R-peaks")
        return
    beat_times = np.unique(beat_times)

    # RR value per beat; the first beat has no predecessor and gets NaN.
    rr_all = np.concatenate(([np.nan], np.diff(beat_times)))

    # ------ Anchors (right edges of the trailing windows) ------
    anchors_30  = np.arange(START_30  + WIN_30,  END_TIME + 1, WIN_30,  dtype=float)
    anchors_120 = np.arange(START_120 + WIN_120, END_TIME + 1, WIN_120, dtype=float)

    ep30_st, ep30_ed = anchors_30 - WIN_30, anchors_30.copy()
    ep120_st, ep120_ed = anchors_120 - WIN_120, anchors_120.copy()

    # ===== 30 s windows =====
    names_30 = ("HeartRate", "RMSSD", "SDSD", "SD1", "SD2", "CSI", "CVI", "pNN50", "RR_interval")
    feat30 = {name: np.full(anchors_30.shape, np.nan, dtype=float) for name in names_30}

    for i, (lo, hi) in enumerate(rolling_trailing_ranges(beat_times, anchors_30, WIN_30)):
        if hi - lo <= 1:
            continue
        window = rr_all[lo:hi]
        window = window[valid_rr_mask(window)]
        if window.size < 2:
            continue

        deltas = successive_diffs(window)
        mean_rr = float(np.mean(window))
        feat30["RR_interval"][i] = mean_rr
        if np.isfinite(mean_rr) and mean_rr > 0:
            feat30["HeartRate"][i] = 60.0 / mean_rr
        if deltas.size > 0:
            feat30["RMSSD"][i] = float(np.sqrt(np.mean(deltas ** 2)))
        if deltas.size >= 2:
            feat30["SDSD"][i] = float(np.std(deltas, ddof=1))

        sd1, sd2 = sd1_sd2(window)
        feat30["SD1"][i] = sd1
        feat30["SD2"][i] = sd2
        both_positive = np.isfinite(sd1) and sd1 > 0 and np.isfinite(sd2) and sd2 > 0
        if both_positive:
            feat30["CSI"][i] = sd2 / sd1
            feat30["CVI"][i] = np.log10(sd1 * sd2)
        feat30["pNN50"][i] = pnn50(deltas)

    # Save (30 s windows); RR_interval holds the window mean only.
    try:
        for name in names_30:
            p = save_epoch_csv(feature_dir, sid, name, ep30_st, ep30_ed, feat30[name])
            print(f"[OK]  30s -> {p}")
    except Exception as e:
        print(f"[SKIP] RR-features: save failed (30s) ({e})")

    # ===== 120 s windows (frequency domain) =====
    names_120 = ("LF_power", "HF_power", "LF_HF_ratio")
    feat120 = {name: np.full(anchors_120.shape, np.nan, dtype=float) for name in names_120}

    for i, (lo, hi) in enumerate(rolling_trailing_ranges(beat_times, anchors_120, WIN_120)):
        if hi - lo <= 3:
            continue
        # RR is recomputed from the beats inside the window, so the interval that
        # reaches back before the window (and the leading NaN) is not used.
        t_win = beat_times[lo:hi]
        rr_win = np.diff(t_win)
        if rr_win.size < 4:
            continue
        keep = valid_rr_mask(rr_win)
        rr_win = rr_win[keep]
        t_win = t_win[1:][keep]
        if rr_win.size < 4:
            continue

        lf = lomb_band_power(t_win, rr_win, LF_BAND)
        hf = lomb_band_power(t_win, rr_win, HF_BAND)
        feat120["LF_power"][i] = lf
        feat120["HF_power"][i] = hf
        if np.isfinite(lf) and np.isfinite(hf) and hf > 0:
            feat120["LF_HF_ratio"][i] = lf / hf

    try:
        for name in names_120:
            p = save_epoch_csv(feature_dir, sid, name, ep120_st, ep120_ed, feat120[name])
            print(f"[OK]  120s -> {p}")
    except Exception as e:
        print(f"[SKIP] RR-features: save failed (120s) ({e})")


def main_3c_rr_features_from_rrtime(base_root: str, subject_ids: List[str]):
    """Compute the (3c) RR-derived features for every subject ID, one after another."""
    for sid in subject_ids:
        compute_rr_features_from_rrtime_for_subject(base_root, sid)


# Example run:
if __name__ == "__main__":
    main_3c_rr_features_from_rrtime(BASE_DIR, SUBJECT_IDS)


In [None]:
# -*- coding: utf-8 -*-
"""
(3d) Feature Engineering - Skinos（30s epoch, mean/std/slope with watch_ prefix）
-------------------------------------------------------------------------------
入力:
  {BASE_DIR}/{sid}/OFFSET/{sid}_Skinos.csv
    期待列: Time_sec（= 時刻[sec]）, その他は数値列（例: Sweat_Rate, Heart_Rate, Skin_Temp）

出力:
  {BASE_DIR}/{sid}/FEATURE/{sid}_Skinos.csv
    列: Epoch_start, Epoch_end, watch_<metric>_mean, watch_<metric>_std, watch_<metric>_slope

仕様:
  - 30秒エポックは「後ろ詰め」の移動窓：[t-30, t]。アンカーは 1800, 1830, …, 2400
  - 特徴量の計算区間は 1770〜2400（= 最初のエポック [1770,1800] を含めるため）
  - 補間なし。エポックにサンプルが無い場合は mean/slope=NaN、stdは n>=2 のときのみ算出（未満は 0.0）
  - OFFSET にファイルが無ければ SKIP
  - Time_sec 列が無い場合は、列名に 'time' を含む列を自動検出して Time_sec として扱う
"""

import os
from typing import List
import numpy as np
import pandas as pd

# ===== User environment (values already defined by another cell are reused) =====
try:
    BASE_DIR
except NameError:
    BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

try:
    SUBJECT_IDS
except NameError:
    SUBJECT_IDS: List[str] = [
        "10041","10061","10062","10063","10064",
        "10071","10072","10073","10074",
        "10081","10082","10083","10084",
        "10091","10092","10093","10094",
        "10101","10102","10103",
    ]

# ===== Time settings =====
START_30: int = 1770   # lower bound of the computed range (needed for the first epoch [1770, 1800])
GRID_START: int = 1800  # first anchor
GRID_END:   int = 2400
WIN_30:     int = 30

# ---------- ヘルパ ----------
def _rolling_trailing_ranges(times: np.ndarray, anchors: np.ndarray, win_sec: int):
    """各アンカー t に対し [t-win_sec, t] に含まれる times の slice (l, r)（rは非包含）を返す"""
    n = len(times)
    l = 0
    for t in anchors:
        start = t - win_sec
        while l < n and times[l] < start:
            l += 1
        r = l
        while r < n and times[r] <= t:
            r += 1
        yield l, r

def _read_csv_any(path: str) -> pd.DataFrame:
    try:
        return pd.read_csv(path, encoding="utf-8-sig")
    except Exception:
        # 日本語Windows想定のフォールバック
        return pd.read_csv(path, encoding="cp932")

def _ensure_time_column(df: pd.DataFrame) -> pd.DataFrame:
    """Time_sec が無ければ 'time' を含む列を探して Time_sec にリネーム"""
    if "Time_sec" in df.columns:
        return df
    cand = [c for c in df.columns if "time" in str(c).lower()]
    if not cand:
        raise ValueError("Time_sec column missing (no column contains 'time').")
    df = df.rename(columns={cand[0]: "Time_sec"}).copy()
    return df

def _pick_metric_columns(df: pd.DataFrame) -> List[str]:
    """Time_sec 以外の数値列を候補として採用（少なくとも1つ有効値がある列）"""
    metrics = []
    for c in df.columns:
        if c == "Time_sec":
            continue
        v = pd.to_numeric(df[c], errors="coerce")
        if np.isfinite(v).any():
            metrics.append(c)
    if not metrics:
        raise ValueError("no numeric metric columns found.")
    return metrics

def _epoch_aggregate_watch(df: pd.DataFrame,
                           anchors: np.ndarray,
                           win_sec: int,
                           metric_cols: List[str]) -> pd.DataFrame:
    """Aggregate each metric over trailing windows ``[anchor - win_sec, anchor]``.

    For every metric three columns are produced: ``watch_<metric>_mean``,
    ``watch_<metric>_std`` (ddof=1, or 0.0 when the window has a single finite
    value) and ``watch_<metric>_slope`` (first-degree ``np.polyfit`` coefficient
    against ``Time_sec``, only with two or more usable points).  Windows without
    any finite value keep NaN.  ``Epoch_start``/``Epoch_end`` are cast to int.
    """
    stats = ("mean", "std", "slope")
    column_order = ["Epoch_start", "Epoch_end"] + [
        f"watch_{col}_{stat}" for col in metric_cols for stat in stats
    ]

    table = {
        "Epoch_start": (anchors - win_sec).astype(int),
        "Epoch_end": anchors.astype(int),
    }
    # Every output column starts as all-NaN and is filled in per epoch.
    for col in metric_cols:
        for stat in stats:
            table[f"watch_{col}_{stat}"] = np.full(anchors.shape, np.nan, dtype=float)

    times = df["Time_sec"].to_numpy(dtype=float)

    for i, (lo, hi) in enumerate(_rolling_trailing_ranges(times, anchors, win_sec)):
        if hi <= lo:
            continue
        chunk = df.iloc[lo:hi]
        t = pd.to_numeric(chunk["Time_sec"], errors="coerce").to_numpy(dtype=float)
        t_ok = np.isfinite(t)

        for col in metric_cols:
            y = pd.to_numeric(chunk[col], errors="coerce").to_numpy(dtype=float)
            y_ok = np.isfinite(y)
            if not y_ok.any():
                continue

            finite_vals = y[y_ok]
            table[f"watch_{col}_mean"][i] = float(np.mean(finite_vals))
            if finite_vals.size >= 2:
                table[f"watch_{col}_std"][i] = float(np.std(finite_vals, ddof=1))
            else:
                table[f"watch_{col}_std"][i] = 0.0

            # The slope needs at least two points with finite time and value.
            usable = t_ok & y_ok
            if usable.sum() >= 2:
                coeffs = np.polyfit(t[usable], y[usable], 1)  # y ≈ coeffs[0]*t + coeffs[1]
                table[f"watch_{col}_slope"][i] = float(coeffs[0])

    return pd.DataFrame(table, columns=column_order)

# ---------- メイン ----------
def process_skinos_features_for_subject(base_root: str, sid: str) -> None:
    """Turn ``OFFSET/{sid}_Skinos.csv`` into ``FEATURE/{sid}_Skinos.csv`` (30 s epoch mean/std/slope).

    Rows with ``START_30 <= Time_sec <= GRID_END`` are kept, rows sharing a
    ``Time_sec`` are averaged, and every metric column is aggregated at the
    anchors ``GRID_START, GRID_START + WIN_30, ..., GRID_END``.  Any problem is
    reported with a ``[SKIP]`` line; the function never raises for a bad input.
    """
    subject_dir = os.path.join(base_root, f"{sid}")
    offset_path = os.path.join(subject_dir, "OFFSET", f"{sid}_Skinos.csv")
    feature_dir = os.path.join(subject_dir, "FEATURE")
    out_path = os.path.join(feature_dir, f"{sid}_Skinos.csv")
    os.makedirs(feature_dir, exist_ok=True)

    print(f"# Subject {sid}")

    if not os.path.exists(offset_path):
        print(f"[SKIP] Skinos: input not found -> {offset_path}")
        return

    try:
        df0 = _read_csv_any(offset_path)
    except Exception as e:
        print(f"[SKIP] Skinos: failed to read CSV ({e})")
        return

    try:
        df0 = _ensure_time_column(df0)
    except Exception as e:
        print(f"[SKIP] Skinos: {e}")
        return

    # Numeric time, then restriction to the analysed range.
    df0 = df0.copy()
    df0["Time_sec"] = pd.to_numeric(df0["Time_sec"], errors="coerce")
    df0 = df0.loc[(df0["Time_sec"] >= START_30) & (df0["Time_sec"] <= GRID_END)]
    if df0.empty:
        print("[SKIP] Skinos: empty time window")
        return

    try:
        metric_cols = _pick_metric_columns(df0)
    except Exception as e:
        print(f"[SKIP] Skinos: {e}")
        return

    # Coerce the chosen metrics to numeric BEFORE grouping: a column stored as
    # text (e.g. numbers mixed with "N/A") is accepted by _pick_metric_columns
    # but would be dropped by mean(numeric_only=True) and then be missing later.
    numeric = df0[["Time_sec"] + metric_cols].copy()
    for col in metric_cols:
        numeric[col] = pd.to_numeric(numeric[col], errors="coerce")

    # Rows sharing the same Time_sec are averaged.
    df = numeric.groupby("Time_sec", as_index=False).mean(numeric_only=True)

    # Anchors (30 s): 1800, 1830, ..., 2400
    anchors_30 = np.arange(GRID_START, GRID_END + 1, WIN_30, dtype=float)

    # Aggregate and save.
    try:
        out_df = _epoch_aggregate_watch(df, anchors=anchors_30, win_sec=WIN_30, metric_cols=metric_cols)
        out_df.to_csv(out_path, index=False, encoding="utf-8-sig")
        print(f"[OK]  Skinos -> {out_path}")
    except Exception as e:
        print(f"[SKIP] Skinos: failed to save ({e})")

def main_3d_skinos(base_root: str = BASE_DIR, subject_ids: List[str] = SUBJECT_IDS):
    """(3d) Skinos: write the 30 s epoch features for every subject.

    The defaults are the values of ``BASE_DIR`` / ``SUBJECT_IDS`` at the time this
    function is defined.
    """
    for sid in subject_ids:
        process_skinos_features_for_subject(base_root, sid)

# Example run:
if __name__ == "__main__":
    main_3d_skinos()


In [None]:
# -*- coding: utf-8 -*-
"""
(3e) Sweat (30s Epoch Aggregates) — 平均/標準偏差/勾配
----------------------------------------------------------------
入力  : OFFSET/{sid}_Sweat.csv  （列: Time_sec, <sweat-like column>）
出力  : FEATURE/{sid}_Sweat.csv （列: Epoch_start, Epoch_end,
                                 Sweat_mean, Sweat_std, Sweat_slope）
仕様  :
- 解析区間は 1800〜2400 秒（右端アンカー: 1800+30, 1800+60, …, 2400）
- 補間なし（エポック内にサンプルが無ければ NaN、std は n>=2 で算出、未満は 0.0）
- 勾配 slope はエポック内で y ≈ beta * t + alpha の beta（単位/秒）
- 入力の sweat 列は自動検出（優先: 'Sweat', 'GSR', 'EDA' を含む列名）
- OFFSET/{sid}_Sweat.csv が無ければ例外（エラー）を投げる
"""

import os
from typing import List, Optional
import numpy as np
import pandas as pd

# Root directory
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# Target subject IDs (no personal names)
SUBJECT_IDS: List[str] = [
    "10041","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# Epoch settings
EPOCH_WIN = 30
START_SEC = 1800
END_SEC   = 2400


# ---- helpers ----
def _pick_sweat_col(df: pd.DataFrame) -> Optional[str]:
    """Time_sec 以外で数値化できる列から汗量っぽい列を自動検出。"""
    cand = [c for c in df.columns if c != "Time_sec"]
    if not cand:
        return None
    # 数値だけに絞る
    num_cand = []
    for c in cand:
        s = pd.to_numeric(df[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cand.append(c)
    if not num_cand:
        return None
    # 優先キーワード
    priority = ["sweat", "gsr", "eda"]
    lower_map = {c: c.lower() for c in num_cand}
    for key in priority:
        hits = [c for c in num_cand if key in lower_map[c]]
        if hits:
            return hits[0]
    # 見つからなければ最初の数値列
    return num_cand[0]


def _epoch_aggregate_sweat(df: pd.DataFrame,
                           start_sec: int,
                           end_sec: int,
                           win_sec: int,
                           col: str) -> pd.DataFrame:
    """Sweat 原系列で mean/std/slope を計算し DataFrame を返す。"""
    df = df.copy()
    df["Time_sec"] = pd.to_numeric(df["Time_sec"], errors="coerce")
    df[col]        = pd.to_numeric(df[col],        errors="coerce")

    # 指定区間・時刻順
    df = df[(df["Time_sec"] >= start_sec) & (df["Time_sec"] <= end_sec)].sort_values("Time_sec")
    # 同一時刻の重複は平均
    df = df.groupby("Time_sec", as_index=False).mean(numeric_only=True)

    times = df["Time_sec"].to_numpy(float)
    anchors = np.arange(start_sec + win_sec, end_sec + 1, win_sec, dtype=float)
    ep_starts = anchors - win_sec
    ep_ends   = anchors.copy()

    # 出力バッファ
    out = {
        "Epoch_start": ep_starts.copy(),
        "Epoch_end":   ep_ends.copy(),
        "Sweat_mean":  np.full_like(anchors, np.nan, dtype=float),
        "Sweat_std":   np.full_like(anchors, np.nan, dtype=float),
        "Sweat_slope": np.full_like(anchors, np.nan, dtype=float),
    }

    idx_left = 0
    vals = df[col].to_numpy(float)

    for i, t_end in enumerate(anchors):
        t_start = t_end - win_sec
        # 左端を進める
        while idx_left < len(times) and times[idx_left] < t_start:
            idx_left += 1
        # 右端（非包含）
        idx_right = idx_left
        while idx_right < len(times) and times[idx_right] <= t_end:
            idx_right += 1

        if idx_right - idx_left <= 0:
            continue

        seg_t = times[idx_left:idx_right]
        seg_y = vals[idx_left:idx_right]
        vmask = np.isfinite(seg_y)
        if not vmask.any():
            continue

        v = seg_y[vmask]
        out["Sweat_mean"][i] = float(np.mean(v))
        out["Sweat_std"][i]  = float(np.std(v, ddof=1)) if v.size >= 2 else 0.0

        # slope（有効点が2点以上）
        msk = np.isfinite(seg_t) & np.isfinite(seg_y)
        if msk.sum() >= 2:
            beta, _alpha = np.polyfit(seg_t[msk], seg_y[msk], 1)
            out["Sweat_slope"][i] = float(beta)

    return pd.DataFrame(out, columns=["Epoch_start", "Epoch_end", "Sweat_mean", "Sweat_std", "Sweat_slope"])


def process_sweat_epoch_for_subject(base_root: str, sid: str) -> None:
    """Read ``OFFSET/{sid}_Sweat.csv`` and write the 30 s epoch features to ``FEATURE/{sid}_Sweat.csv``.

    Raises:
        FileNotFoundError: the OFFSET CSV does not exist.
        ValueError: the CSV has no ``Time_sec`` column or no numeric sweat-like column.
    """
    subject_root = os.path.join(base_root, sid)
    feature_dir = os.path.join(subject_root, "FEATURE")
    source_csv = os.path.join(subject_root, "OFFSET", f"{sid}_Sweat.csv")
    target_csv = os.path.join(feature_dir, f"{sid}_Sweat.csv")
    os.makedirs(feature_dir, exist_ok=True)

    print(f"# Subject {sid} (Sweat epoch)")
    if not os.path.exists(source_csv):
        raise FileNotFoundError(f"OFFSET not found -> {source_csv}")

    raw = pd.read_csv(source_csv, encoding="utf-8-sig")
    if "Time_sec" not in raw.columns:
        raise ValueError("Time_sec column missing in Sweat CSV")

    sweat_col = _pick_sweat_col(raw)
    if sweat_col is None:
        raise ValueError("No numeric sweat-like column found in Sweat CSV")

    # Only the time column and the selected signal are passed on.
    features = _epoch_aggregate_sweat(
        raw[["Time_sec", sweat_col]].copy(), START_SEC, END_SEC, EPOCH_WIN, sweat_col
    )
    features.to_csv(target_csv, index=False, encoding="utf-8-sig")
    print(f"[OK]  Sweat -> {target_csv}")


def main_3e_sweat_epoch(base_root: str, subject_ids: List[str]):
    """Compute the (3e) sweat epoch features for every subject ID, one after another.

    An exception raised for one subject (e.g. a missing OFFSET file) is not
    caught here and stops the loop.
    """
    for sid in subject_ids:
        process_sweat_epoch_for_subject(base_root, sid)


# Example run (guarded like the other cells, so importing this code does not
# start the processing):
if __name__ == "__main__":
    main_3e_sweat_epoch(BASE_DIR, SUBJECT_IDS)


In [None]:
# -*- coding: utf-8 -*-
"""
心拍数ズレ検証（HR vs WatchHRMean）— ラグ最適化一括処理

- 入力：
  C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\本実験結果\\<SID>\\FEATURE\\<SID>_HeartRate.csv
  C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\本実験結果\\<SID>\\FEATURE\\<SID>_Skinos.csv
  ※ 1行目がラベル（ヘッダ），2行目から数値．
  ※ 1列目=Epoch_start（秒），2列目=Epoch_end（秒），
     HeartRate.csvの3列目=HeartRate（bpm），Skinos.csvの6列目=watch_Heart_Rate_mean（bpm）

- 出力：C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\本実験結果\\ズレ検証\\
  詳細は関数内コメント参照

- 仕様：
  * Watch（Skinos）側をシフト（+で遅らせる，-で早める）して完全一致結合
  * ラグ範囲：[-120, +120] 秒，刻み 5 秒（変更可）
  * 指標：RMSE, AbsDiff_mean など
  * 図の体裁：linewidth=1.5，フォント（Title=30, Label=24, Legend=20, Ticks=20），横軸 mm:ss

実行方法：
    python hr_watch_lag_analysis.py
"""
from __future__ import annotations

import os
import math
from dataclasses import dataclass, field
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# ======== User settings ========
# Raw string with single backslashes: inside r"..." a doubled backslash is kept
# as two characters, so the separators must not be escaped a second time.
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"
OUT_ROOT = os.path.join(BASE_DIR, "ズレ検証")

SUBJECT_IDS: List[str] = [
    "10061","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]

# Lag sweep parameters
LAG_MIN = -120  # seconds (largest shift that moves the watch earlier)
LAG_MAX = 120   # seconds (largest shift that delays the watch)
LAG_STEP = 5    # seconds
MIN_OVERLAP = 3 # lags with fewer matched epochs than this are not evaluated
TARGET_METRIC = "RMSE"  # or "AbsDiff_mean"

# Optional: known per-subject offsets (seconds) to apply as a manual lag
MANUAL_LAG_MAP: Dict[str, int] = {
    # e.g. "10063": 90,
}

# Figure styling (user convention)
LINEWIDTH = 1.5
FS_TITLE = 30
FS_LABEL = 24
FS_LEGEND = 20
FS_TICKS = 20

# ======== ユーティリティ ========

def mmss_formatter(x: float, pos: int) -> str:
    """Format a tick position given in seconds as ``mm:ss``.

    Args:
        x: Tick position in seconds.
        pos: Tick index passed by the formatter machinery; unused.

    Returns:
        Zero-padded ``mm:ss`` text with fractional seconds truncated. Negative
        positions are formatted from their magnitude and prefixed with ``-``.
        An empty string is returned when ``x`` cannot be converted to a finite
        number (non-numeric input, NaN, infinity).
    """
    try:
        seconds = float(x)
    except (TypeError, ValueError, OverflowError):
        return ""
    # int() cannot convert NaN/inf, so such ticks get no label instead of raising.
    if not math.isfinite(seconds):
        return ""
    # Work on the magnitude: floor division of a negative value would yield
    # e.g. -1 min 30 s for -30 s.
    minutes, rest = divmod(abs(seconds), 60)
    label = f"{int(minutes):02d}:{int(rest):02d}"
    if seconds < 0 and label != "00:00":
        return "-" + label
    return label


def ensure_dir(p: str) -> None:
    """Create directory ``p`` (including missing parents) unless it already exists."""
    if os.path.isdir(p):
        return
    os.makedirs(p, exist_ok=True)


def epoch_midpoint(start: pd.Series, end: pd.Series) -> pd.Series:
    """Return the element-wise midpoint of two epoch-boundary series.

    Both inputs are coerced to numeric first; values that cannot be parsed
    become NaN and therefore give a NaN midpoint.
    """
    lower = pd.to_numeric(start, errors="coerce")
    upper = pd.to_numeric(end, errors="coerce")
    return (lower + upper) / 2.0


@dataclass
class Metrics:
    """Agreement statistics for one subject at one watch lag.

    ``Diff`` is HR minus the (shifted) watch heart rate; the ``AbsDiff_*``
    fields summarise its absolute value.
    """

    SID: str             # subject ID
    Lag_sec: int         # shift applied to the watch epochs, in seconds
    N_overlap: int       # number of matched epochs with a usable difference
    Diff_mean: float
    Diff_median: float
    Diff_std: float
    AbsDiff_mean: float
    AbsDiff_median: float
    AbsDiff_p95: float   # 95th percentile of the absolute difference
    RMSE: float

    def to_row(self) -> Dict[str, object]:
        """Return the fields as a dict keyed by field name, in declaration order."""
        return {name: getattr(self, name) for name in self.__dataclass_fields__}


# ======== IO ========

def path_hr_csv(sid: str) -> str:
    """Return the path of the subject's HeartRate CSV inside its FEATURE folder."""
    feature_dir = os.path.join(BASE_DIR, sid, "FEATURE")
    return os.path.join(feature_dir, f"{sid}_HeartRate.csv")


def path_watch_csv(sid: str) -> str:
    """Return the path of the subject's Skinos (watch) CSV inside its FEATURE folder."""
    feature_dir = os.path.join(BASE_DIR, sid, "FEATURE")
    return os.path.join(feature_dir, f"{sid}_Skinos.csv")


def load_hr_csv(path: str) -> pd.DataFrame:
    """Load a HeartRate CSV (first row is the header, data from the second row).

    Columns are taken by position, not by name: 1st = Epoch_start,
    2nd = Epoch_end, 3rd = HeartRate. Values are coerced to numeric and rows
    with a missing value in any of the three columns are dropped.

    Returns:
        DataFrame with columns ``Epoch_start``, ``Epoch_end``, ``HR`` and a
        fresh 0-based index.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: the file has fewer than three columns.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"[ERROR] HR CSV not found: {path}")
    table = pd.read_csv(path, header=0)
    if table.shape[1] < 3:
        raise ValueError(f"[ERROR] HR CSV must have >=3 columns: {path}")

    # Output column name -> position in the file.
    layout = {"Epoch_start": 0, "Epoch_end": 1, "HR": 2}
    picked = pd.DataFrame({
        name: pd.to_numeric(table.iloc[:, position], errors="coerce")
        for name, position in layout.items()
    })
    # Drop rows with any missing value.
    return picked.dropna(subset=list(layout)).reset_index(drop=True)


def load_watch_csv(path: str) -> pd.DataFrame:
    """Load a Skinos CSV (first row is the header, data from the second row).

    Columns are taken by position, not by name: 1st = Epoch_start,
    2nd = Epoch_end, 6th = watch_Heart_Rate_mean. Values are coerced to
    numeric and rows with a missing value in any of the three are dropped.

    Returns:
        DataFrame with columns ``Epoch_start``, ``Epoch_end``,
        ``WatchHRMean`` and a fresh 0-based index.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: the file has fewer than six columns.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"[ERROR] Skinos CSV not found: {path}")
    table = pd.read_csv(path, header=0)
    if table.shape[1] < 6:
        raise ValueError(f"[ERROR] Skinos CSV must have >=6 columns: {path}")

    # Output column name -> position in the file.
    layout = {"Epoch_start": 0, "Epoch_end": 1, "WatchHRMean": 5}
    picked = pd.DataFrame({
        name: pd.to_numeric(table.iloc[:, position], errors="coerce")
        for name, position in layout.items()
    })
    return picked.dropna(subset=list(layout)).reset_index(drop=True)


# ======== コア処理 ========

def shift_epochs(df: pd.DataFrame, seconds: int) -> pd.DataFrame:
    """Return a copy of ``df`` with both epoch boundaries moved by ``seconds``.

    The input frame is left untouched; the boundary columns are coerced to
    numeric before the offset is added.
    """
    shifted = df.copy()
    for boundary in ("Epoch_start", "Epoch_end"):
        shifted[boundary] = pd.to_numeric(shifted[boundary], errors="coerce") + seconds
    return shifted


def merge_on_epoch(hr_df: pd.DataFrame, watch_df: pd.DataFrame, watch_label: str) -> pd.DataFrame:
    """Inner-join HR and watch rows whose epoch boundaries match exactly.

    The result has ``Time_sec`` (epoch midpoint), ``Diff`` (HR minus watch)
    and ``AbsDiff`` added, is sorted by ``Time_sec`` and re-indexed from 0.
    The watch column is finally renamed from ``WatchHRMean`` to
    ``watch_label``.
    """
    keys = ["Epoch_start", "Epoch_end"]
    joined = hr_df[keys + ["HR"]].merge(
        watch_df[keys + ["WatchHRMean"]],
        on=keys,
        how="inner",
    )
    joined["Time_sec"] = epoch_midpoint(joined["Epoch_start"], joined["Epoch_end"])

    hr_values = pd.to_numeric(joined["HR"], errors="coerce")
    watch_values = pd.to_numeric(joined["WatchHRMean"], errors="coerce")
    joined["Diff"] = hr_values - watch_values
    joined["AbsDiff"] = joined["Diff"].abs()

    layout = keys + ["Time_sec", "HR", "WatchHRMean", "Diff", "AbsDiff"]
    ordered = joined[layout].sort_values("Time_sec").reset_index(drop=True)
    return ordered.rename(columns={"WatchHRMean": watch_label})


def compute_metrics(df: pd.DataFrame, sid: str, lag_sec: int) -> Metrics:
    """Summarise the ``Diff`` column of a merged frame for one lag.

    Non-numeric or missing differences are ignored. When nothing usable
    remains (or ``df`` is empty) the result has ``N_overlap == 0`` and NaN for
    every statistic. ``Diff_std`` is the sample standard deviation, reported
    as 0.0 when only one difference is available.
    """
    # An empty frame is handled like a frame without any usable difference.
    diff = None if df.empty else pd.to_numeric(df["Diff"], errors="coerce").dropna()
    if diff is None or diff.empty:
        return Metrics(sid, lag_sec, 0, *([np.nan] * 7))

    n_valid = int(diff.shape[0])
    magnitude = diff.abs()
    return Metrics(
        SID=sid,
        Lag_sec=lag_sec,
        N_overlap=n_valid,
        Diff_mean=float(diff.mean()),
        Diff_median=float(diff.median()),
        Diff_std=float(diff.std(ddof=1)) if n_valid > 1 else 0.0,
        AbsDiff_mean=float(magnitude.mean()),
        AbsDiff_median=float(magnitude.median()),
        AbsDiff_p95=float(np.quantile(magnitude, 0.95)),
        RMSE=float(np.sqrt(np.mean(diff.values ** 2))),
    )


def select_best(metrics_df: pd.DataFrame, target_metric: str = TARGET_METRIC) -> pd.Series:
    """Pick the row with the smallest ``target_metric``.

    Rows whose ``target_metric`` is NaN are ignored. Ties are broken by the
    smaller ``AbsDiff_mean`` and then by the larger ``N_overlap``. An empty
    object Series is returned when no row qualifies.
    """
    usable = None if metrics_df.empty else metrics_df.dropna(subset=[target_metric]).copy()
    if usable is None or usable.empty:
        return pd.Series(dtype=object)
    ranking_keys = [target_metric, "AbsDiff_mean", "N_overlap"]
    ranked = usable.sort_values(ranking_keys, ascending=[True, True, False])
    return ranked.iloc[0]


# ======== 可視化 ========

def _setup_time_axis(ax, title: str) -> None:
    """Apply the shared styling to a time axis: mm:ss ticks, labels, title."""
    ax.set_title(title, fontsize=FS_TITLE)
    ax.set_xlabel("Time (mm:ss)", fontsize=FS_LABEL)
    ax.set_ylabel("", fontsize=FS_LABEL)
    tick_formatter = FuncFormatter(mmss_formatter)
    ax.xaxis.set_major_formatter(tick_formatter)
    ax.tick_params(axis="both", which="major", labelsize=FS_TICKS)


def plot_series(df: pd.DataFrame, title: str, outpng: str, hr_label: str = "HeartRate", watch_label: str = "WatchHRMean") -> None:
    """Plot HR and the watch series against ``Time_sec`` and save a PNG.

    ``watch_label`` is both the column read from ``df`` and the legend text
    of the second line. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        return
    fig, ax = plt.subplots(figsize=(12, 5))
    time_axis = df["Time_sec"]
    ax.plot(time_axis, df["HR"], linewidth=LINEWIDTH, label=hr_label)
    ax.plot(time_axis, df[watch_label], linewidth=LINEWIDTH, label=watch_label)
    _setup_time_axis(ax, title)
    ax.legend(fontsize=FS_LEGEND)
    fig.tight_layout()
    fig.savefig(outpng, bbox_inches="tight")
    plt.close(fig)


def plot_diff(df: pd.DataFrame, title: str, outpng: str) -> None:
    """Plot ``Diff`` (HR minus watch) against ``Time_sec`` and save a PNG.

    A dashed horizontal line marks zero. Nothing is written when ``df`` is
    empty.
    """
    if df.empty:
        return
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df["Time_sec"], df["Diff"], linewidth=LINEWIDTH, label="Diff = HR - Watch")
    _setup_time_axis(ax, title)
    ax.axhline(0.0, linestyle="--", linewidth=1)
    ax.legend(fontsize=FS_LEGEND)
    fig.tight_layout()
    fig.savefig(outpng, bbox_inches="tight")
    plt.close(fig)


def plot_hist_absdiff(df: pd.DataFrame, title: str, outpng: str) -> None:
    """Save a 10-bin histogram of the numeric, non-missing ``AbsDiff`` values.

    Nothing is written when ``df`` is empty.
    """
    if df.empty:
        return
    fig, ax = plt.subplots(figsize=(8, 5))
    abs_errors = pd.to_numeric(df["AbsDiff"], errors="coerce").dropna().values
    ax.hist(abs_errors, bins=10)
    ax.set_xlabel("Absolute Difference (bpm)", fontsize=FS_LABEL)
    ax.set_ylabel("Count", fontsize=FS_LABEL)
    ax.tick_params(axis="both", which="major", labelsize=FS_TICKS)
    ax.set_title(title, fontsize=FS_TITLE)
    fig.tight_layout()
    fig.savefig(outpng, bbox_inches="tight")
    plt.close(fig)


# ======== メイン処理 ========

def _plot_lagged_result(merged: pd.DataFrame, kind: str, lag: int, plots_dir: str) -> None:
    """Save the series, difference and |difference| histogram PNGs for one lagged merge.

    ``kind`` ("best" or "manual") selects the watch column
    ``WatchHRMean_<kind>``, the ``*_<kind>Lag.png`` file names and the wording
    of the titles.
    """
    caption = f"{kind} lag {lag:+d}s"
    plot_series(
        merged,
        f"HR vs WatchHRMean ({caption})",
        os.path.join(plots_dir, f"series_{kind}Lag.png"),
        watch_label=f"WatchHRMean_{kind}",
    )
    plot_diff(merged, f"Difference ({caption})", os.path.join(plots_dir, f"diff_{kind}Lag.png"))
    plot_hist_absdiff(merged, f"|HR - Watch| ({caption})", os.path.join(plots_dir, f"hist_absdiff_{kind}Lag.png"))


def process_sid(sid: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
    """Run the HR-vs-watch lag analysis for one subject.

    Outputs written under ``OUT_ROOT/<sid>/``:
      * ``MERGED/HR_vs_Watch_noShift.csv`` - exact-epoch join without any shift
      * ``LAG_SWEEP/LAG_<lag>.csv``        - join for every evaluated lag
      * ``LAG_SWEEP/LAG_<lag>_manual.csv`` - join for the manual lag, if configured
      * ``SUMMARY/lag_metrics.csv``        - one metrics row per evaluated lag
      * ``PLOTS/*.png``                    - no-shift, best-lag and manual-lag figures

    The watch side is shifted from ``LAG_MIN`` to ``LAG_MAX`` in steps of
    ``LAG_STEP``; lags with fewer than ``MIN_OVERLAP`` matched epochs are
    skipped. The manual lag from ``MANUAL_LAG_MAP`` is processed whether or
    not the sweep produced a best lag.

    Returns:
        ``(no_shift_merge, metrics_df, best_row)``. When the inputs cannot be
        read, two empty DataFrames and an empty Series are returned.
        ``best_row`` is also empty when no lag had enough overlap.
    """
    sid_dir = os.path.join(OUT_ROOT, sid)
    d_merged = os.path.join(sid_dir, "MERGED")
    d_sweep = os.path.join(sid_dir, "LAG_SWEEP")
    d_summary = os.path.join(sid_dir, "SUMMARY")
    d_plots = os.path.join(sid_dir, "PLOTS")
    for d in (sid_dir, d_merged, d_sweep, d_summary, d_plots):
        ensure_dir(d)

    # Load both inputs; a failure is logged and the subject skipped so the
    # batch can continue with the remaining subjects.
    hr_path = path_hr_csv(sid)
    watch_path = path_watch_csv(sid)
    try:
        hr_df = load_hr_csv(hr_path)
        watch_df = load_watch_csv(watch_path)
        print(f"[OK] SID={sid} read HR: {len(hr_df)} rows, Watch: {len(watch_df)} rows")
    except Exception as e:
        print(f"[ERROR] SID={sid} read failed: {e}")
        return pd.DataFrame(), pd.DataFrame(), pd.Series(dtype=object)

    # --- Comparison without any shift ---
    merged0 = merge_on_epoch(hr_df, watch_df, watch_label="WatchHRMean")
    merged0.to_csv(os.path.join(d_merged, "HR_vs_Watch_noShift.csv"), index=False, encoding="utf-8-sig")
    print(f"[OK] SID={sid} no-shift overlap: {len(merged0)}")

    plot_series(merged0, "HR vs WatchHRMean (no shift)", os.path.join(d_plots, "series_noShift.png"), watch_label="WatchHRMean")
    plot_diff(merged0, "Difference (no shift)", os.path.join(d_plots, "diff_noShift.png"))

    # --- Lag sweep (the watch side is shifted) ---
    rows: List[Dict[str, object]] = []
    for lag in range(LAG_MIN, LAG_MAX + 1, LAG_STEP):
        w_shift = shift_epochs(watch_df, lag)
        merged = merge_on_epoch(hr_df, w_shift, watch_label="WatchHRMean_shift")
        if len(merged) < MIN_OVERLAP:
            print(f"[SKIP] SID={sid} lag={lag:+d}: overlap < {MIN_OVERLAP}")
            continue
        # One CSV per lag (mind the disk usage when there are many rows).
        merged.to_csv(os.path.join(d_sweep, f"LAG_{lag:+d}.csv"), index=False, encoding="utf-8-sig")
        m = compute_metrics(merged, sid, lag)
        rows.append(m.to_row())
        print(f"[OK] SID={sid} lag={lag:+4d} sec overlap={m.N_overlap} RMSE={m.RMSE:.3f} AbsMean={m.AbsDiff_mean:.3f}")

    metrics_df = pd.DataFrame(rows)
    metrics_path = os.path.join(d_summary, "lag_metrics.csv")
    metrics_df.to_csv(metrics_path, index=False, encoding="utf-8-sig")

    # --- Figures for the best lag ---
    best = select_best(metrics_df, TARGET_METRIC)
    if best.empty:
        print(f"[WARN] SID={sid} no best lag (no valid overlap)")
    else:
        best_lag = int(best["Lag_sec"])
        merged_best = merge_on_epoch(hr_df, shift_epochs(watch_df, best_lag), watch_label="WatchHRMean_best")
        _plot_lagged_result(merged_best, "best", best_lag, d_plots)

    # --- Manual lag (optional) ---
    # Handled independently of the sweep result: a known offset is most useful
    # exactly when the sweep could not find any usable lag.
    if sid in MANUAL_LAG_MAP:
        man_lag = int(MANUAL_LAG_MAP[sid])
        merged_man = merge_on_epoch(hr_df, shift_epochs(watch_df, man_lag), watch_label="WatchHRMean_manual")
        merged_man.to_csv(os.path.join(d_sweep, f"LAG_{man_lag:+d}_manual.csv"), index=False, encoding="utf-8-sig")
        _plot_lagged_result(merged_man, "manual", man_lag, d_plots)

    return merged0, metrics_df, best


def main() -> None:
    """Run the lag analysis for every subject and save the per-subject best lags.

    Each ID in ``SUBJECT_IDS`` is processed with ``process_sid``. The
    non-empty best-lag rows are collected, reduced to the summary columns,
    sorted by ``SID`` and written to ``OUT_ROOT/ALL_SID_best_lag.csv``. A
    warning is printed instead when no subject produced a best lag.
    """
    ensure_dir(OUT_ROOT)
    best_rows: List[pd.Series] = []

    for sid in SUBJECT_IDS:
        # Only the best-lag row is needed here; the other outputs are already on disk.
        _, _, best = process_sid(sid)
        if best is not None and not best.empty:
            best_rows.append(best)

    if not best_rows:
        print("[WARN] No best rows generated (no overlaps across SIDs?)")
        return

    summary_path = os.path.join(OUT_ROOT, "ALL_SID_best_lag.csv")
    all_best = pd.DataFrame(best_rows)
    all_best = all_best[["SID", "Lag_sec", "N_overlap", "AbsDiff_mean", "RMSE"]]
    all_best = all_best.sort_values(["SID"]).reset_index(drop=True)
    all_best.to_csv(summary_path, index=False, encoding="utf-8-sig")
    print(f"[OK] saved -> {summary_path}")


# Script entry point: run the batch only when this code is executed directly.
if __name__ == "__main__":
    main()


In [None]:
# -*- coding: utf-8 -*-
r"""
(4) Epoch merge — FEATURE統合（Epoch_start/Epoch_endベース → 30秒グリッド）

- FEATURE\ の CSV を "リストに列挙" し、raw/raw2 付きは除外
- 入力CSV: 1,2列目が Epoch_start/Epoch_end、3列目以降が特徴量
- 出力CSV: 1,2列目が Epoch_start/Epoch_end、3列目 FMS、4列目以降が特徴量
- HF_power / LF_power / LF_HF_ratio は 120秒系列（30秒化後、内部 NaN を bfill）
- さらに 120秒系列については「最初の30秒(1770–1800)」も“直後(1800–1830)”の値で補完
- FMS は被験者ごとのスコア列（30秒刻み）を挿入
  * 本コードでは FMS_TEXT に“スペース区切り文字列”として与えた系列を使用
  * 基準開始=1770s（=1800-30）、30s刻み、fms_text_shift_steps で時刻シフト可能（既定0）
- 出力: {BASE_DIR}\{sid}\EPOCH\{sid}_epoch.csv （上書き）
依存: numpy, pandas
"""

from __future__ import annotations
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
import pandas as pd

# ===================== CONFIG =====================
# Root folder that holds one sub-folder per subject.
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\本実験結果"

# --- Subject IDs only (no personal names) ---
SUBJECT_IDS: List[str] = [
    "10041","10061","10062","10063","10064",
    "10071","10072","10073","10074",
    "10081","10082","10083","10084",
    "10091","10092","10093","10094",
    "10101","10102","10103",
]
# (sid, name) pairs; the name part is always empty here, so the folder name
# built from sid + name is just the ID.
subjects: List[Tuple[str, str]] = [(sid, "") for sid in SUBJECT_IDS]

# --- FMS scores: space-separated series in 30 s steps, first value at 1770 s ---
FMS_TEXT: Dict[str, str] = {
    "10041": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1",
    "10061": "0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 2 1",
    "10062": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1",
    "10063": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
    "10064": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1",
    "10071": "0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 1 1 1 1 1",
    "10072": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
    "10073": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
    "10074": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
    "10081": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1",
    "10082": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1",
    "10083": "0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1",
    "10084": "0 0 1 0 0 0 0 0 1 1 2 2 2 2 3 3 3 3 3 3 3",
    "10091": "0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1",
    "10092": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
    "10093": "0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 2 2 3 3 4 4",
    "10094": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2",
    "10101": "0 0 0 0 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 3 3",
    "10102": "0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2",
    "10103": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 2 3 3 4",
}
fms_text_base_start: int   = 1770  # Epoch_start (s) of the first FMS_TEXT value
fms_text_step_sec: int     = 30    # spacing (s) between consecutive FMS_TEXT values
fms_text_shift_steps: int  = 0   # e.g. -1 moves the series one epoch earlier

# FMS_MAP is unused for now (priority: MAP > TEXT). Put per-subject arrays here if needed.
FMS_MAP: Dict[str, List[float]] = {}

# Output grid (30 s bins). Starts at 1770 s as required, so that the first
# 30 s ahead of the 120 s series can be filled.
grid_start_sec: int = 1770
epoch_len_sec: int  = 30

# Feature names (file stem after the leading "<sid>_") that are treated as
# 120 s epoch series.
FEATURE_120S: set = {"LF_power", "HF_power", "LF_HF_ratio"}

# Overwrite an existing output CSV.
overwrite: bool = True
# ==================================================


def list_candidate_files(feat_dir: Path) -> List[Path]:
    """Return the sorted ``*.csv`` files of ``feat_dir`` that are merge candidates.

    Files whose stem contains ``raw`` (case-insensitive) are excluded; this
    also covers the ``raw2`` variants.
    """
    kept: List[Path] = []
    for csv_path in feat_dir.glob("*.csv"):
        if "raw" in csv_path.stem.lower():
            continue
        kept.append(csv_path)
    kept.sort()
    return kept


def ensure_epoch_columns(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Normalise the first two columns to integer ``Epoch_start`` / ``Epoch_end``.

    The third and later columns are regarded as features and left as they
    are. The input frame is not modified.

    Returns:
        A renamed copy, or ``None`` when the frame is ``None``/empty, has
        fewer than three columns, has epoch values that cannot be converted
        to whole seconds, or has decreasing ``Epoch_start`` values.
    """
    if df is None or df.empty or df.shape[1] < 3:
        return None

    names = list(df.columns)
    out = df.rename(columns={names[0]: "Epoch_start", names[1]: "Epoch_end"}).copy()

    # Round to whole seconds; a value that cannot be converted makes the cast fail.
    try:
        for boundary in ("Epoch_start", "Epoch_end"):
            out[boundary] = pd.to_numeric(out[boundary], errors="coerce").round().astype(int)
    except Exception:
        return None

    # Epoch starts must be non-decreasing.
    starts = out["Epoch_start"].to_numpy()
    if starts.size >= 2 and (starts[1:] < starts[:-1]).any():
        return None
    return out


def build_epoch_grid(start_sec: int, end_sec: int, step_sec: int) -> pd.DataFrame:
    """Build a regular epoch grid of half-open bins ``[start, start + step)``.

    Bin starts run from ``start_sec`` up to, but not including, ``end_sec``
    in steps of ``step_sec``. The result has integer columns ``Epoch_start``
    and ``Epoch_end``.
    """
    left_edges = np.arange(start_sec, end_sec, step_sec, dtype=int)
    columns = {"Epoch_start": left_edges, "Epoch_end": left_edges + step_sec}
    return pd.DataFrame(columns, dtype=int)


def resample_epoch_to_30s(df: pd.DataFrame, grid: pd.DataFrame) -> pd.DataFrame:
    """Spread epochs of any length (30/60/120 s, ...) onto the grid bins.

    A source epoch ``[s, e)`` contributes to a grid bin ``[b, b_end)`` when
    the two overlap by more than zero. When several epochs overlap one bin,
    each feature is their plain mean with NaN skipped (all-NaN stays NaN).
    Bins without any overlapping epoch are NaN.

    Returns:
        Float DataFrame indexed like ``grid`` with one column per feature
        (the third and later columns of ``df``).
    """
    feature_names = list(df.columns)[2:]  # features start at the third column
    src_start = df["Epoch_start"].to_numpy()
    src_end = df["Epoch_end"].to_numpy()
    src_values = df[feature_names].to_numpy(dtype=float)

    binned = np.full((len(grid), len(feature_names)), np.nan, dtype=float)
    bin_edges = zip(grid["Epoch_start"].to_numpy(), grid["Epoch_end"].to_numpy())
    for row, (bin_start, bin_end) in enumerate(bin_edges):
        disjoint = (src_end <= bin_start) | (src_start >= bin_end)
        hits = ~disjoint  # overlap > 0
        if hits.any():
            chunk = pd.DataFrame(src_values[hits], columns=feature_names)
            binned[row, :] = chunk.mean(axis=0, skipna=True).to_numpy()
    return pd.DataFrame(binned, index=grid.index, columns=feature_names)


def merge_features_on_grid(grid: pd.DataFrame, frames: List[Tuple[str, pd.DataFrame]]) -> pd.DataFrame:
    """Append every resampled feature frame to a copy of the grid, column by column.

    Values are attached by position. A column whose name is already present
    in the result is stored as ``'col (feat)'`` instead.
    """
    combined = grid.copy()
    for source, frame in frames:
        for name in frame.columns:
            target = f"{name} ({source})" if name in combined.columns else name
            combined[target] = frame[name].values
    return combined


def bfill_internal_columns(merged: pd.DataFrame, cols: List[str]) -> None:
    """Back-fill, in place, the NaN gaps that lie inside each column's valid range.

    Only NaN between the first and the last valid value of a column are
    filled; NaN before the first or after the last valid value stay NaN.
    Names in ``cols`` that are not columns of ``merged`` are ignored.
    """
    for name in cols:
        if name not in merged.columns:
            continue
        series = merged[name]
        first = series.first_valid_index()
        last = series.last_valid_index()
        # Nothing to fill without at least two valid values spanning a range.
        if first is None or last is None or last <= first:
            continue
        merged.loc[first:last, name] = series.loc[first:last].bfill()


def front_fill_first_bin_from_next(merged: pd.DataFrame, cols: List[str]) -> List[str]:
    """Fill the first grid bin of the given columns from their first valid value.

    Applies only when the first row starts at ``grid_start_sec``. For each
    listed column whose first-row value is NaN, that single cell is set, in
    place, to the column's first valid value.

    Returns:
        Names of the columns that were filled.
    """
    filled: List[str] = []
    if merged.empty or int(merged.loc[0, "Epoch_start"]) != grid_start_sec:
        return filled
    for name in cols:
        if name not in merged.columns or not pd.isna(merged.loc[0, name]):
            continue
        donor = merged[name].first_valid_index()
        if donor is None:
            continue
        merged.loc[0, name] = merged.loc[donor, name]
        filled.append(name)
    return filled


# ===================== FMS helpers =====================
def _fms_series_from_text_for_grid(sid: str, grid: pd.DataFrame) -> Optional[np.ndarray]:
    """Return the ``FMS_TEXT`` scores of ``sid`` aligned to ``grid``.

    The scores are placed on consecutive epochs of ``fms_text_step_sec``
    seconds, the first one starting at ``fms_text_base_start`` moved by
    ``fms_text_shift_steps`` steps, and left-joined onto the grid by exact
    epoch boundaries. Grid rows without a score are NaN.

    Returns:
        One value per grid row, or ``None`` when ``sid`` has no text entry.
    """
    text = FMS_TEXT.get(sid)
    if not text:
        return None
    scores = np.array([float(token) for token in str(text).split()], dtype=float)

    step = fms_text_step_sec
    first_start = fms_text_base_start + fms_text_shift_steps * step
    starts = np.arange(first_start, first_start + scores.size * step, step, dtype=int)
    table = pd.DataFrame({"Epoch_start": starts, "Epoch_end": starts + step, "FMS": scores})
    aligned = grid.merge(table, on=["Epoch_start", "Epoch_end"], how="left")
    return aligned["FMS"].to_numpy()


def estimate_fms_end_sec(sid: str, default_end: int) -> int:
    """Estimate the last ``Epoch_end`` second covered by the subject's FMS data.

    The result is the largest of ``default_end``, the end implied by the
    length of ``FMS_MAP[sid]`` (counted from ``grid_start_sec``) and the end
    implied by the number of tokens in ``FMS_TEXT[sid]`` (counted from the
    shifted text start). ``default_end`` is returned when the subject has
    neither.
    """
    candidates = [default_end]

    mapped = FMS_MAP[sid] if sid in FMS_MAP else None
    if mapped:
        count = len(mapped)
        if count > 0:
            candidates.append(grid_start_sec + count * epoch_len_sec)

    text = FMS_TEXT[sid] if sid in FMS_TEXT else ""
    if text.strip():
        count = len(text.split())
        if count > 0:
            text_start = fms_text_base_start + fms_text_shift_steps * fms_text_step_sec
            candidates.append(text_start + count * fms_text_step_sec)

    return max(candidates)


def make_fms_series_for_subject(sid: str, grid: pd.DataFrame) -> np.ndarray:
    """Return one FMS value per grid row. Source priority: FMS_MAP, FMS_TEXT, NaN.

    A ``FMS_MAP`` entry is used by position: it is padded with NaN or cut to
    the grid length. A ``FMS_TEXT`` entry is aligned by epoch boundaries.
    Without either, every value is NaN. The chosen source is logged.
    """
    length = len(grid)

    if sid in FMS_MAP and len(FMS_MAP[sid]) > 0:
        scores = np.array(FMS_MAP[sid], dtype=float).flatten()
        fitted = np.full(length, np.nan, dtype=float)
        usable = min(scores.size, length)
        fitted[:usable] = scores[:usable]
        print(f"[OK]  FMS -> from FMS_MAP (len={length})")
        return fitted

    from_text = _fms_series_from_text_for_grid(sid, grid)
    if from_text is None:
        print(f"[OK]  FMS -> NaN (len={length})")
        return np.full(length, np.nan, dtype=float)

    print(f"[OK]  FMS -> from FMS_TEXT (len={length})")
    return from_text


# ===================== MAIN =====================
def main_4(subjects: List[Tuple[str, str]] = subjects, overwrite: bool = overwrite) -> None:
    """
    Run step (4): FEATURE -> normalise to 30 s -> join side by side -> fill FMS -> save EPOCH.

    Args:
        subjects: (sid, name) pairs; the subject folder is BASE_DIR/{sid}{name}.
        overwrite: when False, a subject whose output CSV already exists is skipped.

    Log lines:
      # Subject {sid}{name}
      [OK]  FEATURE_DIR -> <path>
      [OK]  LIST n files
      [OK]  {FeatureName} -> <csv path>
      [SKIP] {FeatureName}: columns mismatch or no features
      [OK]  RESAMPLE {FeatureName} -> 30s
      [OK]  BFILL(120s) -> <cols>
      [OK]  FRONT-FILL(1770) -> <cols>   (only when something was filled)
      [OK]  EPOCH -> <out_csv>
      [SKIP] EPOCH: exists
    """
    base = Path(BASE_DIR)

    for sid, name in subjects:
        print(f"# Subject {sid}{name}")

        # Prefer FEATURE; fall back to FEATURES when it does not exist.
        feat_dir = base / f"{sid}{name}" / "FEATURE"
        if not feat_dir.exists():
            alt = base / f"{sid}{name}" / "FEATURES"
            feat_dir = alt if alt.exists() else None

        # The EPOCH folder is created before the skip checks below, so it
        # exists even for subjects that end up being skipped.
        out_dir = base / f"{sid}{name}" / "EPOCH"
        out_dir.mkdir(parents=True, exist_ok=True)
        out_csv = out_dir / f"{sid}_epoch.csv"

        if out_csv.exists() and not overwrite:
            print("[SKIP] EPOCH: exists")
            continue

        if feat_dir is None:
            print("[SKIP] FEATURES: not found")
            continue
        print(f"[OK]  FEATURE_DIR -> {str(feat_dir)}")

        # === Enumerate the candidate CSVs (as a list) ===
        candidates: List[Path] = list_candidate_files(feat_dir)
        print(f"[OK]  LIST {len(candidates)} files")

        # === Estimate where the 30 s grid has to end ===
        # Start from the FMS extent; it is widened below by each feature's last Epoch_end.
        max_end = estimate_fms_end_sec(sid, default_end=grid_start_sec + epoch_len_sec)

        frames_original: List[Tuple[str, pd.DataFrame]] = []
        for p in candidates:
            feat_name = p.stem  # e.g. "10041_LF_power"
            try:
                raw = pd.read_csv(p)
            except Exception:
                print(f"[SKIP] {feat_name}: read error")
                continue

            df = ensure_epoch_columns(raw)
            if df is None:
                print(f"[SKIP] {feat_name}: columns mismatch or no features")
                continue

            value_cols = list(df.columns)[2:]
            if not value_cols:
                print(f"[SKIP] {feat_name}: no feature columns (>=3rd) found")
                continue

            print(f"[OK]  {feat_name} -> {str(p)}")

            # Best effort: a feature whose Epoch_end cannot be evaluated simply
            # does not widen the grid.
            try:
                max_end = max(max_end, int(df["Epoch_end"].max()))
            except Exception:
                pass

            frames_original.append((feat_name, df))

        # === Build the 30 s grid (starting at 1770) ===
        grid = build_epoch_grid(grid_start_sec, max_end, epoch_len_sec)

        # === Resample to 30 s and collect ===
        resampled_frames: List[Tuple[str, pd.DataFrame]] = []
        cols_120s_all: List[str] = []

        for feat_name, df in frames_original:
            df30 = resample_epoch_to_30s(df, grid)
            resampled_frames.append((feat_name, df30))
            print(f"[OK]  RESAMPLE {feat_name} -> 30s")

            tail = feat_name.split("_", 1)[-1]  # e.g. "LF_power"
            if tail in FEATURE_120S:
                cols_120s_all.extend(list(df30.columns))

        merged = merge_features_on_grid(grid, resampled_frames)

        # === 120 s series: back-fill internal gaps only, then front-fill the first 30 s bin ===
        # NOTE(review): the names collected above are the pre-merge column names.
        # If merge_features_on_grid had to rename a colliding 120 s column to
        # 'col (feat)', the renamed column is not picked up here and the column
        # that kept the plain name is treated as a 120 s series instead - confirm
        # that feature column names are unique across files.
        cols_120s_all = sorted(set([c for c in cols_120s_all if c in merged.columns]))
        if cols_120s_all:
            bfill_internal_columns(merged, cols_120s_all)
            print(f"[OK]  BFILL(120s) -> {', '.join(cols_120s_all)}")
            changed = front_fill_first_bin_from_next(merged, cols_120s_all)
            if changed:
                print(f"[OK]  FRONT-FILL(1770) -> {', '.join(changed)}")

        # === Insert the FMS column as the 3rd column (replacing an existing one) ===
        if "FMS" in merged.columns:
            merged.drop(columns=["FMS"], inplace=True)
        fms_series = make_fms_series_for_subject(sid, merged[["Epoch_start", "Epoch_end"]])
        merged.insert(2, "FMS", fms_series)

        # === Save ===
        # A failed write is logged and the loop moves on to the next subject.
        try:
            merged.to_csv(out_csv, index=False, encoding="utf-8-sig")
            print(f"[OK]  EPOCH -> {str(out_csv)}")
        except Exception as e:
            print(f"[SKIP] EPOCH: save failed ({e})")


# Script entry point: run step (4) with the configured defaults when this
# code is executed directly.
if __name__ == "__main__":
    main_4()
