In [None]:
# -*- coding: utf-8 -*-
"""
処理(1): 各測定器のRAWデータを Time_sec + 値列 に正規化し、CSV/PNG を出力
- ディレクトリ: BASE_DIR/{subject_id}{person_name}/
- 出力先: 上記フォルダ内の RAW/
- サンプリング周波数は「仕様で固定」し、ファイルに含まれる時刻は使用しない
  Pulse=100Hz / Thermo=10Hz / Skinos=0.1Hz / Face=15Hz
- Skinosは "Skinos-Sweat*.csv" を自動検出（複数あれば更新日時が最新）
- Faceは「ファイル名を優先」：Bファイルの中身がA列でも、Bとして出力名・列名を統一
"""

import os
import csv
import glob
from typing import Optional, Tuple, List

import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


# ===================== ユーザー設定 =====================
BASE_DIR = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果"

# 被験者一覧（この順で一括処理）
subjects: List[Tuple[str, str]] = [
    ("0521", "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", "")
]


# Sampling frequencies (Hz); timestamps stored in the input files are not used.
# NOTE(review): the module docstring at the top of this cell states
# Pulse=100Hz, but the value below is 1000 — confirm which one is correct.
FS_PULSE  = 1000  # Pulse
FS_THERMO = 10.0   # Thermo
FS_SKINOS = 0.1    # Skinos (= one sample every 10 s)
FS_FACE   = 15.0   # Face
# =======================================================


# ===================== 共通ユーティリティ =====================
def ensure_dir(path: str) -> None:
    """Create directory ``path`` (including parents); no-op if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def generate_time(n: int, fs_hz: float) -> np.ndarray:
    """Return ``n`` evenly spaced timestamps in seconds, starting at t=0.

    Sample ``k`` is assigned the time ``k / fs_hz``.

    Raises:
        ValueError: if ``fs_hz`` is zero or negative.
    """
    if fs_hz <= 0:
        raise ValueError(f"fs_hz must be positive, got {fs_hz}")
    sample_index = np.arange(n, dtype=float)
    return np.divide(sample_index, fs_hz)

def _fmt_mmss(x: float, _pos) -> str:
    """秒 → mm:ss（軸表示）"""
    x = float(x)
    m = int(x // 60)
    s = int(x % 60)
    return f"{m:02d}:{s:02d}"

def plot_lines(df: pd.DataFrame, title_ascii: str, out_png: str) -> None:
    """Plot every value column of ``df`` against ``Time_sec`` and save a PNG.

    The x axis is labelled in mm:ss. A legend is drawn only when the frame has
    more than two columns (i.e. more than one value column). Title and axis
    labels are ASCII.

    Args:
        df: Frame with a ``Time_sec`` column plus one or more value columns.
        title_ascii: Figure title.
        out_png: Destination path of the PNG file.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for c in df.columns:
            if c == "Time_sec":
                continue
            ax.plot(df["Time_sec"], df[c], linewidth=1.5, label=c)
        ax.set_title(title_ascii, fontsize=30)
        ax.set_xlabel("Time (mm:ss)", fontsize=24)
        ax.set_ylabel("Value", fontsize=24)
        ax.tick_params(axis="both", labelsize=20)
        ax.xaxis.set_major_formatter(FuncFormatter(_fmt_mmss))
        if len(df.columns) > 2:
            ax.legend(fontsize=20)
        fig.tight_layout()
        fig.savefig(out_png, dpi=200)
    finally:
        # Always release the figure: callers catch exceptions and keep looping,
        # so a failed plot/save would otherwise leave the figure open.
        plt.close(fig)

def finalize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Clean up a normalized frame before it is written out.

    Steps:
      - drop rows whose ``Time_sec`` is NaN
      - drop rows in which every value column is NaN
      - sort ascending by ``Time_sec`` and renumber the index from 0
        (duplicate times are not expected; if present they are all kept)
    """
    cleaned = df.dropna(subset=["Time_sec"])
    measurement_cols = [name for name in cleaned.columns if name != "Time_sec"]
    if measurement_cols:
        cleaned = cleaned.dropna(how="all", subset=measurement_cols)
    return cleaned.sort_values("Time_sec").reset_index(drop=True)

def first_or_latest(pattern: str) -> Optional[str]:
    """Return the most recently modified path matching ``pattern``, or None if nothing matches."""
    matches = glob.glob(pattern)
    return max(matches, key=os.path.getmtime) if matches else None
# ============================================================


# ===================== デバイス別処理 =====================
def process_pulse_mat(mat_path: str, out_csv: str, out_png: str) -> None:
    """Convert a Pulse recording (.mat) into a ``Time_sec``/``Pulse`` CSV plus a PNG plot.

    The MAT file must contain a variable named ``data``; it is flattened to 1-D
    and cast to float. Time stamps are generated from the module-level
    ``FS_PULSE`` rate rather than read from the file.

    Raises:
        FileNotFoundError: ``mat_path`` does not exist.
        KeyError: the MAT file has no ``data`` variable.
    """
    if not os.path.exists(mat_path):
        raise FileNotFoundError(mat_path)

    contents = loadmat(mat_path, squeeze_me=True, struct_as_record=False)
    if "data" not in contents:
        # List only user variables; loadmat also adds "__header__"-style keys.
        avail = [name for name in contents if not name.startswith("__")]
        raise KeyError(f"Pulse MAT: 'data' がありません。available={avail}")

    signal = np.ravel(contents["data"]).astype(float)
    frame = pd.DataFrame({
        "Time_sec": generate_time(len(signal), FS_PULSE),
        "Pulse": signal,
    })
    frame = finalize_df(frame)
    frame.to_csv(out_csv, index=False)
    plot_lines(frame, "Pulse", out_png)


def process_thermo_csv(csv_path: str, out_csv: str, out_png: str) -> None:
    """Convert a thermocouple CSV into a ``Time_sec``/``Thermo1`` CSV plus a PNG plot.

    The header row is located by scanning for the first line that starts with
    ``Time`` (optionally double-quoted). Column ``U1-1[C]`` is converted to
    numbers (unparseable cells become NaN) and written out as ``Thermo1``.
    Time stamps are generated from the module-level ``FS_THERMO`` rate; the
    ``Time`` column in the file is not used.

    Raises:
        FileNotFoundError: ``csv_path`` does not exist.
        ValueError: no header line starting with ``Time`` was found.
        KeyError: the ``U1-1[C]`` column is missing.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)

    source_col = "U1-1[C]"

    header_row = None
    with open(csv_path, "r", encoding="cp932", errors="ignore") as f:
        for idx, line in enumerate(f):
            if line.strip().startswith(('"Time"', "Time")):
                header_row = idx
                break
    if header_row is None:
        raise ValueError("Thermo: ヘッダ(Time, ...)が見つかりません。")

    # header_row is a physical line index. read_csv's `header=` does not count
    # blank lines, so skip the preamble by physical line count instead and take
    # the first remaining line as the header.
    df_raw = pd.read_csv(csv_path, skiprows=header_row, header=0, encoding="cp932")
    if source_col not in df_raw.columns:
        # Built from a variable: reusing the enclosing quote character inside an
        # f-string replacement field is a SyntaxError before Python 3.12.
        raise KeyError(f"Thermo: 必要列 '{source_col}' なし。columns={list(df_raw.columns)}")

    y = pd.to_numeric(df_raw[source_col], errors="coerce").to_numpy()
    t = generate_time(len(y), FS_THERMO)

    out = pd.DataFrame({"Time_sec": t, "Thermo1": y})
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, "Thermo", out_png)


def process_skinos_csv(csv_path: str, out_csv: str, out_png: str) -> None:
    """Convert a Skinos CSV into a normalized CSV plus a PNG plot.

    The file is read with ``header=1``. Three columns are converted to numbers
    (unparseable cells become NaN) and renamed:

        'Instance_Sweat(mg/cm^2/min)' -> 'Sweat_Rate'
        'Heart_Rate(bpm)'             -> 'Heart_Rate'
        'Skin_Temperature(degree C)'  -> 'Skin_Temp'

    Time stamps are generated from the module-level ``FS_SKINOS`` rate, one per
    row of the input table.

    Raises:
        FileNotFoundError: ``csv_path`` does not exist.
        KeyError: one of the required columns is missing.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)

    # Undecodable bytes are dropped while opening the file; read_csv is handed
    # the open handle so no encoding option has to be passed to pandas.
    with open(csv_path, "r", encoding="utf-8", errors="ignore") as f:
        df_raw = pd.read_csv(f, header=1)

    column_map = {
        "Instance_Sweat(mg/cm^2/min)": "Sweat_Rate",
        "Heart_Rate(bpm)": "Heart_Rate",
        "Skin_Temperature(degree C)": "Skin_Temp",
    }
    for c in column_map:
        if c not in df_raw.columns:
            raise KeyError(f"Skinos: 必要列 '{c}' なし。columns={list(df_raw.columns)}")

    numeric = {
        target: pd.to_numeric(df_raw[source], errors="coerce").to_numpy()
        for source, target in column_map.items()
    }

    out = pd.DataFrame({"Time_sec": generate_time(len(df_raw), FS_SKINOS), **numeric})
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, "Skinos", out_png)


def process_face_table_csv(csv_path: str, expected_prefix: str, out_csv: str, out_png: str,
                           encoding: str = "cp932") -> None:
    """Convert a FaceA/FaceB table CSV into a normalized CSV plus a PNG plot.

    The header row is the first line containing all of ``番号``, ``測定日付``
    and ``測定時間``; anything above it is skipped. The ``BOX <letter> MAX.`` /
    ``BOX <letter> AVE.`` column pair is extracted, preferring the letter
    implied by ``expected_prefix`` and falling back to the other letter. The
    output columns are always named after ``expected_prefix`` ("file name
    wins"), whichever letter the file actually contained:

        '{expected_prefix}_BoxMax', '{expected_prefix}_BoxAve'

    Time stamps are generated from the module-level ``FS_FACE`` rate.

    Args:
        csv_path: Input CSV path.
        expected_prefix: ``"FaceA"`` or ``"FaceB"``.
        out_csv: Output CSV path.
        out_png: Output PNG path.
        encoding: Text encoding of the input file.

    Raises:
        FileNotFoundError: ``csv_path`` does not exist.
        ValueError: ``expected_prefix`` is invalid, or no header row was found.
        KeyError: neither the A nor the B column pair is present.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)
    if expected_prefix not in ("FaceA", "FaceB"):
        raise ValueError("expected_prefix must be 'FaceA' or 'FaceB'.")

    # Locate the header row by physical line index.
    header_row = None
    with open(csv_path, "r", encoding=encoding, errors="ignore") as f:
        for idx, line in enumerate(f):
            if all(token in line for token in ("番号", "測定日付", "測定時間")):
                header_row = idx
                break
    if header_row is None:
        raise ValueError(f"{csv_path}: ヘッダ行が見つかりません。")

    # read_csv's `header=` does not count blank lines, whereas header_row is a
    # physical line index; skip the meta lines by physical count instead.
    # QUOTE_NONE guards against malformed quoting in the input.
    df_raw = pd.read_csv(csv_path, skiprows=header_row, header=0, encoding=encoding,
                         engine="python", quoting=csv.QUOTE_NONE)

    want_letter = expected_prefix[-1]  # 'A' or 'B'
    alt_letter = "B" if want_letter == "A" else "A"

    def has_cols(letter: str) -> bool:
        return {f"BOX {letter} MAX.", f"BOX {letter} AVE."}.issubset(df_raw.columns)

    # Prefer the letter matching the file name; otherwise accept the other one.
    src_letter = next((c for c in (want_letter, alt_letter) if has_cols(c)), None)
    if src_letter is None:
        raise KeyError(f"{csv_path}: Faceの必要列が見つかりません。columns={list(df_raw.columns)}")

    max_vals = pd.to_numeric(df_raw[f"BOX {src_letter} MAX."], errors="coerce").to_numpy()
    ave_vals = pd.to_numeric(df_raw[f"BOX {src_letter} AVE."], errors="coerce").to_numpy()

    n = min(len(max_vals), len(ave_vals))
    t = generate_time(n, FS_FACE)

    out = pd.DataFrame({
        "Time_sec": t,
        f"{expected_prefix}_BoxMax": max_vals[:n],
        f"{expected_prefix}_BoxAve": ave_vals[:n],
    })
    out = finalize_df(out)
    out.to_csv(out_csv, index=False)
    plot_lines(out, expected_prefix, out_png)

# --- ログ出力ヘルパ ---
def log_subject_header(subject_id: str, person_name: str) -> None:
    """Print the per-subject header line of the log."""
    header = f"# Subject {subject_id}{person_name}"
    print(header)

def log_ok(device: str, out_csv: str) -> None:
    """Print a success line naming the device and the CSV that was written."""
    message = f"[OK]  {device} -> {out_csv}"
    print(message)

def log_skip(device: str, reason: str) -> None:
    """Print a skip line naming the device and the reason it was not processed."""
    message = f"[SKIP] {device}: {reason}"
    print(message)

# ============================================================


# ===================== メイン処理 =====================
def main_1() -> None:
    """Normalize every device's raw file for every subject in ``subjects``.

    For each subject the ``RAW`` output directory is created, then Pulse,
    Thermo, Skinos, FaceA and FaceB are handled in that order. A device whose
    input file is missing, or whose processing raises, is logged as ``[SKIP]``
    and the run continues with the next device.
    """
    for subject_id, person_name in subjects:
        subject_dir = os.path.join(BASE_DIR, f"{subject_id}{person_name}")
        raw_dir = os.path.join(subject_dir, "RAW")
        ensure_dir(raw_dir)

        log_subject_header(subject_id, person_name)

        # Input files (Skinos is discovered by glob; None when nothing matches).
        path_pulse = os.path.join(subject_dir, f"{subject_id}.mat")
        path_thermo = os.path.join(subject_dir, f"{subject_id}.CSV")
        path_faceA = os.path.join(subject_dir, f"{subject_id}A.CSV")
        path_faceB = os.path.join(subject_dir, f"{subject_id}B.CSV")
        path_skinos = first_or_latest(os.path.join(subject_dir, "Skinos-Sweat*.csv"))

        # (device, input path, runner(src, out_csv, out_png), reason logged when missing)
        jobs = [
            ("Pulse", path_pulse, process_pulse_mat,
             f"file not found: {path_pulse}"),
            ("Thermo", path_thermo, process_thermo_csv,
             f"file not found: {path_thermo}"),
            ("Skinos", path_skinos, process_skinos_csv,
             "file not found (Skinos-Sweat*.csv)"),
            ("FaceA", path_faceA,
             lambda src, dst_csv, dst_png: process_face_table_csv(src, "FaceA", dst_csv, dst_png),
             f"file not found: {path_faceA}"),
            ("FaceB", path_faceB,
             lambda src, dst_csv, dst_png: process_face_table_csv(src, "FaceB", dst_csv, dst_png),
             f"file not found: {path_faceB}"),
        ]

        for device, in_path, runner, missing_reason in jobs:
            if not (in_path and os.path.exists(in_path)):
                log_skip(device, missing_reason)
                continue

            # Outputs go under RAW/ as {subject_id}_{device}.csv / .png
            out_csv = os.path.join(raw_dir, f"{subject_id}_{device}.csv")
            out_png = os.path.join(raw_dir, f"{subject_id}_{device}.png")
            try:
                runner(in_path, out_csv, out_png)
                log_ok(device, out_csv)
            except Exception as e:
                log_skip(device, str(e))

    print("\nAll done.")


# Entry point: run step (1) only when this code is executed as a script.
if __name__ == "__main__":
    main_1()


# Subject 0521因幡先生
[OK]  Pulse -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\RAW\0521_Pulse.csv
[OK]  Thermo -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\RAW\0521_Thermo.csv
[OK]  Skinos -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\RAW\0521_Skinos.csv
[OK]  FaceA -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\RAW\0521_FaceA.csv
[OK]  FaceB -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\RAW\0521_FaceB.csv
# Subject 06021今村さん
[OK]  Pulse -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\RAW\06021_Pulse.csv
[OK]  Thermo -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\RAW\06021_Thermo.csv
[OK]  Skinos -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\RAW\06021_Skinos.csv
[OK]  FaceA -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\RAW\06021_FaceA.csv
[OK]  FaceB -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研

# 処理(1) — RAWデータ整形・保存

## 目的
各測定器の生データを **Time_sec + 測定値列** に正規化し、被験者フォルダ内の `RAW` に  
**CSV** と **PNGグラフ** を出力する。

## 入出力の前提
- 入力ディレクトリ: `BASE_DIR/{subject_id}{person_name}/`
- 出力ディレクトリ: 同フォルダ内 `RAW/`
- サンプリング周波数（**仕様で固定**・ファイルの時刻は使わない）
  - Pulse: **100 Hz**（※コード上の `FS_PULSE` は `1000` に設定されており不一致。どちらが正しいか要確認）
  - Thermo: **10 Hz**
  - Skinos: **0.1 Hz**（=10秒間隔）
  - Face: **15 Hz**

## 想定ファイル名（例）
- Pulse（MAT）: `0521.mat`
- Thermo（CSV）: `0521.CSV`
- Skinos（CSV）: `Skinos-Sweat*.csv`（フォルダ内の**最新**を自動選択）
- FaceA（CSV）: `0521A.CSV`
- FaceB（CSV）: `0521B.CSV`

## 出力ファイル（RAW配下）
- `{subject_id}_Pulse.csv/png`（列: `Time_sec`, `Pulse`）
- `{subject_id}_Thermo.csv/png`（列: `Time_sec`, `Thermo1`）
- `{subject_id}_Skinos.csv/png`（列: `Time_sec`, `Sweat_Rate`, `Heart_Rate`, `Skin_Temp`）
- `{subject_id}_FaceA.csv/png`（列: `Time_sec`, `FaceA_BoxMax`, `FaceA_BoxAve`）
- `{subject_id}_FaceB.csv/png`（列: `Time_sec`, `FaceB_BoxMax`, `FaceB_BoxAve`）

## 処理の流れ（全体）
1. 被験者リスト `subjects` をループ。
2. 各フォルダ内のファイルを読み取り（Skinosは `Skinos-Sweat*.csv` の**最新**を選択）。
3. **Time_sec は t=0 からサンプリングレートで等間隔生成**（ファイル内の時刻は使用しない）。
4. 必要列を数値化（`errors="coerce"`）、DataFrame作成。
5. 仕上げ（`finalize_df`：NaN整理・昇順）→ CSV保存 → 簡易PNGグラフ保存。
6. 例外はデバイス単位で `[SKIP]` としてログ出力し、次のデバイス（および次の被験者）へ処理を継続。

## 関数一覧（要点）
- `generate_time(n, fs)`: サンプル数 `n` を **fs(Hz)** に基づいて 0,1/fs,… の配列に変換。
- `process_pulse_mat(...)`: MATの`data`を読み、**100Hz**で`Time_sec`生成。
- `process_thermo_csv(...)`: `U1-1[C]`列を`Thermo1`として採用、**10Hz**で`Time_sec`生成。
- `process_skinos_csv(...)`: `Instance_Sweat/Heart_Rate/Skin_Temperature` を採用、**0.1Hz**で生成。
- `process_face_table_csv(..., expected_prefix)`: ヘッダ検出→`BOX A/B MAX./AVE.`抽出。  
  **ファイル名優先**で `FaceA_*/FaceB_*` にリネームして保存（中身がAでもBとして出力可能）。
- `plot_lines(...)`: `Time_sec`を横軸に複数列を重ね書き、PNG出力（x軸は mm:ss 表示）。
- `finalize_df(...)`: 欠損整理・昇順整列（重複は想定しないが安全策）。
- `first_or_latest(pattern)`: パターン一致ファイルの**最新**を返す。


In [2]:
"""
Offset RAW Time_sec per subject & per sensor, then save CSVs to OFFSET and output PNG charts.

仕様の要点:
- 入力: BASE_DIR/{subject_id}{person_name}/RAW/*.csv
- 出力: BASE_DIR/{subject_id}{person_name}/OFFSET/*.csv (+ .png)
- Skinos は処理対象外（ベースのため）
- FaceB は FaceA と同じオフセットを自動適用
- グラフは mm:ss 軸, 線幅1.5, 1500-2100秒を薄赤帯+赤ハッチでハイライト
- ログ形式（オフセット値は出さないこと）:
    # Subject {subject_id}{person_name}
    [OK]  {Device} -> {output_csv_path}
    [SKIP] {Device}: {reason}

使い方:
1) BASE_DIR と subjects を編集（被験者IDと氏名）
2) OFFSETS を編集（Thermoは全て0.0、FaceBは書かずFaceAを使う）
3) 必要に応じて MAKE_PLOTS / CLAMP_MIN_ZERO / HILIGHT_START/END を調整
4) 実行
"""

from typing import List, Tuple, Dict, Any
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.transforms import blended_transform_factory
from matplotlib.patches import Rectangle

# ===================== ユーザー設定 =====================
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果"

# 被験者一覧（この順で一括処理）
subjects: List[Tuple[str, str]] = [
    ("0521",  "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", ""),
]

# Time offsets in seconds, per subject and per signal.
# - Thermo is 0.0 for every subject.
# - FaceB automatically reuses the FaceA offset (no FaceB entry is needed here).
OFFSETS: Dict[str, Dict[str, float]] = {
    "0521":  {"Pulse": 1230.0, "Thermo": 0.0, "FaceA": 1350.0},
    "06021": {"Pulse": 1380.0, "Thermo": 0.0, "FaceA": 1350.0},
    "06022": {"Pulse": 1350.0, "Thermo": 0.0, "FaceA": 1320.0},
    "06271": {"Pulse": 1380.0, "Thermo": 0.0, "FaceA": 1410.0},
    "06272": {"Pulse": 1380.0, "Thermo": 0.0, "FaceA": 1320.0},
    "06273": {"Pulse": 1470.0, "Thermo": 0.0, "FaceA": 1320.0},
    "06274": {"Pulse": 1350.0, "Thermo": 0.0, "FaceA": 1380.0},
    "06275": {"Pulse": 1380.0, "Thermo": 0.0, "FaceA": 1410.0},
}

# Output options
MAKE_PLOTS: bool = True          # write PNG plots when True
CLAMP_MIN_ZERO: bool = False     # when True, re-shift so the minimum time after the offset is 0
HILIGHT_START: float = 1500.0    # start of the highlighted band (seconds)
HILIGHT_END: float = 2100.0      # end of the highlighted band (seconds)

# ===================== Sensor configuration (order is preserved) =====================
# Note: Skinos is not processed (enabled=False), but its entry is kept so the
# log lists the devices in the same order as the other steps.
SENSORS: List[Dict[str, Any]] = [
    {
        "name": "Pulse",
        "enabled": True,
        "file": "{sid}_Pulse.csv",
        "y_cols": ["Pulse"],
        "title": "Pulse"  # ASCII only
    },
    {
        "name": "Thermo",
        "enabled": True,
        "file": "{sid}_Thermo.csv",
        "y_cols": ["Thermo1"],
        "title": "Thermo"  # ASCII only
    },
    {
        "name": "Skinos",
        "enabled": False,  # excluded: Skinos is the time base the other signals are aligned to
        "file": "{sid}_Skinos.csv",
        "y_cols": ["Sweat_Rate", "Heart_Rate", "Skin_Temp"],
        "title": "Skinos"  # ASCII only
    },
    {
        "name": "FaceA",
        "enabled": True,
        "file": "{sid}_FaceA.csv",
        "y_cols": ["FaceA_BoxMax", "FaceA_BoxAve"],
        "title": "FaceA"  # ASCII only
    },
    {
        "name": "FaceB",
        "enabled": True,
        "file": "{sid}_FaceB.csv",  # always the short file-name form
        "y_cols": ["FaceB_BoxMax", "FaceB_BoxAve"],
        "title": "FaceB"  # ASCII only
    },
]

TIME_COL: str = "Time_sec"  # name of the time column in every CSV (fixed)

# ===================== ユーティリティ =====================
def read_csv_robust(path: str) -> pd.DataFrame:
    """Read a CSV, trying several text encodings in turn.

    ``utf-8-sig`` is tried first (it also reads BOM-less UTF-8, so a separate
    ``utf-8`` attempt would never succeed where this one failed), then
    ``cp932``. If both fail to decode, the file is read with undecodable bytes
    ignored.

    Only decoding failures trigger a fallback; any other error (missing file,
    empty file, malformed CSV) propagates immediately instead of being retried
    under every encoding.
    """
    for enc in ("utf-8-sig", "cp932"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            continue
    # Last resort for files with mixed or unknown encodings.
    return pd.read_csv(path, encoding_errors="ignore")

def ensure_dir(path: str) -> None:
    """Create directory ``path`` (including parents); no-op if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def mmss_formatter(x: float, pos: int) -> str:
    """Format seconds as ``mm:ss`` (rounded to whole seconds); negative values get a leading '-'.

    ``pos`` is the tick index passed by Matplotlib's ``FuncFormatter`` and is unused.
    """
    prefix = "-" if x < 0 else ""
    magnitude = abs(x)
    minutes = int(magnitude // 60)
    seconds = int(round(magnitude - minutes * 60))
    # Rounding can yield 60 seconds; carry it into the minutes.
    if seconds == 60:
        minutes, seconds = minutes + 1, 0
    return f"{prefix}{minutes:02d}:{seconds:02d}"

def apply_offset(df: pd.DataFrame, offset_sec: float, clamp_min_zero: bool) -> pd.DataFrame:
    """Return a copy of ``df`` whose time column is shifted by ``offset_sec``.

    The time column is converted to numbers first (unparseable cells become
    NaN). When ``clamp_min_zero`` is true, the shifted times are moved again so
    that their minimum is 0.

    Raises:
        ValueError: the time column is missing from ``df``.
    """
    if TIME_COL not in df.columns:
        raise ValueError(f"{TIME_COL} not found: columns={list(df.columns)}")

    shifted = df.copy()
    times = pd.to_numeric(shifted[TIME_COL], errors="coerce") + float(offset_sec)
    if clamp_min_zero:
        earliest = times.min()
        if pd.notna(earliest) and earliest != 0:
            times = times - earliest
    shifted[TIME_COL] = times
    return shifted

def add_red_hatched_band(ax, x0: float, x1: float) -> None:
    """Highlight the x interval [x0, x1] with a faint red band overlaid with red hatching."""
    # Faint red background band.
    ax.axvspan(x0, x1, color="red", alpha=0.08, zorder=0)

    # Hatched rectangle: x in data coordinates, y in axes coordinates (0..1),
    # so it spans the full axes height.
    hatch_box = Rectangle(
        (x0, 0),
        x1 - x0,
        1,
        transform=blended_transform_factory(ax.transData, ax.transAxes),
        fill=False,
        hatch="////",
        edgecolor="red",
        linewidth=0.0,
        zorder=1,
        alpha=0.5,
    )
    ax.add_patch(hatch_box)

def plot_timeseries(df: pd.DataFrame,
                    y_cols: List[str],
                    out_png: str,
                    title: str,
                    hilight_range: Tuple[float, float]) -> None:
    """Save a PNG line plot of ``y_cols`` against the time column, with a highlighted band.

    Columns listed in ``y_cols`` but absent from ``df`` are silently skipped.
    Values are converted to numbers before plotting (unparseable cells become
    NaN). The legend is drawn only when at least one series was plotted.

    Args:
        df: Frame containing the time column and (some of) ``y_cols``.
        y_cols: Names of the value columns to draw.
        out_png: Destination path of the PNG file.
        title: Figure title (ASCII only by convention).
        hilight_range: ``(start, end)`` of the highlighted x interval, in seconds.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Data lines (only for columns that exist).
        plotted = False
        for c in y_cols:
            if c in df.columns:
                ax.plot(df[TIME_COL], pd.to_numeric(df[c], errors="coerce"),
                        label=c, linewidth=1.5)
                plotted = True

        # Highlight band.
        x0, x1 = hilight_range
        add_red_hatched_band(ax, x0, x1)

        # Appearance (ASCII only).
        ax.set_title(title, fontsize=30)
        ax.set_xlabel("Time (mm:ss)", fontsize=24)
        ax.set_ylabel("Value", fontsize=24)
        ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
        ax.tick_params(axis="both", labelsize=20)
        if plotted:
            # An empty legend would only trigger a "no artists with labels" warning.
            ax.legend(fontsize=20)

        fig.tight_layout()
        fig.savefig(out_png, dpi=200)
    finally:
        # Always release the figure: the caller catches exceptions and keeps
        # going, so a failed plot/save would otherwise leave the figure open.
        plt.close(fig)

def resolve_offset(per_sensor_offset: Dict[str, float], sensor_name: str) -> float:
    """Look up the offset for ``sensor_name``; FaceB uses the FaceA entry. Missing entries give 0.0."""
    lookup_key = "FaceA" if sensor_name == "FaceB" else sensor_name
    return float(per_sensor_offset.get(lookup_key, 0.0))

# ===================== コア処理 =====================
def process_sensor(raw_dir: str,
                   out_dir: str,
                   sensor_cfg: Dict[str, Any],
                   sid: str,
                   per_sensor_offset: Dict[str, float]) -> str:
    """Apply the time offset to one sensor's CSV and return the log line.

    Returns:
        ``[OK]  {Device} -> {output_csv_path}`` on success, otherwise
        ``[SKIP] {Device}: {reason}`` (sensor disabled, input missing, or any
        exception raised while processing). The offset value itself is never
        included in the log line.
    """
    name = sensor_cfg["name"]

    # Disabled sensors only produce a log line.
    if not sensor_cfg.get("enabled", True):
        return f"[SKIP] {name}: skipped by config"

    in_csv = os.path.join(raw_dir, sensor_cfg["file"].format(sid=sid))
    if not os.path.exists(in_csv):
        return f"[SKIP] {name}: not found -> {in_csv}"

    try:
        # Read, resolve the offset (FaceB inherits FaceA's), and shift the times.
        shifted = apply_offset(
            read_csv_robust(in_csv),
            resolve_offset(per_sensor_offset, name),
            CLAMP_MIN_ZERO,
        )

        # Write the CSV under the same file name in the output directory.
        ensure_dir(out_dir)
        out_csv = os.path.join(out_dir, os.path.basename(in_csv))
        shifted.to_csv(out_csv, index=False)

        # Optional plot next to the CSV.
        if MAKE_PLOTS:
            stem, _ext = os.path.splitext(out_csv)
            plot_timeseries(
                df=shifted,
                y_cols=sensor_cfg["y_cols"],
                out_png=stem + ".png",
                title=sensor_cfg["title"],
                hilight_range=(HILIGHT_START, HILIGHT_END),
            )
    except Exception as e:
        # Failures are reported as [SKIP] too: the log uses only OK / SKIP.
        return f"[SKIP] {name}: {e}"

    return f"[OK]  {name} -> {out_csv}"

def process_subject(sid: str, name: str, offsets_all: Dict[str, Dict[str, float]]) -> str:
    """Process every entry of ``SENSORS`` for one subject and return the joined log text.

    The first log line is ``# Subject {sid}{name}``; one line per sensor follows,
    in ``SENSORS`` order. A subject with no entry in ``offsets_all`` gets an
    empty offset table, i.e. every offset resolves to 0.0.
    """
    subj_dir = os.path.join(BASE_DIR, f"{sid}{name}")
    raw_dir = os.path.join(subj_dir, "RAW")
    out_dir = os.path.join(subj_dir, "OFFSET")
    per_sensor_offset = offsets_all.get(sid, {})

    sensor_lines = [
        process_sensor(raw_dir, out_dir, sensor_cfg, sid, per_sensor_offset)
        for sensor_cfg in SENSORS
    ]
    return "\n".join([f"# Subject {sid}{name}", *sensor_lines])

def main() -> None:
    """Process every subject in ``subjects`` in order, then print all logs at once."""
    report = "\n".join(
        process_subject(sid, person_name, OFFSETS)
        for sid, person_name in subjects
    )
    print(report)

# Entry point: run step (2) only when this code is executed as a script.
if __name__ == "__main__":
    main()


# Subject 0521因幡先生
[OK]  Pulse -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\OFFSET\0521_Pulse.csv
[OK]  Thermo -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\OFFSET\0521_Thermo.csv
[SKIP] Skinos: skipped by config
[OK]  FaceA -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\OFFSET\0521_FaceA.csv
[OK]  FaceB -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\OFFSET\0521_FaceB.csv
# Subject 06021今村さん
[OK]  Pulse -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\OFFSET\06021_Pulse.csv
[OK]  Thermo -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\OFFSET\06021_Thermo.csv
[SKIP] Skinos: skipped by config
[OK]  FaceA -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\OFFSET\06021_FaceA.csv
[OK]  FaceB -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\OFFSET\06021_FaceB.csv
# Subject 06022梅野さん
[OK]  Pulse -> C:\Users\taiki\OneDrive - Science Tokyo\

# 処理(2) — OFFSET追加・保存

## 目的
- `BASE_DIR/{subject_id}{person_name}/RAW/*.csv` の `Time_sec` に **被験者ごと・生体信号ごとのオフセット（秒）** を加算し，`OFFSET/` に保存する．
- グラフ（PNG）も出力し，**1500〜2100秒** の区間を **薄赤の帯＋赤い網掛け** でハイライトする．
- ログは被験者ごとにヘッダ行 `# Subject {subject_id}{person_name}` を出し，各デバイスの成否を  
  `[OK] {Device} -> {output_csv_path}` / `[SKIP] {Device}: {reason}` で出力する（※オフセット値はログに出さない）．

## 入出力の前提
- 入力：`{sid}_Pulse.csv`, `{sid}_Thermo.csv`, `{sid}_FaceA.csv`, `{sid}_FaceB.csv`（FaceBは短い形式で固定）  
  ※Skinosはベースのため**処理対象から除外**
- 出力：`OFFSET/` 配下に **同名CSV** を保存（必要に応じて同名 `.png` も出力）

## オフセット規則
- `OFFSETS` に **被験者ID → {Pulse, Thermo, FaceA}** の秒数を直接記述する（Thermoは全て0.0）．
- **FaceB は FaceA と同じオフセット** を自動適用する．
- 設定が無い場合は **0.0秒** とみなす．
- オプション `CLAMP_MIN_ZERO=True` で，オフセット後の最小時刻を **0** に再平行移動可．

## 可視化の規約
- 線の太さ：`linewidth=1.5`
- フォント：タイトル30，軸24，凡例20，目盛20
- タイトル・ラベルは **半角英数字** のみ
- X軸は **mm:ss** 表示（内部は秒）
- ハイライト帯：**1500〜2100秒** を **薄い赤の帯＋赤ハッチ** で表示

## 関数一覧
- `read_csv_robust(path)`  
  文字コードを順に試す安全読み込み（`utf-8-sig`→`utf-8`→`cp932`→`ignore`）．
- `ensure_dir(path)`  
  保存先ディレクトリを作成（存在すれば何もしない）．
- `mmss_formatter(x, pos)`  
  秒を **mm:ss** に整形する Matplotlib 用フォーマッタ．
- `apply_offset(df, offset, clamp_min_zero)`  
  `Time_sec` を数値化してオフセットを加算．`clamp_min_zero=True` なら最小値を0に再平行移動．
- `add_red_hatched_band(ax, x0, x1)`  
  指定区間 `[x0, x1]` に薄赤帯＋赤ハッチを描画．
- `plot_timeseries(df, y_cols, out_png, title, hilight_range)`  
  折れ線（既存列のみ）を描き，ハイライト帯を重ねてPNG保存．
- `resolve_offset(per_sensor_offset, sensor)`  
  FaceBの場合はFaceAの値を返却，それ以外は該当センサの値（無ければ0.0）．
- `process_sensor(raw_dir, out_dir, sensor_cfg, sid, per_sensor_offset)`  
  1センサ分の入出力と可視化を実施し，ログ文を返す（成功は `[OK]`，失敗/無入力は `[SKIP]`）．
- `process_subject(sid, name, offsets)`  
  被験者1名分を所定順（Pulse→Thermo→Skinos→FaceA→FaceB）で処理し，ログをまとめて返す．  
  ※Skinosは **設定で無効化** とし `[SKIP] Skinos: skipped by config` を出す．
- `main()`  
  `subjects` を順に処理してログを標準出力にまとめて表示．


In [3]:
# -*- coding: utf-8 -*-
"""
(3) Feature Engineering - FaceTemp
---------------------------------
Paths below are written with forward slashes so that this (non-raw) docstring
contains no backslash escape sequences.

- Input:  {BASE_DIR}/{sid}{person_name}/OFFSET/{sid}_FaceA.csv, {sid}_FaceB.csv
- Output: {BASE_DIR}/{sid}{person_name}/FEATURE/{sid}_FaceTemp.csv (UTF-8-SIG)
- Log:
    # Subject {sid}{person_name}
    [OK]  FaceTemp -> {output_csv_path}
    or
    [SKIP] FaceTemp: {reason}
- Join: inner join on Time_sec (rows sharing a Time_sec are averaged first)
- Output columns:
    Time_sec
    FaceTemp_Max       = (FaceA_Max + FaceB_Max) / 2
    FaceTemp_Mean      = (FaceA_Mean + FaceB_Mean) / 2
    FaceTemp_Max_Diff  = abs(FaceA_Max  - FaceB_Max)
    FaceTemp_Mean_Diff = abs(FaceA_Mean - FaceB_Mean)
"""
import os
import re
from typing import List, Tuple
import pandas as pd


# ===== ユーザー指定 =====
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果"

# 被験者一覧（この順で一括処理）
subjects: List[Tuple[str, str]] = [
    ("0521",  "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", ""),
]


def _normalize_colname(col: str) -> str:
    """小文字化 + 句読点/空白簡約"""
    s = col.lower()
    s = re.sub(r"[．。]", ".", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s


def _select_face_columns(df: pd.DataFrame, box_hint: str):
    """Pick the Max and Mean columns of a Face CSV.

    Matching is done on normalized column names. Columns containing
    ``'box {box_hint}'`` together with ``'max'`` (for Max) or ``'ave'`` /
    ``'mean'`` (for Mean) are preferred; if none match, any column containing
    ``'max'`` / ``'ave'`` / ``'mean'`` is accepted. The first match in column
    order wins.

    Returns:
        ``(max_col, mean_col)`` as original column names.

    Raises:
        ValueError: no Max column or no Mean column could be found.
    """
    norm = {c: _normalize_colname(c) for c in df.columns}
    cols = list(df.columns)

    def matching(*keywords):
        # Columns whose normalized name contains every keyword.
        return [c for c in cols if all(k in norm[c] for k in keywords)]

    box_key = f"box {box_hint}"

    # Preferred: columns tied to the hinted box; then the generic fallback.
    max_candidates = matching(box_key, "max") or matching("max")
    mean_candidates = (
        matching(box_key, "ave")
        or matching(box_key, "mean")
        or matching("ave")
        or matching("mean")
    )

    if not max_candidates or not mean_candidates:
        raise ValueError(f"required columns not found (max/mean). columns={list(df.columns)}")

    return max_candidates[0], mean_candidates[0]


def _prepare_face_df(df: pd.DataFrame, label: str, box_hint: str) -> pd.DataFrame:
    """Reduce a FaceA/FaceB frame to ``Time_sec``, ``Face{label}_Max``, ``Face{label}_Mean``.

    The Max/Mean columns are located with ``_select_face_columns``, converted
    to numbers (unparseable cells become NaN), and rows sharing the same
    ``Time_sec`` are averaged. The caller's ``df`` is left unmodified: the
    conversion is done on a copy of the selected columns.

    Raises:
        ValueError: ``Time_sec`` is missing, or the Max/Mean columns cannot be found.
    """
    if "Time_sec" not in df.columns:
        raise ValueError("Time_sec column missing")

    max_col, mean_col = _select_face_columns(df, box_hint=box_hint)

    # Copy first so the numeric conversion below never writes into the input.
    tmp = df[["Time_sec", max_col, mean_col]].copy()
    for c in (max_col, mean_col):
        tmp[c] = pd.to_numeric(tmp[c], errors="coerce")

    # Collapse duplicate time stamps by averaging.
    tmp = tmp.groupby("Time_sec", as_index=False).mean(numeric_only=True)

    tmp = tmp.rename(columns={
        max_col: f"Face{label}_Max",
        mean_col: f"Face{label}_Mean",
    })
    return tmp[["Time_sec", f"Face{label}_Max", f"Face{label}_Mean"]]


def merge_and_compute_face_features(dfA: pd.DataFrame, dfB: pd.DataFrame) -> pd.DataFrame:
    """Inner-join FaceA and FaceB on ``Time_sec`` and derive the FaceTemp features.

    Output columns: ``Time_sec``, the A/B average of Max and of Mean, and the
    absolute A/B difference of Max and of Mean.

    Raises:
        ValueError: the two frames share no ``Time_sec`` value.
        pandas.errors.MergeError: ``Time_sec`` is not unique in either frame
            (the join is validated as one-to-one).
    """
    joined = dfA.merge(dfB, on="Time_sec", how="inner", validate="one_to_one")
    if joined.empty:
        raise ValueError("no overlapping Time_sec between FaceA and FaceB")

    a_max, b_max = joined["FaceA_Max"], joined["FaceB_Max"]
    a_mean, b_mean = joined["FaceA_Mean"], joined["FaceB_Mean"]

    features = pd.DataFrame(index=joined.index)
    features["Time_sec"] = joined["Time_sec"]
    features["FaceTemp_Max"] = (a_max + b_max) / 2.0
    features["FaceTemp_Mean"] = (a_mean + b_mean) / 2.0
    features["FaceTemp_Max_Diff"] = (a_max - b_max).abs()
    features["FaceTemp_Mean_Diff"] = (a_mean - b_mean).abs()
    return features


def process_face_temp_for_subject(base_root: str, sid: str, person_name: str) -> None:
    """Build the FaceTemp feature CSV for one subject and print the log lines.

    Reads ``OFFSET/{sid}_FaceA.csv`` and ``OFFSET/{sid}_FaceB.csv`` from the
    subject folder and writes ``FEATURE/{sid}_FaceTemp.csv`` (UTF-8 with BOM).
    The ``FEATURE`` directory is created up front. Missing inputs and any
    processing error are reported as ``[SKIP]`` lines; nothing is raised.
    """
    subject_dir = os.path.join(base_root, f"{sid}{person_name}")
    offset_dir = os.path.join(subject_dir, "OFFSET")
    feature_dir = os.path.join(subject_dir, "FEATURE")
    os.makedirs(feature_dir, exist_ok=True)

    inputs = {
        side: os.path.join(offset_dir, f"{sid}_Face{side}.csv")
        for side in ("A", "B")
    }
    out_path = os.path.join(feature_dir, f"{sid}_FaceTemp.csv")

    print(f"# Subject {sid}{person_name}")

    # Both inputs must exist; report the first missing one (A before B).
    for side, path in inputs.items():
        if not os.path.exists(path):
            print(f"[SKIP] FaceTemp: Face{side} CSV not found -> {path}")
            return

    try:
        dfA = pd.read_csv(inputs["A"], encoding="utf-8-sig")
        dfB = pd.read_csv(inputs["B"], encoding="utf-8-sig")
    except Exception as e:
        print(f"[SKIP] FaceTemp: failed to read CSVs ({e})")
        return

    try:
        dfA_prep = _prepare_face_df(dfA, label="A", box_hint="a")
        dfB_prep = _prepare_face_df(dfB, label="B", box_hint="b")
        out = merge_and_compute_face_features(dfA_prep, dfB_prep)
        out.to_csv(out_path, index=False, encoding="utf-8-sig")
        print(f"[OK]  FaceTemp -> {out_path}")
    except Exception as e:
        print(f"[SKIP] FaceTemp: {e}")


def main_3():
    """Step 3: FaceTemp feature design, run as a batch over all subjects."""
    for subject in subjects:
        sid, person_name = subject
        process_face_temp_for_subject(BASE_DIR, sid, person_name)


if __name__ == "__main__":
    main_3()


# Subject 0521因幡先生


  """


[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_FaceTemp.csv
# Subject 06021今村さん
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_FaceTemp.csv
# Subject 06022梅野さん
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06022梅野さん\FEATURE\06022_FaceTemp.csv
# Subject 06271
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_FaceTemp.csv
# Subject 06272
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06272\FEATURE\06272_FaceTemp.csv
# Subject 06273
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\06273_FaceTemp.csv
# Subject 06274
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_FaceTemp.csv
# Subject 06275
[OK]  FaceTemp -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06275\FEATURE\06275_FaceTemp.csv


In [10]:
# -*- coding: utf-8 -*-
"""
(3) Feature Engineering - RR intervals (Pulse)  --- v3.1
---------------------------------------------
仕様:
- 入力: OFFSET/{sid}_Pulse.csv
- 出力(CSV): FEATURE/{sid}_RR.csv
- 出力(画像):
    * RR 時系列: FEATURE/{sid}_RR.png
    * 30秒波形: FEATURE/Pulse/{sid}_PulseRR_{tstart}_{tend}.png
- 外れ値除去: 生理境界のみ（RR_BOUNDS、除外後は線形補間）
- 検出アルゴリズム: PPG向け改良版（v3）
  bandpass(0.5–8 Hz) → MWI(200ms) → rolling 97%tile×α → tip-lock(±150ms) →
  prominence(robust) & width(60–400ms) ゲート → 不応期300ms

追加（v3.1）:
- Series.fillna(method=...) の FutureWarning 解消（bfill().ffill() へ）
- prominence/width が 0 や計算不能のピークはスキップ（PeakPropertyWarning回避）
- 30秒PNGは .tmp へ保存後 os.replace で原子的に上書き（必ず Pulse に出力）
"""

import os
from typing import List, Tuple, Optional, Iterable
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy.signal import butter, filtfilt, peak_prominences, peak_widths

# ===== User settings =====
# Root folder; each subject lives in BASE_DIR/{subject_id}{person_name}.
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果"
# (subject_id, person_name) pairs, processed in this order.
subjects: List[Tuple[str, str]] = [
    ("0521",  "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", ""),
]

# Analysis window on Time_sec [s] and the length of each waveform plot window [s].
START_TIME = 1500
END_TIME   = 2100
INTERVAL   = 30

# ---- Detection parameters (v3) ----
BANDPASS_LOW  = 0.5      # Hz  (PPG)
BANDPASS_HIGH = 8.0      # Hz  (PPG)
BUTTER_ORDER  = 2
MWI_SEC       = 0.200    # 200 ms
ALPHA         = 0.28     # multiplier applied to the rolling 97th percentile
LOCK_WIN_SEC  = 0.150    # tip-lock search window: +/-150 ms
REFRACT_SEC   = 0.300    # final refractory period: 300 ms
WIDTH_MIN_SEC = 0.060    # 60 ms
WIDTH_MAX_SEC = 0.400    # 400 ms
PROM_LOCAL_K  = 2.5      # local MAD coefficient
PROM_GLOBAL_K = 3.0      # global MAD coefficient

RR_BOUNDS = (0.3, 1.5)   # physiological RR bounds [sec]

# ===== ユーティリティ =====
def _format_mmss(x, _):
    m = int(x) // 60
    s = int(x) % 60
    return f"{m:02d}:{s:02d}"  # mm:ss（ゼロ埋め）

def _set_plot_style(ax):
    """Apply the shared look: size-20 tick labels, grid on, mm:ss x ticks."""
    mmss_ticks = FuncFormatter(_format_mmss)
    ax.tick_params(labelsize=20)
    ax.grid(True)
    ax.xaxis.set_major_formatter(mmss_ticks)

def _bandpass_filter(sig: np.ndarray, fs: float) -> np.ndarray:
    """Zero-phase Butterworth band-pass between BANDPASS_LOW and BANDPASS_HIGH.

    The band edges are normalised by the Nyquist frequency (``fs / 2``) and
    the filter is applied forward and backward with ``filtfilt``.
    """
    nyquist = 0.5 * fs
    band_edges = [BANDPASS_LOW / nyquist, BANDPASS_HIGH / nyquist]
    numer, denom = butter(BUTTER_ORDER, band_edges, btype="band")
    return filtfilt(numer, denom, sig)

def _mad(x: np.ndarray) -> float:
    x = np.asarray(x, dtype=float)
    return float(np.median(np.abs(x - np.median(x))) + 1e-12)

def _mwi_from_signal(sig_bp: np.ndarray, fs: float) -> np.ndarray:
    """Pan-Tompkins-style chain: derivative -> square -> moving average.

    The moving-average window is ``MWI_SEC`` seconds long (at least one
    sample); both convolutions use ``mode="same"`` so the output has the
    same length as *sig_bp*.
    """
    deriv_kernel = np.array([1, 2, 0, -2, -1], dtype=float) / 8.0
    squared = np.convolve(sig_bp, deriv_kernel, mode="same") ** 2
    n_win = max(1, int(round(MWI_SEC * fs)))
    box = np.ones(n_win) / n_win
    return np.convolve(squared, box, mode="same")

def _coarse_candidates_local(mwi: np.ndarray, fs: float, alpha: float = ALPHA,
                             win_sec: float = 30.0) -> np.ndarray:
    """Coarse peak candidates from an adaptive threshold on the MWI signal.

    The threshold is ``alpha`` times the centred rolling 97th percentile of
    *mwi* over ``win_sec`` seconds.  Positions where the rolling quantile is
    undefined are back-filled, then forward-filled, then set to the global
    median of *mwi*.  Each threshold crossing opens a search span of one
    refractory period (``REFRACT_SEC``); the maximum inside that span becomes
    a candidate, and candidates closer than one refractory period to the
    previous one only replace it when they are larger.

    Returns:
        Integer sample indices of the candidates, in ascending order.
    """
    window = int(round(win_sec * fs))
    rolling_q97 = (
        pd.Series(mwi)
          .rolling(window=window, center=True, min_periods=int(5 * fs))
          .quantile(0.97)
    )
    # bfill -> ffill -> global median, instead of the deprecated fillna(method=...)
    th = alpha * (
        rolling_q97
          .bfill()
          .ffill()
          .fillna(np.nanmedian(mwi))
          .to_numpy()
    )

    refractory = int(round(REFRACT_SEC * fs))
    n_samples = len(mwi)
    cand = []
    pos = 0
    while pos < n_samples:
        if not (mwi[pos] > th[pos]):
            pos += 1
            continue

        # Search one refractory period ahead for the local maximum.
        stop = min(pos + refractory, n_samples - 1)
        best = pos + int(np.argmax(mwi[pos:stop + 1]))

        too_close = bool(cand) and (best - cand[-1]) < refractory
        if not too_close:
            cand.append(best)
        elif mwi[best] > mwi[cand[-1]]:
            cand[-1] = best
        pos = stop + 1
    return np.asarray(cand, dtype=int)

def _refine_and_gate(sig_bp: np.ndarray, fs: float, coarse_idx: np.ndarray) -> np.ndarray:
    """Turn coarse candidates into final peaks.

    For every coarse index the function
    1. locks onto the maximum of *sig_bp* within ``+/-LOCK_WIN_SEC``,
    2. requires a prominence of at least
       ``max(PROM_GLOBAL_K * sigma_global, PROM_LOCAL_K * sigma_local)``, where
       sigma is ``1.4826 * MAD`` (local = +/-2 s around the locked sample),
    3. requires a half-height width between ``WIDTH_MIN_SEC`` and
       ``WIDTH_MAX_SEC``,
    4. enforces the refractory period ``REFRACT_SEC`` by keeping the more
       prominent of two peaks that are too close together.

    Args:
        sig_bp: Band-pass filtered waveform.
        fs: Sampling frequency in Hz.
        coarse_idx: Candidate sample indices (ascending).

    Returns:
        Integer sample indices of the accepted peaks.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    w_lock     = int(round(LOCK_WIN_SEC * fs))
    refractory = int(round(REFRACT_SEC * fs))
    half_local = int(2 * fs)
    wmin       = int(round(WIDTH_MIN_SEC * fs))
    wmax       = int(round(WIDTH_MAX_SEC * fs))
    sigma_g    = 1.4826 * _mad(sig_bp)
    prom_floor_global = PROM_GLOBAL_K * sigma_g

    peaks = []
    peak_proms = []  # prominence of each accepted peak, kept in step with `peaks`
    N = len(sig_bp)
    for c in coarse_idx:
        lo = max(0, c - w_lock)
        hi = min(N - 1, c + w_lock)
        loc = lo + int(np.argmax(sig_bp[lo:hi + 1]))  # tip lock

        # Local MAD over +/-2 s
        lo2 = max(0, loc - half_local)
        hi2 = min(N - 1, loc + half_local)
        sigma_loc = 1.4826 * _mad(sig_bp[lo2:hi2 + 1])
        prom_min = max(prom_floor_global, PROM_LOCAL_K * sigma_loc)

        # When `loc` is not a true local maximum (e.g. the argmax sits on the
        # edge of the lock window) SciPy does not raise: it returns 0 and emits
        # a PeakPropertyWarning, which is a RuntimeWarning subclass.  Silence
        # it here; such samples are rejected by the `<= 0` test below.
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", RuntimeWarning)
                prom = float(peak_prominences(sig_bp, [loc])[0][0])
                width_samp = float(peak_widths(sig_bp, [loc], rel_height=0.5)[0][0])
        except Exception:
            # Best effort: a candidate whose properties cannot be computed is dropped.
            continue
        if (prom <= 0.0) or (width_samp <= 0.0):
            continue

        if not ((prom >= prom_min) and (wmin <= width_samp <= wmax)):
            continue

        if peaks and (loc - peaks[-1]) < refractory:
            # Inside the refractory period: keep whichever peak is more prominent.
            if prom > peak_proms[-1]:
                peaks[-1] = loc
                peak_proms[-1] = prom
        else:
            peaks.append(loc)
            peak_proms.append(prom)

    return np.asarray(peaks, dtype=int)

def _detect_waveform_column(df: pd.DataFrame) -> Optional[str]:
    cols = [c for c in df.columns if c != "Time_sec"]
    for key in ["Pulse", "PPG", "pulse", "ppg"]:
        if key in cols:
            return key
    return cols[0] if cols else None

def clean_rr(rr_sec: Iterable[float],
             phys_bounds: Tuple[float, float] = RR_BOUNDS) -> Tuple[np.ndarray, np.ndarray]:
    """Blank out implausible RR intervals and fill them by interpolation.

    Args:
        rr_sec: RR intervals in seconds.
        phys_bounds: ``(min, max)`` accepted RR range; values outside it and
            non-finite values are treated as invalid.

    Returns:
        ``(rr_interp, mask)`` where ``rr_interp`` has the invalid entries
        replaced by interpolation (filled in both directions) and ``mask`` is
        True at the entries that were invalid.
    """
    rr = np.asarray(list(rr_sec), dtype=float)
    too_short = rr < phys_bounds[0]
    too_long = rr > phys_bounds[1]
    mask = too_short | too_long | ~np.isfinite(rr)

    blanked = pd.Series(np.where(mask, np.nan, rr))
    rr_interp = blanked.interpolate(method="values", limit_direction="both").to_numpy()
    return rr_interp, mask

# ===== メイン処理 =====
def process_rr_for_subject(base_root: str, sid: str, person_name: str) -> None:
    """Detect pulse peaks for one subject and write the RR outputs.

    Input:  ``{base_root}/{sid}{person_name}/OFFSET/{sid}_Pulse.csv``
    Output: ``FEATURE/{sid}_RR.csv``, ``FEATURE/{sid}_RR.png`` and one
            ``FEATURE/Pulse/{sid}_PulseRR_{t_start}_{t_end}.png`` per
            ``INTERVAL``-second window between ``START_TIME`` and ``END_TIME``.

    The sampling rate is estimated from the mean ``Time_sec`` step.  Problems
    with the input (missing file/columns, unusable time axis, sampling rate
    too low for the band-pass, window too short to filter, too few peaks) are
    reported as ``[SKIP]`` lines so that a batch run continues with the next
    subject.
    """
    subject_dir        = os.path.join(base_root, f"{sid}{person_name}")
    offset_dir         = os.path.join(subject_dir, "OFFSET")
    feature_dir        = os.path.join(subject_dir, "FEATURE")
    feature_pulse_dir  = os.path.join(feature_dir, "Pulse")
    os.makedirs(feature_dir, exist_ok=True)
    os.makedirs(feature_pulse_dir, exist_ok=True)

    pulse_csv   = os.path.join(offset_dir, f"{sid}_Pulse.csv")
    out_csv     = os.path.join(feature_dir, f"{sid}_RR.csv")
    out_rr_png  = os.path.join(feature_dir, f"{sid}_RR.png")

    print(f"# Subject {sid}{person_name}")

    if not os.path.exists(pulse_csv):
        print(f"[SKIP] RR: Pulse CSV not found -> {pulse_csv}")
        return

    try:
        df = pd.read_csv(pulse_csv, encoding="utf-8-sig")
    except Exception as e:
        print(f"[SKIP] RR: failed to read CSV ({e})")
        return

    if "Time_sec" not in df.columns:
        print("[SKIP] RR: Time_sec column missing")
        return

    wave_col = _detect_waveform_column(df)
    if wave_col is None:
        print("[SKIP] RR: waveform column not found")
        return

    # Estimate the sampling rate from the mean finite time step.
    time_sec = pd.to_numeric(df["Time_sec"], errors="coerce")
    mdt = np.diff(time_sec.values)
    mdt = mdt[np.isfinite(mdt)]
    if mdt.size == 0:
        print("[SKIP] RR: invalid Time_sec spacing")
        return
    mean_dt = float(np.mean(mdt))
    if mean_dt <= 0:
        # A zero or negative mean step would raise ZeroDivisionError or give fs <= 0.
        print("[SKIP] RR: invalid Time_sec spacing")
        return
    fs = round(1.0 / mean_dt)
    if fs <= 2.0 * BANDPASS_HIGH:
        # butter() needs the upper band edge strictly below the Nyquist frequency.
        print(f"[SKIP] RR: sampling rate too low for band-pass (fs={fs} Hz)")
        return

    # Restrict to the analysis window
    in_window = (time_sec >= START_TIME) & (time_sec <= END_TIME)
    df = df.loc[in_window].copy()
    if df.empty:
        print("[SKIP] RR: empty time window")
        return

    df[wave_col] = pd.to_numeric(df[wave_col], errors="coerce")
    raw = df[wave_col].to_numpy(dtype=float)
    # Use the already-coerced time axis so non-numeric cells cannot break the cast.
    t   = time_sec.loc[in_window].to_numpy(dtype=float)

    # ---- v3 detection ----
    try:
        sig_bp = _bandpass_filter(raw, fs)
    except ValueError as e:
        # filtfilt rejects inputs shorter than its padding length.
        print(f"[SKIP] RR: band-pass filtering failed ({e})")
        return
    mwi    = _mwi_from_signal(sig_bp, fs)
    coarse = _coarse_candidates_local(mwi, fs, alpha=ALPHA, win_sec=30.0)
    peaks_idx = _refine_and_gate(sig_bp, fs, coarse)

    if peaks_idx.size < 3:
        print("[SKIP] RR: insufficient peaks")
        return

    qrs_times = t[peaks_idx].tolist()
    qrs_vals  = sig_bp[peaks_idx].tolist()

    # RR computation & cleaning (first beat has no predecessor -> NaN, filled by clean_rr)
    rr = np.insert(np.diff(qrs_times), 0, np.nan)
    rr_clean, _ = clean_rr(rr)
    df_rr = pd.DataFrame({"Time_sec": qrs_times, "RR_interval_sec": rr_clean})

    # One waveform figure (filtered signal + peaks) per INTERVAL-second window,
    # written to a temporary name first and then moved into place.
    qrs_times_arr = np.array(qrs_times)
    qrs_vals_arr  = np.array(qrs_vals)

    saved_count = 0
    for t_start in range(START_TIME, END_TIME, INTERVAL):
        t_end = t_start + INTERVAL

        win_mask = (t >= t_start) & (t < t_end)
        if not np.any(win_mask):
            continue

        pk_mask = (qrs_times_arr >= t_start) & (qrs_times_arr < t_end)
        pt = qrs_times_arr[pk_mask]
        pv = qrs_vals_arr[pk_mask]

        out_path = os.path.join(feature_pulse_dir, f"{sid}_PulseRR_{t_start}_{t_end}.png")
        base, ext = os.path.splitext(out_path)         # ext == ".png"
        tmp_path = f"{base}.__tmp__{ext}"              # keeps the .png suffix for savefig

        fig, ax = plt.subplots(figsize=(12, 4))
        try:
            ax.plot(t[win_mask], sig_bp[win_mask], linewidth=1.5, label="Filtered Pulse")

            if len(pt) > 0:
                ax.plot(pt, pv, "o", markersize=6, color="red", label="Peaks")
            else:
                ax.plot([], [], "o", markersize=6, color="red", label="Peaks (none)")

            ax.set_xlabel("Time (mm:ss)", fontsize=24)
            ax.set_ylabel("Pulse (filtered)", fontsize=24)
            ax.set_title(f"Pulse RR window {t_start}-{t_end} sec", fontsize=30)  # ASCII only
            _set_plot_style(ax)
            ax.legend(fontsize=20)
            fig.tight_layout()
            fig.savefig(tmp_path, dpi=300, bbox_inches="tight")
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
        os.replace(tmp_path, out_path)                 # overwrite any existing file
        print(f"[IMG] {sid}: {t_start}-{t_end} saved -> {out_path}")
        saved_count += 1

    print(f"[IMG] {sid}: {saved_count} window images saved in {feature_pulse_dir}")

    # RR time series
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        ax.plot(df_rr["Time_sec"], df_rr["RR_interval_sec"], linewidth=1.5)
        ax.set_xlabel("Time (mm:ss)", fontsize=24)
        ax.set_ylabel("RR Interval (sec)", fontsize=24)
        ax.set_title("RR Interval Over Time", fontsize=30)  # ASCII only
        _set_plot_style(ax)
        fig.tight_layout()
        fig.savefig(out_rr_png, dpi=300)
    finally:
        plt.close(fig)

    # Save the RR table
    try:
        df_rr.to_csv(out_csv, index=False, encoding="utf-8-sig")
        print(f"[OK]  RR -> {out_csv}")
    except Exception as e:
        print(f"[SKIP] RR: failed to save CSV ({e})")

def main_rr():
    """Run the RR extraction for every configured subject, in list order."""
    for subject in subjects:
        sid, person_name = subject
        process_rr_for_subject(BASE_DIR, sid, person_name)

if __name__ == "__main__":
    main_rr()


# Subject 0521因幡先生
[IMG] 0521: 1500-1530 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1500_1530.png
[IMG] 0521: 1530-1560 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1530_1560.png
[IMG] 0521: 1560-1590 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1560_1590.png
[IMG] 0521: 1590-1620 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1590_1620.png
[IMG] 0521: 1620-1650 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1620_1650.png
[IMG] 0521: 1650-1680 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1650_1680.png
[IMG] 0521: 1680-1710 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\Pulse\0521_PulseRR_1680_1710.png
[IMG] 0521: 1710-1740 saved -

  prom_arr  = peak_prominences(sig_bp, [loc])[0]
  w_arr     = peak_widths(sig_bp, [loc], rel_height=0.5)[0]
  w_arr     = peak_widths(sig_bp, [loc], rel_height=0.5)[0]


[IMG] 06273: 1500-1530 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1500_1530.png
[IMG] 06273: 1530-1560 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1530_1560.png
[IMG] 06273: 1560-1590 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1560_1590.png
[IMG] 06273: 1590-1620 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1590_1620.png
[IMG] 06273: 1620-1650 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1620_1650.png
[IMG] 06273: 1650-1680 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1650_1680.png
[IMG] 06273: 1680-1710 saved -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06273\FEATURE\Pulse\06273_PulseRR_1680_1710.png
[IMG] 06273: 1710-1740 saved -> C:\Users\taiki\OneDrive

In [1]:
# -*- coding: utf-8 -*-
"""
(Preprocess) RR extraction from OFFSET/Pulse and outlier correction
------------------------------------------------------------------
入力 : OFFSET/{sid}_Pulse.csv  （列: Time_sec, Pulse）
出力 : FEATURE/
  - {sid}_RR_raw.csv    …… 検出そのまま（RR, HR）
  - {sid}_RR_raw.png    …… RAW RRの可視化（修正対象点=赤丸）
  - {sid}_RR.csv        …… RR after deleting the middle peak of too-short intervals (RR below the IQR lower bound); HR is recomputed, no interpolation
  - {sid}_RR.png        …… 補完後RRの可視化

備考:
- Rピーク検出は、提示いただいたロジックに準拠（バンドパス→find_peaks→SPKI/NPKI適応閾値+RR範囲）
- 外れ値の検知は RR の全体 IQR±k 倍（デフォルト k=3.0）。
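- Correction is deletion-based: when an RR interval is below the IQR lower bound and removing the middle peak yields an interval inside the bounds, that peak is deleted; intervals above the upper bound are only logged. No rolling-median fill, interpolation, or time resampling is performed.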
- 解析範囲は START_TIME〜END_TIME にトリミングして実行します。
"""

from __future__ import annotations
import os
from typing import List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy.signal import butter, filtfilt, find_peaks

# ============== CONFIG ==============
# NOTE(review): this is a raw string, so the doubled backslashes stay in the path
# literally (they appear as "\\" in the logged paths) — confirm this is intended.
BASE_DIR: str = r"C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果"

# (subject_id, person_name) pairs; the subject folder is BASE_DIR/{subject_id}{person_name}.
subjects: List[Tuple[str, str]] = [
    ("0521",  "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", ""),
]

# Analysis window (seconds)
START_TIME: int = 1380
END_TIME:   int = 2100

# Band-pass edges per signal type (switch as needed)
SIGNAL_TYPE = "PPG"  # "ECG" or "PPG"
if SIGNAL_TYPE.upper() == "ECG":
    LOWCUT_HZ, HIGHCUT_HZ = 5.0, 15.0
else:  # PPG default
    LOWCUT_HZ, HIGHCUT_HZ = 0.5, 8.0
FILTER_ORDER = 2

# Detection / RR related
RR_MIN_SEC: float = 0.15
RR_MAX_SEC: float = 2.0
PEAK_MIN_DISTANCE_SEC: float = 0.30

# Correction (IQR±k & rolling median)
IQR_K: float = 3.0
# NOTE(review): ROLL_WIN_BEATS is not referenced by the code in this cell — confirm whether it is still needed.
ROLL_WIN_BEATS: int = 7  # odd value recommended

# Plotting
DPI = 300
FIGSIZE = (12, 4)
# ====================================


def mmss_formatter(x: float, _pos=None) -> str:
    """Tick formatter: render seconds as ``m:ss`` (minutes are not zero-padded)."""
    minutes, seconds = divmod(int(x), 60)
    return f"{minutes}:{seconds:02d}"


def bandpass_filter(signal: np.ndarray, fs: int,
                    lowcut: float = LOWCUT_HZ, highcut: float = HIGHCUT_HZ,
                    order: int = FILTER_ORDER) -> np.ndarray:
    """Zero-phase Butterworth band-pass of *signal*.

    The cut-offs are normalised by the Nyquist frequency (``fs / 2``) and
    clamped to ``[1e-6, 0.999999]`` before designing the filter, which is then
    applied forward and backward with ``filtfilt``.
    """
    nyquist = 0.5 * fs
    band = [
        max(1e-6, lowcut / nyquist),
        min(0.999999, highcut / nyquist),
    ]
    coeff_b, coeff_a = butter(order, band, btype='band')
    return filtfilt(coeff_b, coeff_a, signal)


def detect_qrs_adaptive(filtered: np.ndarray, fs: int, times: np.ndarray):
    """Select beats from local maxima with an adaptive SPKI/NPKI threshold.

    Local maxima at least ``PEAK_MIN_DISTANCE_SEC`` apart are found with
    ``find_peaks``.  The signal level (SPKI) and noise level (NPKI) start at
    the 90th and 10th percentile of the peak amplitudes, and the threshold is
    ``NPKI + 0.25 * (SPKI - NPKI)``.  A peak above the threshold is accepted
    unless its distance to the previously accepted beat is outside
    ``[RR_MIN_SEC, RR_MAX_SEC]``; accepted peaks update SPKI, sub-threshold
    peaks update NPKI (both with weight 0.125).

    Returns:
        ``(indices, times, values)`` of the accepted peaks as three lists.
    """
    min_gap = max(1, int(PEAK_MIN_DISTANCE_SEC * fs))
    peaks, _ = find_peaks(filtered, distance=min_gap)
    if peaks.size == 0:
        return [], [], []

    amplitudes = filtered[peaks]
    signal_level = float(np.percentile(amplitudes, 90))
    noise_level = float(np.percentile(amplitudes, 10))
    thr = noise_level + 0.25 * (signal_level - noise_level)

    qrs_idx, qrs_t, qrs_v = [], [], []
    for idx in peaks:
        val = float(filtered[idx])
        t = float(times[idx])

        if val > thr:
            if qrs_t and not (RR_MIN_SEC <= t - qrs_t[-1] <= RR_MAX_SEC):
                # Implausible interval: reject the peak.  Neither level changed,
                # so the threshold keeps its current value.
                continue
            signal_level = 0.125 * val + 0.875 * signal_level
            qrs_idx.append(idx)
            qrs_t.append(t)
            qrs_v.append(val)
        else:
            noise_level = 0.125 * val + 0.875 * noise_level

        thr = noise_level + 0.25 * (signal_level - noise_level)
    return qrs_idx, qrs_t, qrs_v


def iqr_bounds(x: pd.Series, k: float = IQR_K):
    """Return ``(Q1 - k*IQR, Q3 + k*IQR)`` of the non-NaN values of *x*.

    When *x* has no non-NaN value the bounds are ``(-inf, inf)``.
    """
    finite = x.dropna()
    if finite.empty:
        return -np.inf, np.inf
    quartiles = finite.quantile([0.25, 0.75])
    q1 = quartiles.iloc[0]
    q3 = quartiles.iloc[1]
    spread = q3 - q1
    lower = float(q1 - k*spread)
    upper = float(q3 + k*spread)
    return lower, upper


def plot_rr(time_sec: np.ndarray, rr_sec: np.ndarray, title: str, out_png: str,
            highlight_mask: np.ndarray | None = None):
    """Plot RR intervals against time and save the figure to *out_png*.

    Points selected by *highlight_mask* (if given and not all False) are
    overdrawn as hollow red circles.  The x axis is labelled with
    ``mmss_formatter``.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.plot(time_sec, rr_sec, marker='o', linestyle='-', linewidth=1.5, label='RR')

    has_highlight = highlight_mask is not None and np.any(highlight_mask)
    if has_highlight:
        ax.plot(
            time_sec[highlight_mask], rr_sec[highlight_mask], 'o',
            color='red', markerfacecolor='none', markersize=8,
            markeredgewidth=2, label='anomaly (IQR±k)',
        )

    ax.set_xlabel('Time (mm:ss)')
    ax.set_ylabel('RR Interval (sec)')
    ax.set_title(title)
    ax.grid(True)
    ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
    ax.legend()

    plt.tight_layout()
    plt.savefig(out_png, dpi=DPI)
    plt.close(fig)


def process_subject(sid: str, person_name: str):
    """Extract RR intervals for one subject, correct short intervals, and save CSVs/plots.

    Input:  ``BASE_DIR/{sid}{person_name}/OFFSET/{sid}_Pulse.csv``
            (columns ``Time_sec`` and ``Pulse``).
    Output (under ``FEATURE/``):
      - ``{sid}_RR_raw.csv`` / ``{sid}_RR_raw.png``: RR and HR as detected;
        points outside the IQR±k bounds are highlighted in the plot.
      - ``{sid}_RR.csv`` / ``{sid}_RR.png``: RR and HR after deleting the
        middle peak of too-short intervals (no interpolation).
      - ``Pulse/{sid}_Pulse_{t_start}_{t_end}.png``: filtered waveform per
        30-second window with kept and deleted peaks marked.

    Missing input, missing columns, too few samples, or a non-positive mean
    time step print a ``[SKIP]`` line and return.  When no peak is detected,
    only an empty ``{sid}_RR_raw.csv`` is written.
    """
    subj_dir = os.path.join(BASE_DIR, f"{sid}{person_name}")
    in_csv  = os.path.join(subj_dir, 'OFFSET', f'{sid}_Pulse.csv')
    out_dir = os.path.join(subj_dir, 'FEATURE')
    pulse_plot_dir = os.path.join(out_dir, 'Pulse')
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(pulse_plot_dir, exist_ok=True)

    if not os.path.exists(in_csv):
        print(f"[SKIP] {sid}{person_name}: input not found -> {in_csv}")
        return

    df = pd.read_csv(in_csv)
    if not {"Time_sec", "Pulse"}.issubset(df.columns):
        print(f"[SKIP] {sid}{person_name}: columns missing in {in_csv}")
        return

    df = df.sort_values('Time_sec').reset_index(drop=True)
    # Restrict to the analysis window
    df = df[(df['Time_sec'] >= START_TIME) & (df['Time_sec'] <= END_TIME)].reset_index(drop=True)
    times = df['Time_sec'].to_numpy(float)
    pulse = df['Pulse'].to_numpy(float)

    if times.size < 2:
        print(f"[SKIP] {sid}{person_name}: not enough samples")
        return

    # Estimate the sampling frequency from the mean time step
    dt = float(np.mean(np.diff(times)))
    if dt <= 0:
        print(f"[SKIP] {sid}{person_name}: invalid Time_sec sequence")
        return
    fs = int(round(1.0 / dt))

    # Filter -> peak detection
    # NOTE(review): qrs_idx is not used further in this function.
    filt = bandpass_filter(pulse, fs)
    qrs_idx, qrs_t, qrs_v = detect_qrs_adaptive(filt, fs, times)

    if len(qrs_t) == 0:
        print(f"[WARN] {sid}{person_name}: no peaks detected")
        # Write only an empty (header-only) CSV
        raw_csv = os.path.join(out_dir, f"{sid}_RR_raw.csv")
        pd.DataFrame(columns=["Time_sec","RR_interval_sec","HeartRate_BPM"]).to_csv(raw_csv, index=False, encoding='utf-8-sig')
        return

    # RR/HR (RAW); the first beat has no predecessor, so its RR and HR are NaN
    rr = [np.nan] + list(np.diff(qrs_t))
    hr = [np.nan if not np.isfinite(x) else 60.0/x for x in rr]
    df_rr = pd.DataFrame({
        'Time_sec': qrs_t,
        'RR_interval_sec': rr,
        'HeartRate_BPM': hr,
    })

    # Save RAW
    raw_csv = os.path.join(out_dir, f"{sid}_RR_raw.csv")
    df_rr.to_csv(raw_csv, index=False, encoding='utf-8-sig')

    # Detect RAW outliers (IQR±k) and mark them with red circles in the figure
    lower, upper = iqr_bounds(df_rr['RR_interval_sec'])
    out_mask = df_rr['RR_interval_sec'].lt(lower) | df_rr['RR_interval_sec'].gt(upper)
    raw_png = os.path.join(out_dir, f"{sid}_RR_raw.png")
    plot_rr(df_rr['Time_sec'].to_numpy(float), df_rr['RR_interval_sec'].to_numpy(float),
            title=f"{sid} RR Interval (RAW)", out_png=raw_png, highlight_mask=out_mask.to_numpy())

    # ---- Log the anomaly-detection thresholds ----
    print(f"[IQR] k={IQR_K}, lower={lower:.3f}, upper={upper:.3f}")

    # ---- Step 1: correct only too-short RR intervals by deleting the middle peak ----
    def correct_rr_by_middle_peak_deletion(qrs_times, lower, upper):
        """Delete peaks that create an RR interval shorter than *lower*.

        A peak is deleted only when the interval between its neighbours lies
        within ``[lower, upper]``.  Returns ``(kept_times, deleted_times,
        errors)`` where ``errors`` is a list of ``(time, tag)`` for short
        intervals that could not be fixed and for long intervals (which are
        not handled).
        """
        qrs = list(map(float, qrs_times))
        abnormal, errors = [], []
        i = 1
        while i < len(qrs):
            rr = qrs[i] - qrs[i-1]
            if np.isfinite(rr) and rr < lower:
                if i+1 < len(qrs):
                    t_mid = qrs[i]; t_prev = qrs[i-1]; t_next = qrs[i+1]
                    new_rr = t_next - t_prev
                    if (lower <= new_rr <= upper):
                        abnormal.append(t_mid)
                        del qrs[i]
                        continue  # re-evaluate at the same index
                    else:
                        errors.append((t_mid, 'short_rr_not_fixed_by_deletion'))
                        i += 1
                else:
                    # Last peak: there is no following peak to merge with
                    errors.append((qrs[i], 'short_rr_at_end'))
                    i += 1
            elif np.isfinite(rr) and rr > upper:
                errors.append((qrs[i], 'long_rr_not_handled'))
                i += 1
            else:
                i += 1
        return qrs, abnormal, errors

    qrs_t_corr, abnormal_times, errors = correct_rr_by_middle_peak_deletion(qrs_t, lower, upper)
    if errors:
        for t, tag in errors:
            print(f"[ERROR][{sid}] {tag} at t={t:.3f}s")

    # ---- Build corrected RR/HR (deletion-based, no interpolation) ----
    rr_corr = [np.nan] + list(np.diff(qrs_t_corr))
    hr_corr = [np.nan if not np.isfinite(x) else 60.0/x for x in rr_corr]
    df_corr = pd.DataFrame({
        'Time_sec': qrs_t_corr,
        'RR_interval_sec': rr_corr,
        'HeartRate_BPM': hr_corr,
    })

    corr_csv = os.path.join(out_dir, f"{sid}_RR.csv")
    df_corr.to_csv(corr_csv, index=False, encoding='utf-8-sig')

    corr_png = os.path.join(out_dir, f"{sid}_RR.png")
    plot_rr(df_corr['Time_sec'].to_numpy(float), df_corr['RR_interval_sec'].to_numpy(float),
            title=f"{sid} RR Interval (corrected by deletion)", out_png=corr_png)

    # === Waveform figure per 30 s (Pulse/): kept = circle, abnormal time = red x ===
    qrs_times_all = np.array(qrs_t, dtype=float)
    # Fall back to interpolating the filtered signal if the value list does not match the times
    qrs_vals_all = np.asarray(qrs_v, dtype=float) if len(qrs_v)==len(qrs_times_all) else np.interp(qrs_times_all, times, filt)
    abset = set(map(float, abnormal_times))
    keep_mask_all = np.array([t not in abset for t in qrs_times_all], dtype=bool)
    excl_mask_all = ~keep_mask_all

    t_min = int(START_TIME); t_max = int(END_TIME)
    for t_start in range(t_min, t_max, 30):
        t_end = t_start + 30
        seg_mask = (times >= t_start) & (times < t_end)
        if not np.any(seg_mask):
            continue
        # Peaks that fall inside this window, split into kept and deleted
        tm = (qrs_times_all >= t_start) & (qrs_times_all < t_end)
        qt = qrs_times_all[tm]
        qv = qrs_vals_all[tm]
        kept = keep_mask_all[tm]
        excl = excl_mask_all[tm]

        fig, ax = plt.subplots(figsize=FIGSIZE)
        ax.plot(times[seg_mask], filt[seg_mask], linewidth=1.5, label='Filtered Pulse')
        if qt.size > 0:
            if np.any(kept):
                ax.plot(qt[kept], qv[kept], 'ro', markersize=8, label='R Peaks (kept)')
            if np.any(excl):
                ax.plot(qt[excl], qv[excl], 'rx', markersize=10, markeredgewidth=2, label='Abnormal (deleted)')
        ax.set_xlabel('Time (mm:ss)'); ax.set_ylabel('Pulse (Filtered)')
        ax.set_title(f'{sid} Pulse: {t_start}-{t_end} sec')
        ax.xaxis.set_major_formatter(FuncFormatter(mmss_formatter))
        ax.grid(True); ax.legend()
        plt.tight_layout()
        out_png2 = os.path.join(pulse_plot_dir, f'{sid}_Pulse_{t_start}_{t_end}.png')
        plt.savefig(out_png2, dpi=DPI)
        plt.close(fig)

    print(f"[OK] {sid}{person_name}: RAW -> {raw_csv} / {raw_png}; Corrected -> {corr_csv} / {corr_png}")


# --- end helpers ---

def main_rr():
    """Run the RR preprocessing for every configured subject, in list order."""
    for subject in subjects:
        sid, person_name = subject
        process_subject(sid, person_name)


if __name__ == '__main__':
    main_rr()


[IQR] k=3.0, lower=0.700, upper=1.146
[OK] 0521因幡先生: RAW -> C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\0521因幡先生\FEATURE\0521_RR_raw.csv / C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\0521因幡先生\FEATURE\0521_RR_raw.png; Corrected -> C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\0521因幡先生\FEATURE\0521_RR.csv / C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\0521因幡先生\FEATURE\0521_RR.png
[IQR] k=3.0, lower=0.663, upper=1.321
[OK] 06021今村さん: RAW -> C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\06021今村さん\FEATURE\06021_RR_raw.csv / C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\06021今村さん\FEATURE\06021_RR_raw.png; Corrected -> C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\06021今村さん\FEATURE\06021_RR.csv / C:\\Users\\taiki\\OneDrive - Science Tokyo\\デスクトップ\\研究\\実験結果\06021今村さん\FEATURE\06021_RR.png
[IQR] k=3.0, lower=0.447, upper=1.068
[OK] 06022梅野さん: RAW -> C:\\Users\\taiki\\OneDrive - Science T

In [5]:
# -*- coding: utf-8 -*-
"""
(3) Feature Engineering - RR-derived features (NO interpolation)
---------------------------------------------------------------
入力:  FEATURE/{sid}_RR.csv  （列: Time_sec, RR_interval_sec）
出力:  FEATURE/{sid}_<FeatureName>.csv  を個別保存（UTF-8-SIG, index=False）

要件:
- 一切の「時間方向の補間」を行わない
- 出力の Time_sec は「計算時刻のみ」
  - 30秒窓の指標 → START_TIME から 30秒ごと（… , 1530, 1560, …）
  - 120秒窓の指標 → START_TIME から 120秒ごと
- RRの外れ値除去は生理境界のみ（0.25〜2.0 s）。それ以外の除去や補間は行わない
- 周波数領域（LF/HF）は Lomb‑Scargle で不等間隔のまま解析（補間ゼロ）

30秒窓: HeartRate, RMSSD, SDSD, pNN50, SD1, SD2, CSI, CVI
120秒窓: HF_power, LF_power, LF_HF_ratio
"""

import os
from typing import List, Tuple, Dict
import numpy as np
import pandas as pd
from scipy.signal import lombscargle  # 不等間隔データ用（補間不要）

# ============== CONFIG ==============
# Root folder; each subject lives in BASE_DIR/{subject_id}{person_name}.
BASE_DIR: str = r"C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果"

# (subject_id, person_name) pairs to process.
subjects: List[Tuple[str, str]] = [
    ("0521",  "因幡先生"),
    ("06021", "今村さん"),
    ("06022", "梅野さん"),
    ("06271", ""),
    ("06272", ""),
    ("06273", ""),
    ("06274", ""),
    ("06275", ""),
]

# Target interval (seconds)
START_TIME: int = 1500
END_TIME:   int = 2100

# Physiological bounds for RR
RR_MIN: float = 0.25   # sec
RR_MAX: float = 2.0    # sec

# Window lengths (seconds)
WIN_30: int = 30
WIN_120: int = 120

# Frequency domain (Hz)
LF_BAND = (0.04, 0.15)
HF_BAND = (0.15, 0.40)
F_MAX = 0.5    # upper frequency limit considered for HRV (Hz)
N_FREQ = 512   # number of points in the frequency grid (for Lomb-Scargle)
# ====================================


def _valid_rr(rr: np.ndarray) -> np.ndarray:
    """Boolean mask: True where *rr* is finite and within ``[RR_MIN, RR_MAX]``."""
    in_range = (rr >= RR_MIN) & (rr <= RR_MAX)
    return np.isfinite(rr) & in_range


def _rolling_trailing_ranges(times: np.ndarray, anchors: np.ndarray, win_sec: int):
    """
    各アンカー時刻 t に対し、過去 win_sec 秒の [t-win_sec, t] を返す（right閉）。
    インデックス範囲 (l, r) は python slice 用に r 非包含。
    """
    n = len(times)
    l = 0
    for t in anchors:
        start = t - win_sec
        # 左端更新
        while l < n and times[l] < start:
            l += 1
        r = l
        while r < n and times[r] <= t:
            r += 1
        yield l, r


def _successive_diffs(rr: np.ndarray) -> np.ndarray:
    if rr.size < 2:
        return np.array([], dtype=float)
    return np.diff(rr)


def _pnn50(drr: np.ndarray) -> float:
    if drr.size == 0:
        return np.nan
    return float(np.mean(np.abs(drr) > 0.05))


def _sd1_sd2(rr: np.ndarray) -> Tuple[float, float]:
    if rr.size < 2:
        return np.nan, np.nan
    drr = _successive_diffs(rr)
    if drr.size < 2:
        return np.nan, np.nan
    sdrr = np.std(rr, ddof=1)
    sddiff = np.std(drr, ddof=1)
    if not (np.isfinite(sdrr) and np.isfinite(sddiff)):
        return np.nan, np.nan
    sd1 = np.sqrt(0.5) * sddiff
    val = 2.0 * (sdrr ** 2) - 0.5 * (sddiff ** 2)
    sd2 = np.sqrt(val) if val > 0 else np.nan
    return sd1, sd2


def _lomb_band_power(t_sec: np.ndarray, rr_sec: np.ndarray, band: Tuple[float, float]) -> float:
    """
    Lomb-Scargle で不等間隔RR(t)から帯域パワー（相対量）を推定（補間なし）。
    返り値の絶対単位は Welch と異なる可能性があるが、LF/HF 比には一貫して使える。
    """
    if t_sec.size < 4 or rr_sec.size < 4:
        return np.nan
    t0 = t_sec[0]
    tt = t_sec - t0                     # 原点合わせ
    x = rr_sec - np.mean(rr_sec)        # DC除去
    # 周波数グリッド（0〜F_MAX）
    f = np.linspace(0.0001, F_MAX, N_FREQ)
    w = 2.0 * np.pi * f
    # SciPyの lombscargle は normalize=True がない版もあるため try/except
    try:
        p = lombscargle(tt, x, w, precenter=False, normalize=True)
    except TypeError:
        p = lombscargle(tt, x, w, precenter=False)
        # 簡易正規化（相対量調整）：分散で割る
        var = np.var(x)
        if var > 0:
            p = p / var
    # 帯域積分（相対量）
    m = (f >= band[0]) & (f <= band[1])
    if not np.any(m):
        return np.nan
    return float(np.trapz(p[m], f[m]))


def _compute_features(times_beats: np.ndarray, rr_beats: np.ndarray) -> Dict[str, np.ndarray]:
    """Compute windowed RR features on fixed anchor times.

    Anchors are the right edges of trailing windows:
      - 30 s group:  ``START_TIME + WIN_30,  ...`` in steps of ``WIN_30``
      - 120 s group: ``START_TIME + WIN_120, ...`` in steps of ``WIN_120``
    both up to and including ``END_TIME`` when it lies on the grid.

    Only RR values accepted by ``_valid_rr`` are used.  A 30 s window needs at
    least 3 valid values and a 120 s window at least 4; otherwise its entries
    stay NaN.

    Returns:
        Dict with ``Time_sec_30`` plus the 30 s features (HeartRate, RMSSD,
        SDSD, pNN50, SD1, SD2, CSI, CVI) and ``Time_sec_120`` plus the 120 s
        features (LF_power, HF_power, LF_HF_ratio); each feature array is
        aligned with its anchor array.
    """
    # Fixed-step anchors aligned to START_TIME / END_TIME
    anchors_30  = np.arange(START_TIME + WIN_30,  END_TIME + 1, WIN_30,  dtype=float)
    anchors_120 = np.arange(START_TIME + WIN_120, END_TIME + 1, WIN_120, dtype=float)

    keys_30 = ("HeartRate", "RMSSD", "SDSD", "pNN50", "SD1", "SD2", "CSI", "CVI")
    keys_120 = ("LF_power", "HF_power", "LF_HF_ratio")

    # Every feature starts as all-NaN and is filled in where a window qualifies.
    out: Dict[str, np.ndarray] = {"Time_sec_30": anchors_30}
    for key in keys_30:
        out[key] = np.full(anchors_30.shape, np.nan, dtype=float)
    out["Time_sec_120"] = anchors_120.copy()
    for key in keys_120:
        out[key] = np.full(anchors_120.shape, np.nan, dtype=float)

    # ---- 30 s windows (time-domain statistics and Poincare) ----
    for i, (l, r) in enumerate(_rolling_trailing_ranges(times_beats, anchors_30, WIN_30)):
        rr_w = rr_beats[l:r]
        rr_w = rr_w[_valid_rr(rr_w)]
        if rr_w.size < 3:
            continue

        # At least 3 RR values -> at least 2 successive differences.
        drr = _successive_diffs(rr_w)

        # HeartRate from the mean RR inside the window
        out["HeartRate"][i] = 60.0 / np.mean(rr_w)

        # RMSSD / SDSD / pNN50
        out["RMSSD"][i] = np.sqrt(np.mean(drr ** 2))
        out["SDSD"][i]  = np.std(drr, ddof=1)
        out["pNN50"][i] = _pnn50(drr)

        # SD1 / SD2 / CSI / CVI (CSI and CVI need both SD values positive and finite)
        sd1, sd2 = _sd1_sd2(rr_w)
        out["SD1"][i], out["SD2"][i] = sd1, sd2
        if np.isfinite(sd1) and sd1 > 0 and np.isfinite(sd2) and sd2 > 0:
            out["CSI"][i] = sd2 / sd1
            out["CVI"][i] = np.log10(sd1 * sd2)

    # ---- 120 s windows (frequency domain: Lomb-Scargle) ----
    for i, (l, r) in enumerate(_rolling_trailing_ranges(times_beats, anchors_120, WIN_120)):
        rr_w = rr_beats[l:r]
        t_w  = times_beats[l:r]
        m = _valid_rr(rr_w)
        rr_w = rr_w[m]
        t_w = t_w[m]
        if rr_w.size < 4:
            continue

        lf = _lomb_band_power(t_w, rr_w, LF_BAND)
        hf = _lomb_band_power(t_w, rr_w, HF_BAND)
        out["LF_power"][i] = lf
        out["HF_power"][i] = hf
        if np.isfinite(lf) and np.isfinite(hf) and hf > 0:
            out["LF_HF_ratio"][i] = lf / hf

    return out


def _save_feature_csv(feature_dir: str, sid: str, name: str,
                      times: np.ndarray, values: np.ndarray) -> str:
    """
    Write one feature series to ``{feature_dir}/{sid}_{name}.csv``.

    The file has two columns: ``Time_sec`` (taken from ``times``) and a
    column named after the feature (taken from ``values``).  It is written
    without the index and encoded as UTF-8 with BOM.

    Returns:
        The path of the CSV file that was written.
    """
    csv_name = f"{sid}_{name}.csv"
    out_path = os.path.join(feature_dir, csv_name)

    table = pd.DataFrame({"Time_sec": times, name: values})
    table.to_csv(out_path, index=False, encoding="utf-8-sig")

    return out_path


def process_rr_features_for_subject(base_dir: str, sid: str, person_name: str) -> None:
    """
    Compute the RR-derived features of one subject and save one CSV per feature.

    Input is ``{base_dir}/{sid}{person_name}/FEATURE/{sid}_RR.csv``, which must
    provide the columns ``Time_sec`` and ``RR_interval_sec``.  Both columns are
    coerced to float (unparseable cells become NaN) and handed to
    ``_compute_features``; each returned series is written next to the input
    through ``_save_feature_csv``.

    Progress is reported on stdout in a uniform format: a ``# Subject`` header,
    then ``[OK]`` for every file written or a single ``[SKIP]`` line naming the
    reason.  Every failure is logged and the function returns normally, so one
    bad subject never stops a batch run.

    Args:
        base_dir: Root folder containing one sub-folder per subject.
        sid: Subject id; used as folder-name prefix and file-name prefix.
        person_name: Suffix of the subject folder name (may be empty).
    """
    subject_dir = os.path.join(base_dir, f"{sid}{person_name}")
    feature_dir = os.path.join(subject_dir, "FEATURE")
    rr_csv = os.path.join(feature_dir, f"{sid}_RR.csv")

    print(f"# Subject {sid}{person_name}")

    # The input lives inside FEATURE/, so when the file exists the output
    # folder exists too.  Nothing is created here on purpose: creating it up
    # front would leave empty folder trees behind for mistyped subjects.
    if not os.path.exists(rr_csv):
        print(f"[SKIP] RR-features: RR CSV not found -> {rr_csv}")
        return

    try:
        df = pd.read_csv(rr_csv, encoding="utf-8-sig")
    except Exception as e:
        print(f"[SKIP] RR-features: failed to read RR CSV ({e})")
        return

    if not {"Time_sec", "RR_interval_sec"}.issubset(df.columns):
        print("[SKIP] RR-features: required columns missing")
        return

    times_beats = pd.to_numeric(df["Time_sec"], errors="coerce").to_numpy(dtype=float)
    rr_beats    = pd.to_numeric(df["RR_interval_sec"], errors="coerce").to_numpy(dtype=float)

    # A header-only file has no beats to analyse.
    if times_beats.size == 0:
        print("[SKIP] RR-features: RR CSV has no rows")
        return

    # Feature computation gets the same log-and-skip treatment as reading and
    # saving, so malformed data in one subject cannot abort the whole batch.
    try:
        feats = _compute_features(times_beats, rr_beats)
    except Exception as e:
        print(f"[SKIP] RR-features: feature computation failed ({e})")
        return

    # (window label used in the log, key of the time axis, features in save order)
    save_groups = (
        ("30s", "Time_sec_30",
         ("HeartRate", "RMSSD", "SDSD", "pNN50", "SD1", "SD2", "CSI", "CVI")),
        ("120s", "Time_sec_120",
         ("HF_power", "LF_power", "LF_HF_ratio")),
    )

    for window_label, time_key, feature_keys in save_groups:
        # A failure abandons the rest of this window group only; the other
        # group is still attempted.
        try:
            for key in feature_keys:
                out_path = _save_feature_csv(feature_dir, sid, key, feats[time_key], feats[key])
                print(f"[OK]  {key} -> {out_path}")
        except Exception as e:
            print(f"[SKIP] RR-features: save failed ({window_label}) ({e})")


def main_3_rr_features_no_interp() -> None:
    """Run the RR feature extraction for each entry of ``subjects``, in list order."""
    for subject in subjects:
        subject_id, display_name = subject
        process_rr_features_for_subject(BASE_DIR, subject_id, display_name)


# Entry point: when this file is run directly, process the RR features of
# every configured subject.
if __name__ == "__main__":
    main_3_rr_features_no_interp()


# Subject 0521因幡先生
[OK]  HeartRate -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_HeartRate.csv
[OK]  RMSSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_RMSSD.csv
[OK]  SDSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_SDSD.csv
[OK]  pNN50 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_pNN50.csv
[OK]  SD1 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_SD1.csv
[OK]  SD2 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_SD2.csv
[OK]  CSI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_CSI.csv
[OK]  CVI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_CVI.csv
[OK]  HF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\0521因幡先生\FEATURE\0521_HF_power.csv
[OK]  LF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究

  return float(np.trapz(p[m], f[m]))
  return float(np.trapz(p[m], f[m]))


[OK]  HeartRate -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_HeartRate.csv
[OK]  RMSSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_RMSSD.csv
[OK]  SDSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_SDSD.csv
[OK]  pNN50 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_pNN50.csv
[OK]  SD1 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_SD1.csv
[OK]  SD2 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_SD2.csv
[OK]  CSI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_CSI.csv
[OK]  CVI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_CVI.csv
[OK]  HF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06021今村さん\FEATURE\06021_HF_power.csv
[OK]  LF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\

  return float(np.trapz(p[m], f[m]))
  return float(np.trapz(p[m], f[m]))
  return float(np.trapz(p[m], f[m]))


[OK]  HeartRate -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_HeartRate.csv
[OK]  RMSSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_RMSSD.csv
[OK]  SDSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_SDSD.csv
[OK]  pNN50 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_pNN50.csv
[OK]  SD1 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_SD1.csv
[OK]  SD2 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_SD2.csv
[OK]  CSI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_CSI.csv
[OK]  CVI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_CVI.csv
[OK]  HF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_HF_power.csv
[OK]  LF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06271\FEATURE\06271_LF_power.cs

  return float(np.trapz(p[m], f[m]))
  return float(np.trapz(p[m], f[m]))


[OK]  HeartRate -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_HeartRate.csv
[OK]  RMSSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_RMSSD.csv
[OK]  SDSD -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_SDSD.csv
[OK]  pNN50 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_pNN50.csv
[OK]  SD1 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_SD1.csv
[OK]  SD2 -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_SD2.csv
[OK]  CSI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_CSI.csv
[OK]  CVI -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_CVI.csv
[OK]  HF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_HF_power.csv
[OK]  LF_power -> C:\Users\taiki\OneDrive - Science Tokyo\デスクトップ\研究\実験結果\06274\FEATURE\06274_LF_power.cs

  return float(np.trapz(p[m], f[m]))
