In [2]:
# -*- coding: utf-8 -*-
"""
各CSV（DLC 3段ヘッダ）の tail_set の全フレーム平均 (x̄_tail, ȳ_tail) を1回だけ算出して固定し、
全キーポイントの x,y を (x - x̄_tail, y - ȳ_tail) に変換した CSV（<元名>_relative.csv）を出力します。
- 3段ヘッダ（scorer/bodypart/(x|y|likelihood)）は維持、likelihood はそのまま。
- ディレクトリ指定なら GLOB_PATTERN に合うCSVを一括処理、ファイル指定ならその1件のみ。
- likelihoodを使って低品質フレームを欠損→補間してから平均を出すことも可能。

★ 使い方:
  1) 「パス指定・設定」欄の PROJECT_ROOT / INPUT_PATH などを環境に合わせて書き換える
  2) そのまま実行（.py or Notebook）
"""

import os
import glob
import numpy as np
import pandas as pd

# =========================
# Paths and settings (edit this section)
# =========================
PROJECT_ROOT = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master"
TRAIN_CSV_DIR = os.path.join(PROJECT_ROOT, "data", "train", "train_csv")
EVAL_CSV_DIR  = os.path.join(PROJECT_ROOT, "data", "test",  "eval_csv")

# Processing target (either a single CSV file or a directory)
INPUT_PATH    = TRAIN_CSV_DIR        # currently processes train_csv; set to EVAL_CSV_DIR to process eval_csv instead
GLOB_PATTERN  = "*.csv"             # search pattern used when INPUT_PATH is a directory

# Whether tail-set frames with likelihood < threshold are treated as missing
# and interpolated before the tail-set mean is computed
USE_LIKELIHOOD       = False
MIN_KEEP_LIKELIHOOD  = 0.6

# Whether to overwrite an output file (*_relative.csv) that already exists
OVERWRITE            = False

# =========================
# ユーティリティ
# =========================
def _norm_name(s: str) -> str:
    """Return *s* lowercased with spaces, underscores and hyphens removed.

    Used to compare column names that differ only in letter case or in the
    separator characters they contain.
    """
    drop_separators = str.maketrans("", "", " _-")
    return s.lower().translate(drop_separators)

def _resolve_tailset_name(all_bodyparts, want="tail set"):
    """Return the actual bodypart name that corresponds to *want*.

    Names are compared after normalization with ``_norm_name`` (lowercased,
    spaces/underscores/hyphens removed), so e.g. "Tail_Set" matches "tail set".
    When several bodyparts normalize to the same key, the first one in
    *all_bodyparts* is returned.

    Args:
        all_bodyparts: iterable of bodypart names as they appear in the CSV.
        want: the bodypart name to look for (compared after normalization).

    Returns:
        The matching element of *all_bodyparts*, unmodified.

    Raises:
        ValueError: if no bodypart matches *want*.
    """
    wanted_key = _norm_name(want)
    for bp in all_bodyparts:
        if _norm_name(bp) == wanted_key:
            return bp
    # Report the name that was actually requested, not a hard-coded one.
    raise ValueError(f"'{want}' 列が見つかりません。利用可能 bodyparts: {sorted(set(all_bodyparts))}")

def _series_from_multi(df, bodypart, coord):
    """Extract one (bodypart, coord) column from a 3-level-column DataFrame.

    Args:
        df: DataFrame whose columns are a MultiIndex (scorer, bodypart, coord).
        bodypart: value to match on column level 1.
        coord: value to match on column level 2 ("x", "y" or "likelihood").

    Returns:
        A Series. If the selection yields a DataFrame (one column per scorer),
        its first column is returned.
    """
    selected = df.xs((bodypart, coord), level=[1, 2], axis=1)
    # The cross-section may still carry the scorer level; keep the first column.
    return selected.iloc[:, 0] if isinstance(selected, pd.DataFrame) else selected

def _assign_to_multi(df, bodypart, coord, values):
    """Overwrite every column of *df* matching (any scorer, bodypart, coord).

    Args:
        df: DataFrame with 3-level column tuples; modified in place.
        bodypart: value to match at position 1 of each column tuple.
        coord: value to match at position 2 of each column tuple.
        values: data assigned to each matching column.

    Raises:
        KeyError: if no column matches (bodypart, coord).
    """
    matching = [c for c in df.columns if (c[1], c[2]) == (bodypart, coord)]
    if len(matching) == 0:
        raise KeyError(f"列が見つかりません: ({bodypart}, {coord})")
    for target in matching:
        df[target] = values
def collect_csv_paths(inp, pattern, exclude_suffix="_relative.csv"):
    """Return the list of CSV paths to process for *inp*.

    Args:
        inp: path to a single file or to a directory.
        pattern: glob pattern applied inside *inp* when it is a directory.
        exclude_suffix: in directory mode, files whose name ends with this
            suffix are dropped from the result. The default matches the
            output files this script writes, so re-running on the same
            directory does not process earlier outputs again (which would
            create ``*_relative_relative.csv``). Pass ``None`` or ``""`` to
            disable the filter.

    Returns:
        ``[inp]`` when *inp* is a file (never filtered, since the caller named
        it explicitly); otherwise the sorted matches of *pattern* in *inp*.

    Raises:
        FileNotFoundError: if *inp* is neither an existing file nor directory.
    """
    if os.path.isfile(inp):
        return [inp]
    if not os.path.isdir(inp):
        raise FileNotFoundError(f"入力が見つかりません: {inp}")

    matches = sorted(glob.glob(os.path.join(inp, pattern)))
    if exclude_suffix:
        matches = [
            p for p in matches
            if not os.path.basename(p).endswith(exclude_suffix)
        ]
    return matches

# =========================
# メイン処理
# =========================
def process_file(path, use_likelihood=True, min_keep_likelihood=0.6, overwrite=False):
    """Write ``<stem>_relative.csv`` with x/y shifted by the mean tail-set position.

    The CSV at *path* is read with a 3-row header. The mean x and mean y of the
    "tail set" bodypart over all rows are computed once and subtracted from the
    x and y columns of every bodypart. Likelihood columns are not modified.

    Args:
        path: input CSV path.
        use_likelihood: if true, tail-set rows whose likelihood is below
            *min_keep_likelihood* are set to NaN and interpolated before the
            mean is taken. If the likelihood lookup raises KeyError, the raw
            mean is used instead.
        min_keep_likelihood: likelihood threshold used when *use_likelihood*
            is true.
        overwrite: if false and the output file already exists, the file is
            skipped.

    Returns:
        None. Prints a "[SKIP]" or "[OK]" line.
    """
    stem, _ = os.path.splitext(path)
    out_path = stem + "_relative.csv"
    if not overwrite and os.path.exists(out_path):
        print(f"[SKIP] 既に存在: {out_path}")
        return

    # Load the CSV (3-row header, first column is the index).
    df = pd.read_csv(path, header=[0, 1, 2], index_col=0)
    bodyparts = [column[1] for column in df.columns]
    tail_name = _resolve_tailset_name(bodyparts, want="tail set")

    # Tail-set coordinates as float series.
    tail_x = _series_from_multi(df, tail_name, "x").astype(float)
    tail_y = _series_from_multi(df, tail_name, "y").astype(float)

    if not use_likelihood:
        mean_tail_x = float(tail_x.mean())
        mean_tail_y = float(tail_y.mean())
    else:
        try:
            likelihood = _series_from_multi(df, tail_name, "likelihood").astype(float)
            low_quality = likelihood < float(min_keep_likelihood)
            cleaned = []
            for raw in (tail_x, tail_y):
                series = raw.copy()
                # Blank out low-likelihood rows, then fill them back in.
                series[low_quality] = np.nan
                series = series.interpolate(method="linear", limit_direction="both")
                series = series.bfill().ffill().fillna(0.0)
                cleaned.append(series)
            mean_tail_x = float(cleaned[0].mean())
            mean_tail_y = float(cleaned[1].mean())
        except KeyError:
            # No likelihood column available: fall back to the raw mean.
            mean_tail_x = float(tail_x.mean())
            mean_tail_y = float(tail_y.mean())

    # Shift x and y of every bodypart by the fixed tail-set mean.
    df_out = df.copy()
    offsets = (("x", mean_tail_x), ("y", mean_tail_y))
    for bp in sorted(set(bodyparts)):
        shifted = [
            (coord, _series_from_multi(df, bp, coord).astype(float) - offset)
            for coord, offset in offsets
        ]
        for coord, new_values in shifted:
            _assign_to_multi(df_out, bp, coord, new_values)

    # Save, keeping the 3-level header.
    df_out.to_csv(out_path, encoding="utf-8-sig")
    in_name = os.path.basename(path)
    out_name = os.path.basename(out_path)
    print(f"[OK] {in_name} -> {out_name}  "
          f"(tail_mean=({mean_tail_x:.3f},{mean_tail_y:.3f}))")

def main():
    """Process every CSV selected by INPUT_PATH / GLOB_PATTERN.

    Each file is handled independently: an exception raised while processing
    one file is printed as an "[ERROR]" line and the loop moves on to the next.

    Raises:
        FileNotFoundError: if no CSV path is found for INPUT_PATH.
    """
    paths = collect_csv_paths(INPUT_PATH, GLOB_PATTERN)
    if len(paths) == 0:
        raise FileNotFoundError(f"CSV が見つかりません: {INPUT_PATH} / pattern={GLOB_PATTERN}")

    print(f"[INFO] 対象 {len(paths)} 件 from: {INPUT_PATH}")
    options = {
        "use_likelihood": USE_LIKELIHOOD,
        "min_keep_likelihood": MIN_KEEP_LIKELIHOOD,
        "overwrite": OVERWRITE,
    }
    for csv_path in paths:
        try:
            process_file(csv_path, **options)
        except Exception as err:
            # Best-effort batch run: report the failure and keep going.
            print(f"[ERROR] {csv_path}: {err}")


if __name__ == "__main__":
    main()


[INFO] 対象 158 件 from: C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\train\train_csv
[OK] ivdd1_10DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_10DLC_resnet50_IvddOct30shuffle1_100000_relative.csv  (tail_mean=(687.208,256.808))
[OK] ivdd1_11DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_11DLC_resnet50_IvddOct30shuffle1_100000_relative.csv  (tail_mean=(534.432,388.842))
[OK] ivdd1_12DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_12DLC_resnet50_IvddOct30shuffle1_100000_relative.csv  (tail_mean=(711.530,399.533))
[OK] ivdd1_13DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_13DLC_resnet50_IvddOct30shuffle1_100000_relative.csv  (tail_mean=(650.311,406.781))
[OK] ivdd1_14DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_14DLC_resnet50_IvddOct30shuffle1_100000_relative.csv  (tail_mean=(945.942,458.410))
[OK] ivdd1_15DLC_resnet50_IvddOct30shuffle1_100000.csv -> ivdd1_15DLC_resnet5