In [5]:
# -*- coding: utf-8 -*-
"""
Preprocess DLC CSVs:
- Fix tail_set to absolute (640,360) every frame (anchor-translate)
- Also add per-dimension min-max normalized relative coords as nx,ny (0..1)
- (Optional) mask out frames where tail_set likelihood < threshold (no interpolation)
- Save processed CSVs to a 'process' directory.

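The output keeps a DLC-style 3-level header, but the top (scorer) level is replaced
by 'proc'. x,y are overwritten with *anchored absolute* coords (tail_set becomes exactly
(640,360)); likelihood columns are carried over when present, and normalized relative
coords are added as nx,ny columns.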
"""

import os
import glob
import numpy as np
import pandas as pd

# ====== Path settings (edit as needed) ======
INPUT_DIR   = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\train\train_csv"
OUTPUT_DIR  = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\process"
CSV_GLOB    = "*.csv"   # glob pattern for input files, e.g. "*.csv" or "normal_*.csv"

# ====== Preprocessing parameters ======
TAIL_NAME            = "tail_set"    # anchor bodypart label; matched case-insensitively against the DLC bodypart names
CENTER_X, CENTER_Y   = 640.0, 360.0  # fixed coordinate the anchor bodypart is moved to in every frame
USE_MASK_LOW_LIK     = False          # whether to drop frames whose anchor likelihood is below LIK_THRESHOLD
LIK_THRESHOLD        = 0.60          # likelihood threshold applied to the anchor bodypart
EPS                  = 1e-6          # added to the min-max denominator to avoid division by zero

def _norm_name(s: str) -> str:
    """Return *s* lower-cased with spaces, underscores and hyphens removed."""
    return "".join(ch for ch in s.lower() if ch not in " _-")

def _resolve_tail_index(used_kps, tail_name="tail_set"):
    """Return the position of the anchor bodypart inside *used_kps*.

    Matching is done in two passes so that results for previously accepted
    inputs never change:

    1. case-insensitive comparison (the original behaviour);
    2. if that fails, comparison after also removing spaces, underscores and
       hyphens (e.g. "Tail Set" or "tail-set" match "tail_set").

    Args:
        used_kps: sequence of bodypart labels.
        tail_name: label of the anchor bodypart to look for.

    Returns:
        Index of the first matching label in *used_kps*.

    Raises:
        ValueError: if no label matches in either pass.
    """
    wanted = tail_name.lower()
    lowered = [s.lower() for s in used_kps]
    if wanted in lowered:
        return lowered.index(wanted)

    # Fallback: ignore separator differences as well as case.
    key = _norm_name(tail_name)
    normalized = [_norm_name(s) for s in used_kps]
    if key in normalized:
        return normalized.index(key)

    raise ValueError(f"'tail_set' が見つかりません: 指定='{tail_name}', 利用可能={used_kps}")

def load_dlc_csv(path: str):
    """Read a DLC CSV with a 3-row header and list its bodyparts.

    Args:
        path: location of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        A ``(df, bodyparts)`` tuple. ``df`` has 3-level MultiIndex columns and
        the first CSV column as its index. ``bodyparts`` holds the unique
        level-1 column labels in the order they first appear in the file.
    """
    df = pd.read_csv(path, header=[0, 1, 2], index_col=0)
    # dict.fromkeys de-duplicates while keeping first-seen order. A set would
    # make the order depend on string hashing, so the column order of the
    # processed output could differ from one interpreter session to the next.
    bodyparts = list(dict.fromkeys(bp for (_, bp, _) in df.columns))
    return df, bodyparts

def to_xy_arrays(df: pd.DataFrame, bodyparts: list[str]) -> tuple[np.ndarray, np.ndarray, list[str], dict]:
    """
    DLC 3段ヘッダから x,y（T×(2B)）を抽出して返す。
    返り値: X(=x,y交互), L(likelihood or None), used_kps, col_map
    col_map: {"x": {(bp)->Series}, "y": {(bp)->Series}, "lik": {(bp)->Series or None}}
    """
    Xcols, Ycols, Lcols = {}, {}, {}
    for bp in bodyparts:
        Xcols[bp] = df.xs((bp, "x"), level=[1,2], axis=1)
        Ycols[bp] = df.xs((bp, "y"), level=[1,2], axis=1)
        try:
            Lcols[bp] = df.xs((bp, "likelihood"), level=[1,2], axis=1)
        except KeyError:
            Lcols[bp] = None

    used_kps = bodyparts
    # 結合（列順: bp0_x, bp0_y, bp1_x, bp1_y, ...）
    arr_list = []
    for bp in used_kps:
        arr_list.append(Xcols[bp].values.reshape(-1,1))
        arr_list.append(Ycols[bp].values.reshape(-1,1))
    XY = np.concatenate(arr_list, axis=1).astype(np.float32)

    # likelihood も並べて持つ（辞書のまま返す）
    return XY, None, used_kps, {"x": Xcols, "y": Ycols, "lik": Lcols}

def mask_low_likelihood_rows(df: pd.DataFrame, tail_bp: str, thr: float) -> pd.DataFrame:
    """Drop frames whose ``tail_bp`` likelihood is below ``thr`` (no interpolation).

    The input frame is returned unchanged when ``tail_bp`` has no likelihood
    column, or when every frame would be dropped (a warning is printed in the
    latter case).
    """
    try:
        lik_cols = df.xs((tail_bp, "likelihood"), level=[1, 2], axis=1)
    except KeyError:
        # No likelihood column: nothing to mask on.
        return df

    keep = lik_cols.values.flatten() >= thr
    n_total = keep.size
    n_kept = keep.sum()

    if n_kept == 0:
        print(f"[WARN] 全フレームが閾値未満です（{tail_bp}, thr={thr}）。元データをそのまま使用します。")
        return df

    n_dropped = n_total - n_kept
    if n_dropped > 0:
        print(f"[INFO] 低likelihoodフレーム除外: {n_dropped}/{n_total} 行を削除")
    return df.iloc[keep, :]

def anchor_tail_and_add_norm(df: pd.DataFrame, bodyparts: list[str], tail_name=TAIL_NAME,
                             cx=CENTER_X, cy=CENTER_Y, eps=EPS) -> pd.DataFrame:
    """
    Anchor every frame on the tail bodypart and add min-max normalized columns.

    (1) Per frame, subtract the tail bodypart's x,y from every bodypart
        (tail-relative coords), then shift by (cx, cy) so the tail bodypart
        sits exactly at (cx, cy). These values replace x,y.
    (2) Min-max normalize each tail-relative column over the frames and add
        the result as nx,ny columns.

    Args:
        df: frame with 3-level columns; levels 1 and 2 are read as
            (bodypart, coordinate).
        bodyparts: bodypart labels to process; this also fixes the output
            column order.
        tail_name: label of the anchor bodypart, resolved via
            ``_resolve_tail_index``.
        cx, cy: coordinates the anchor bodypart is moved to.
        eps: added to the min-max denominator to avoid division by zero.

    Returns:
        A new DataFrame indexed like ``df`` whose columns are
        ("proc", bp, "x"/"y"[/"likelihood"]) for each bodypart, followed by
        ("proc", bp, "nx"/"ny") for each bodypart.

    Raises:
        ValueError: if ``df`` has no rows or the anchor bodypart is not found.
        KeyError: if a bodypart has no x or y column.
    """
    if df.shape[0] == 0:
        # Min/max over zero frames is undefined; fail with a clear message.
        raise ValueError("フレーム数が0のため前処理できません")

    def _select(bp, coord):
        return df.xs((bp, coord), level=[1, 2], axis=1)

    # Extract x, y (as float32 vectors) and likelihood (kept as-is) per bodypart.
    xs, ys, liks = {}, {}, {}
    for bp in bodyparts:
        xs[bp] = _select(bp, "x").astype(np.float32).values.reshape(-1)
        ys[bp] = _select(bp, "y").astype(np.float32).values.reshape(-1)
        try:
            liks[bp] = _select(bp, "likelihood")
        except KeyError:
            liks[bp] = None

    # Anchor bodypart trajectory.
    t_bp = bodyparts[_resolve_tail_index(bodyparts, tail_name)]
    tx, ty = xs[t_bp], ys[t_bp]

    # --- Tail-relative coordinates, columns: bp0_x, bp0_y, bp1_x, bp1_y, ... ---
    rel_blocks = []
    for bp in bodyparts:
        rel_blocks.append((xs[bp] - tx).reshape(-1, 1))
        rel_blocks.append((ys[bp] - ty).reshape(-1, 1))
    REL = np.concatenate(rel_blocks, axis=1)  # (T, 2B)

    # --- Per-column min-max normalization ---
    # fmin/fmax skip NaN operands, so one NaN frame no longer turns the whole
    # normalized column into NaN (plain min/max would propagate it). NaN
    # entries themselves stay NaN in the output.
    rmin = np.fmin.reduce(REL, axis=0, keepdims=True)
    rmax = np.fmax.reduce(REL, axis=0, keepdims=True)
    REL_N = (REL - rmin) / (rmax - rmin + eps)

    # --- Rebuild the 3-level-header frame ---
    out_cols = []
    data_blocks = []
    # First the anchored absolute x,y (tail bodypart lands on (cx, cy)),
    # plus likelihood when the input had it.
    for i, bp in enumerate(bodyparts):
        out_cols.append(("proc", bp, "x"))
        data_blocks.append((cx + REL[:, 2 * i]).astype(np.float32).reshape(-1, 1))
        out_cols.append(("proc", bp, "y"))
        data_blocks.append((cy + REL[:, 2 * i + 1]).astype(np.float32).reshape(-1, 1))
        if liks[bp] is not None:
            out_cols.append(("proc", bp, "likelihood"))
            data_blocks.append(liks[bp].values.reshape(-1, 1))

    # Then the normalized coords under separate nx,ny names so they do not
    # collide with the x,y columns.
    for i, bp in enumerate(bodyparts):
        out_cols.append(("proc", bp, "nx"))
        data_blocks.append(REL_N[:, 2 * i].reshape(-1, 1))
        out_cols.append(("proc", bp, "ny"))
        data_blocks.append(REL_N[:, 2 * i + 1].reshape(-1, 1))

    OUT = np.concatenate(data_blocks, axis=1)
    out_df = pd.DataFrame(OUT, index=df.index)
    out_df.columns = pd.MultiIndex.from_tuples(out_cols)

    return out_df

def process_one_csv(in_path: str, out_dir: str,
                    tail_name=TAIL_NAME,
                    use_mask=USE_MASK_LOW_LIK,
                    lik_thr=LIK_THRESHOLD) -> str:
    """Preprocess one DLC CSV and write the result into *out_dir*.

    Args:
        in_path: path of the input DLC CSV.
        out_dir: directory for the output file (created if missing).
        tail_name: label of the anchor bodypart.
        use_mask: when true, drop frames whose anchor likelihood is below
            ``lik_thr`` before anchoring.
        lik_thr: likelihood threshold used when ``use_mask`` is true.

    Returns:
        Path of the written file, ``<out_dir>/<input stem>_proc.csv``.

    Raises:
        ValueError: if the anchor bodypart cannot be found in the file.
    """
    os.makedirs(out_dir, exist_ok=True)

    df, bodyparts = load_dlc_csv(in_path)

    # Optional removal of low-likelihood frames.
    if use_mask:
        # Look up the label exactly as it is spelled in the file. Passing the
        # raw configured name would make the mask a silent no-op whenever its
        # spelling differs from the column label, because the likelihood
        # lookup would fail with KeyError and return the frame unchanged.
        tail_bp = bodyparts[_resolve_tail_index(bodyparts, tail_name)]
        df = mask_low_likelihood_rows(df, tail_bp, lik_thr)

    # Anchor on the tail bodypart and add the normalized columns.
    out_df = anchor_tail_and_add_norm(df, bodyparts, tail_name,
                                      cx=CENTER_X, cy=CENTER_Y, eps=EPS)

    # Save as <input stem>_proc.csv.
    base = os.path.splitext(os.path.basename(in_path))[0]
    out_path = os.path.join(out_dir, f"{base}_proc.csv")
    out_df.to_csv(out_path, encoding="utf-8-sig")
    print(f"[OK] {os.path.basename(in_path)} -> {out_path}")
    return out_path

def main():
    """Process every CSV matching CSV_GLOB in INPUT_DIR into OUTPUT_DIR.

    Raises FileNotFoundError when no input file matches. A failure on an
    individual file is printed and the loop moves on to the next file.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    pattern = os.path.join(INPUT_DIR, CSV_GLOB)
    csv_paths = sorted(glob.glob(pattern))
    if len(csv_paths) == 0:
        raise FileNotFoundError(f"入力CSVが見つかりません: {pattern}")

    for csv_path in csv_paths:
        try:
            process_one_csv(
                csv_path,
                OUTPUT_DIR,
                tail_name=TAIL_NAME,
                use_mask=USE_MASK_LOW_LIK,
                lik_thr=LIK_THRESHOLD,
            )
        except Exception as err:
            # Report the failure and keep going with the remaining files.
            print(f"[ERROR] {os.path.basename(csv_path)}: {err}")

if __name__ == "__main__":
    main()


[OK] normal_100DLC_resnet152_sotuken1Dec17shuffle1_150000.csv -> C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\process\normal_100DLC_resnet152_sotuken1Dec17shuffle1_150000_proc.csv
[OK] normal_101DLC_resnet152_sotuken1Dec17shuffle1_150000.csv -> C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\process\normal_101DLC_resnet152_sotuken1Dec17shuffle1_150000_proc.csv
[OK] normal_102DLC_resnet152_sotuken1Dec17shuffle1_150000.csv -> C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\process\normal_102DLC_resnet152_sotuken1Dec17shuffle1_150000_proc.csv
[OK] normal_103DLC_resnet152_sotuken1Dec17shuffle1_150000.csv -> C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recogniti