In [13]:
# -*- coding: utf-8 -*-
"""
Binary IVDD validation (ivdd vs normal) from DeepLabCut CSV (3-level header)

- Preprocessing can be chosen to match training: NORM_MODE = "zscore" (train1) or "tailset_minmax" (train2)
- 5 keypoints × (x,y) = 10 dims
- Window length / stride must match the values used for training
- Loads a trained .keras model, evaluates it, and writes confusion matrices, a ROC curve and several CSV files
- Outputs are saved under test/eval_outputs/YYYYMMDD-HHMMSS/ (nothing is created under eval_csv)
- Single-logit (sigmoid) vs. two-logit (softmax) model output is detected automatically
"""

import os
# Stability settings (optional). They are applied before TensorFlow is
# imported below, and setdefault() keeps any value that is already present in
# the environment, so they can be overridden from outside.
# CUDA_VISIBLE_DEVICES="-1" is intended to hide GPUs (CPU-only run).
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

import re
import glob
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import (
    classification_report, confusion_matrix,
    roc_auc_score, roc_curve
)
import matplotlib.pyplot as plt

# Report the TensorFlow version and fix the TF / NumPy random seeds.
print("TF:", tf.__version__)
tf.random.set_seed(42)
np.random.seed(42)

# ========= Parameters =========
# Root directory (points directly at the "test" folder). Adjust this path to your environment.
TEST_ROOT = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\test"

# Evaluation CSVs (under eval_csv)
CSV_DIR   = os.path.join(TEST_ROOT, "eval_csv")
CSV_GLOB  = os.path.join(CSV_DIR, "*.csv")

# Output location (everything is written under eval_outputs/YYYYMMDD-HHMMSS).
# The run directory is created immediately, when this cell/module is executed.
OUT_BASE  = os.path.join(TEST_ROOT, "eval_outputs")
RUN_ID    = datetime.now().strftime("%Y%m%d-%H%M%S")
OUT_DIR   = os.path.join(OUT_BASE, RUN_ID)
os.makedirs(OUT_DIR, exist_ok=True)

# Path to the trained model. The path below points at a checkpoint inside
# train1_model; keep NORM_MODE consistent with the run that produced it.
CKPT_PATH = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\train\train1_model\ivdd_lstm_20251215-114108_best.keras"

# Keypoints (their order determines the order of the (x, y) feature columns)
KEYPOINTS = [
    "left back paw",
    "right back paw",
    "left front paw",
    "right front paw",
    "tail set",
]

# Normalization mode: "zscore" (train1) or "tailset_minmax" (train2)
NORM_MODE = "zscore"          # <- use this to match train1
# NORM_MODE = "tailset_minmax"  # <- use this to match train2

# DLC likelihood threshold (used to blank out and interpolate low-confidence points)
USE_LIKELIHOOD      = False
MIN_KEEP_LIKELIHOOD = 0.6

# Window length and stride (must match training)
SEQ_LEN  = 60
STRIDE   = 30

# Feature dimension, network size and labels
DIMS          = 10  # 5 keypoints x (x, y)
N_HIDDEN      = 30
CLASS_NAMES   = ["ivdd", "normal"]           # 0=ivdd, 1=normal
CLASS_TO_IDX  = {c: i for i, c in enumerate(CLASS_NAMES)}
IDX_TO_CLASS  = {v: k for k, v in CLASS_TO_IDX.items()}

# ========= ラベル推定（ファイル名から） =========
def infer_label_from_filename(path: str) -> int:
    """
    Infer the class index (looked up in CLASS_TO_IDX) from a CSV file name.

    The lower-cased file stem is split into alphanumeric tokens and the
    following rules are tried in order; the first decisive rule wins:

    1. An exact 'ivdd' / 'normal' token, when only one of the two occurs.
    2. A token of the form 'ivdd<digits>' / 'normal<digits>' (ivdd1,
       normal2, ...), when only one of the two kinds occurs.
    3. The first token, when it is itself a class name.
    4. An exact 'ivdd' / 'normal' token in the directory part of the path,
       when only one of the two occurs.

    Tokens that continue with letters (e.g. 'ivddoct30' from 'IvddOct30')
    never count as 'ivdd', which avoids false positives.

    Raises:
        ValueError: if none of the rules yields a label.
    """
    name = os.path.basename(path).lower()
    stem = os.path.splitext(name)[0]
    tokens = [tok for tok in re.split(r'[^a-z0-9]+', stem) if tok]

    def pick_unique(has_ivdd: bool, has_normal: bool):
        # Decide only when exactly one of the two classes was seen.
        if has_ivdd and not has_normal:
            return CLASS_TO_IDX["ivdd"]
        if has_normal and not has_ivdd:
            return CLASS_TO_IDX["normal"]
        return None

    def looks_like(label: str, tok: str) -> bool:
        # Accept label followed by digits only (ivdd1, normal10; not ivddoct).
        return re.fullmatch(rf"{label}\d*", tok) is not None

    # 1) exact token match
    decided = pick_unique('ivdd' in tokens, 'normal' in tokens)
    if decided is not None:
        return decided

    # 2) label followed by optional digits
    ivdd_like = any(looks_like("ivdd", tok) for tok in tokens)
    normal_like = any(looks_like("normal", tok) for tok in tokens)
    decided = pick_unique(ivdd_like, normal_like)
    if decided is not None:
        return decided

    # 3) first token is a class name
    if tokens and tokens[0] in CLASS_TO_IDX:
        return CLASS_TO_IDX[tokens[0]]

    # 4) tokens of the directory part of the path
    dir_tokens = {
        tok
        for tok in re.split(r'[^a-z0-9]+', os.path.dirname(path).lower())
        if tok
    }
    decided = pick_unique('ivdd' in dir_tokens, 'normal' in dir_tokens)
    if decided is not None:
        return decided

    raise ValueError(f"ラベル不明: {name}（先頭を ivdd_ / normal_ 推奨。'ivdd1_' 等も可）")

# ========= DLC 前処理 =========
def _norm_name(s: str) -> str:
    return "".join(ch for ch in s.lower() if ch not in " _-")

def _resolve_keypoints(all_bodyparts, requested):
    """
    Map each requested keypoint name to the matching bodypart name in the CSV.

    Names are compared after _norm_name(), so spellings that differ only in
    case, spaces, underscores or hyphens match. When several bodyparts
    normalize to the same key, the first one in all_bodyparts is used.

    Returns:
        The matched bodypart names (as spelled in all_bodyparts), in the
        order of `requested`.

    Raises:
        ValueError: if any requested keypoint has no match.
    """
    lookup = {}
    for original in all_bodyparts:
        # setdefault keeps the first bodypart seen for a normalized key.
        lookup.setdefault(_norm_name(original), original)

    resolved = []
    missing = []
    for wanted in requested:
        key = _norm_name(wanted)
        if key not in lookup:
            missing.append(wanted)
            continue
        resolved.append(lookup[key])

    if missing:
        raise ValueError(f"指定キーポイントがCSVで見つかりません: {missing}\n利用可能: {all_bodyparts}")
    return resolved

def read_dlc_5kp_xy(csv_path: str,
                    keypoints,
                    use_likelihood=True,
                    min_keep_likelihood=0.6):
    """
    Load the (x, y) tracks of the requested keypoints from a DeepLabCut CSV.

    Pipeline: read the CSV (3-row header, first column as index) -> pick the
    requested keypoints -> optionally set low-likelihood samples to NaN ->
    linear interpolation along time -> bfill/ffill -> remaining NaN to 0.

    Args:
        csv_path: Path to the DLC CSV file.
        keypoints: Requested keypoint names; matched against the CSV
            bodyparts by _resolve_keypoints.
        use_likelihood: When True, samples whose likelihood is below
            `min_keep_likelihood` are replaced by NaN before interpolation.
        min_keep_likelihood: Likelihood threshold used for that masking.

    Returns:
        (X, used_kps): X is a float32 array with one row per frame and the
        columns ordered x, y per keypoint in the requested order ((T, 10) for
        5 keypoints); used_kps are the bodypart names as spelled in the CSV.

    Raises:
        ValueError: if a requested keypoint is missing (from
            _resolve_keypoints).
    """
    df = pd.read_csv(csv_path, header=[0, 1, 2], index_col=0)

    # Unique bodypart names in file order. Going through a set would make the
    # order (and therefore the first-wins tie-break in _resolve_keypoints for
    # names that normalize to the same key) vary between runs.
    bodyparts = list(dict.fromkeys(df.columns.get_level_values(1)))
    use_kps = _resolve_keypoints(bodyparts, keypoints)

    cols = {}
    for bp in use_kps:
        for coord in ("x", "y"):
            cols[f"{bp}_{coord}"] = df.xs((bp, coord), level=[1, 2], axis=1)
    X_df = pd.concat(cols.values(), axis=1)
    X_df.columns = list(cols.keys())

    if use_likelihood:
        # NaN needs a float column; integer-valued coordinates would
        # otherwise reject the assignment below.
        X_df = X_df.astype(np.float64)
        for bp in use_kps:
            try:
                lik = df.xs((bp, "likelihood"), level=[1, 2], axis=1)
            except KeyError:
                # Best effort: no likelihood column for this bodypart, so
                # its coordinates are kept as they are.
                continue
            low = lik.to_numpy().ravel() < min_keep_likelihood
            # Assign through .loc instead of mutating `.values` in place:
            # the array returned by `.values` is read-only when pandas
            # Copy-on-Write is active.
            X_df.loc[low, [f"{bp}_x", f"{bp}_y"]] = np.nan

    X_df = X_df.interpolate(method="linear", limit_direction="both", axis=0)
    # Columns that are entirely NaN survive interpolation; they end up as 0.
    X_df = X_df.bfill().ffill().fillna(0.0)

    X = X_df.values.astype(np.float32)
    return X, use_kps

# --- 正規化 ---
def zscore_per_file(X: np.ndarray, eps: float=1e-6) -> np.ndarray:
    """
    Standardize every column of X using statistics taken over axis 0.

    Each column has its mean subtracted and is divided by
    (standard deviation + eps); eps keeps constant columns from dividing by
    zero, so such columns come out as 0.
    """
    col_mean = np.mean(X, axis=0, keepdims=True)
    col_std = np.std(X, axis=0, keepdims=True)
    centered = X - col_mean
    return centered / (col_std + eps)

def normalize_tailset_minmax(
    X: np.ndarray, used_kps: list[str], ref_name: str = "tail set", eps: float = 1e-6
) -> np.ndarray:
    """
    Translate all keypoints so the reference keypoint is the origin, then
    min-max scale every column independently.

    Args:
        X: Array whose columns are ordered x, y per keypoint, in the order of
            `used_kps` (column 2*i is x and 2*i+1 is y of keypoint i).
        used_kps: Keypoint names corresponding to the column pairs of X.
        ref_name: Name of the reference keypoint. An exact case-insensitive
            match is preferred; otherwise names are compared ignoring spaces,
            underscores and hyphens, so "tail set" also finds "tail_set".
        eps: Added to the per-column range to avoid division by zero.

    Returns:
        A new array of the same shape as X; X itself is not modified.

    Raises:
        ValueError: if the reference keypoint is not in `used_kps`.
    """
    def _canon(name: str) -> str:
        # Same normalization rule the keypoint resolver applies to names.
        return "".join(ch for ch in name.lower() if ch not in " _-")

    lowered = [s.lower() for s in used_kps]
    wanted = ref_name.lower()
    if wanted in lowered:
        ref_idx = lowered.index(wanted)
    else:
        # Fall back to separator-insensitive matching: bodypart names are
        # returned as spelled in the CSV, which may use '_' or '-'.
        canon = [_canon(s) for s in used_kps]
        target = _canon(ref_name)
        if target not in canon:
            raise ValueError(f"'{ref_name}' が used_kps に見つかりません: {used_kps}")
        ref_idx = canon.index(target)

    # Translation (move the reference keypoint to the origin). cx / cy are
    # views of the untouched input, so they stay constant while Xc changes.
    cx = X[:, 2*ref_idx]
    cy = X[:, 2*ref_idx + 1]
    Xc = X.copy()
    for i, _ in enumerate(used_kps):
        Xc[:, 2*i]   -= cx
        Xc[:, 2*i+1] -= cy

    # Min-max scaling (each column independently).
    x_min = Xc.min(axis=0, keepdims=True)
    x_max = Xc.max(axis=0, keepdims=True)
    return (Xc - x_min) / (x_max - x_min + eps)

def apply_normalization(X_raw: np.ndarray, used_kps: list[str]) -> np.ndarray:
    """
    Normalize one file's features according to the module-level NORM_MODE.

    "zscore" uses zscore_per_file; "tailset_minmax" uses
    normalize_tailset_minmax with "tail set" as the reference keypoint.

    Raises:
        ValueError: if NORM_MODE is neither of the two supported modes.
    """
    if NORM_MODE == "zscore":
        return zscore_per_file(X_raw)
    if NORM_MODE == "tailset_minmax":
        return normalize_tailset_minmax(X_raw, used_kps, ref_name="tail set")
    raise ValueError(f"未知の NORM_MODE: {NORM_MODE}")

# --- 窓切り ---
def make_windows(X: np.ndarray, seq_len: int, stride: int):
    """
    Cut X into fixed-length windows along axis 0.

    Windows start at 0, stride, 2*stride, ... as long as a full window of
    `seq_len` rows fits; a trailing partial window is dropped.

    Returns:
        (windows, starts): `windows` has shape (n_windows, seq_len, X.shape[1])
        and `starts` is the list of start rows. When X has fewer than
        `seq_len` rows, an empty (0, seq_len, X.shape[1]) array and an empty
        list are returned.
    """
    total_rows = X.shape[0]
    if total_rows < seq_len:
        no_windows = np.empty((0, seq_len, X.shape[1]), dtype=X.dtype)
        return no_windows, []

    last_start = total_rows - seq_len
    starts = [*range(0, last_start + 1, stride)]
    pieces = [X[begin:begin + seq_len] for begin in starts]
    return np.stack(pieces, axis=0), starts

def build_dataset(csv_paths, seq_len=SEQ_LEN, stride=STRIDE):
    """
    Build the windowed evaluation set from a list of DLC CSV files.

    For every file: infer the label from its name, load the keypoint tracks,
    normalize them per file and cut them into windows. Files that are too
    short to give a single window are skipped with a warning.

    Returns:
        (X, y, file_ids, starts_all):
        X          - all windows concatenated along axis 0,
        y          - int64 label per window,
        file_ids   - file base name per window,
        starts_all - start row of each window inside its file.

    Raises:
        ValueError: if a file does not yield DIMS feature columns.
        RuntimeError: if no file produced any window.
    """
    window_chunks, label_chunks = [], []
    file_ids, starts_all = [], []
    used_kps_any = None  # keypoints of the first file, reported at the end

    for csv_path in csv_paths:
        base = os.path.basename(csv_path)
        y_lab = infer_label_from_filename(csv_path)   # 0/1
        X_raw, used_kps = read_dlc_5kp_xy(
            csv_path,
            keypoints=KEYPOINTS,
            use_likelihood=USE_LIKELIHOOD,
            min_keep_likelihood=MIN_KEEP_LIKELIHOOD,
        )
        if used_kps_any is None:
            used_kps_any = used_kps

        n_dims = X_raw.shape[1]
        if n_dims != DIMS:
            raise ValueError(f"{base}: 次元 {n_dims} != 期待 {DIMS}")

        X_win, starts = make_windows(
            apply_normalization(X_raw, used_kps), seq_len, stride
        )
        n_win = X_win.shape[0]
        if n_win == 0:
            print(f"[WARN] {base}: フレーム不足（{seq_len}未満）でスキップ")
            continue

        window_chunks.append(X_win)
        label_chunks.append(np.full((n_win,), y_lab, dtype=np.int64))
        file_ids.extend([base] * n_win)
        starts_all.extend(starts)

    if not window_chunks:
        raise RuntimeError("評価用データが作れませんでした。eval_csv に CSV があるか、命名規則（ivdd/normal）を確認してください。")

    X = np.concatenate(window_chunks, axis=0)
    y = np.concatenate(label_chunks, axis=0)
    print(f"[INFO] 使用キーポイント: {used_kps_any}")
    return X, y, np.array(file_ids), np.array(starts_all)

# ========= モデル読込 =========
class LSTM_RNN(keras.Model):
    """
    Two-layer LSTM classifier: per-timestep Dense(relu) -> LSTM -> LSTM ->
    Dense producing raw logits (no activation on the output layer).

    The constructor arguments are stored and returned by get_config(), and
    extra keyword arguments (e.g. trainable, dtype) are forwarded to
    keras.Model, so a model saved from this class can be re-created by
    keras.models.load_model(). Files saved by a class without get_config()
    do not contain the constructor arguments and still cannot be revived
    that way.
    """

    def __init__(self, n_input, n_hidden, n_classes, **kwargs):
        super().__init__(**kwargs)
        # Kept for serialization. n_input is not used by any layer here.
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        # NOTE(review): the Dense layer is held both as an attribute and
        # inside TimeDistributed; this layout is left as is because the
        # weight structure presumably has to match the training-time class.
        self.input_dense = keras.layers.Dense(n_hidden, activation='relu')
        self.time_dist   = keras.layers.TimeDistributed(self.input_dense)
        self.lstm1 = keras.layers.LSTM(n_hidden, return_sequences=True)
        self.lstm2 = keras.layers.LSTM(n_hidden)
        self.out   = keras.layers.Dense(n_classes)  # logits

    def call(self, x, training=False):
        """Return the logits for a batch of input sequences."""
        x = self.time_dist(x)
        x = self.lstm1(x, training=training)
        x = self.lstm2(x, training=training)
        x = self.out(x)
        return x

    def get_config(self):
        """Return the base config plus the constructor arguments."""
        config = super().get_config()
        config.update({
            "n_input": self.n_input,
            "n_hidden": self.n_hidden,
            "n_classes": self.n_classes,
        })
        return config

class LSTMWithL2(LSTM_RNN):
    """LSTM_RNN variant that additionally records an L2 coefficient.

    The coefficient is only stored on the instance; nothing in this class
    applies it.
    """

    def __init__(self, n_input, n_hidden, n_classes, l2_lambda=1e-4, **kwargs):
        super().__init__(n_input, n_hidden, n_classes, **kwargs)
        self.l2_lambda = l2_lambda

    def get_config(self):
        """Return the parent config plus l2_lambda."""
        config = super().get_config()
        config["l2_lambda"] = self.l2_lambda
        return config

def load_trained_model():
    """
    Load the trained model stored at CKPT_PATH.

    First tries to load it as a full model. If that fails for any reason, a
    warning is printed and a fresh LSTMWithL2 (DIMS inputs, N_HIDDEN units,
    2 classes) is built once on a dummy batch so its weights exist, and the
    weights are loaded from the same path.

    Returns:
        The loaded model.

    Raises:
        RuntimeError: if the weight-only fallback also fails; the underlying
            error is attached as the cause.
    """
    try:
        m = keras.models.load_model(
            CKPT_PATH,
            custom_objects={"LSTMWithL2": LSTMWithL2, "LSTM_RNN": LSTM_RNN},
            compile=False
        )
        print(f"[INFO] Loaded full model: {CKPT_PATH}")
        return m
    except Exception as e:
        # Deliberately broad: any load failure switches to the fallback.
        print(f"[WARN] load_model 失敗: {e}")
        # Fallback for the case where only the weights can be restored.
        m = LSTMWithL2(n_input=DIMS, n_hidden=N_HIDDEN, n_classes=2)
        _ = m(tf.zeros([1, SEQ_LEN, DIMS]))  # build the layers/variables
        try:
            m.load_weights(CKPT_PATH)
        except Exception as e2:
            raise RuntimeError(f"モデルの読み込みに失敗しました。: {e2}") from e2
        print(f"[INFO] Loaded weights into fresh model from: {CKPT_PATH}")
        return m

# ========= Collect input CSVs =========
csv_files = sorted(glob.glob(CSV_GLOB))
if not csv_files:
    raise FileNotFoundError(f"評価用CSVが見つかりません: {CSV_GLOB}")

# ========= Build the dataset =========
X, y, file_ids, starts = build_dataset(csv_files, SEQ_LEN, STRIDE)
print("X:", X.shape, "y:", y.shape, "files:", len(np.unique(file_ids)))

# ========= Model & inference =========
model = load_trained_model()
logits = model.predict(X, batch_size=64)
logits = np.asarray(logits)

# Auto-detect single-logit (sigmoid) vs. two-logit (softmax) output
if logits.ndim == 1 or logits.shape[1] == 1:
    # NOTE(review): this branch treats sigmoid(logit) as P(ivdd). With the
    # label encoding used here (0=ivdd, 1=normal), a single-logit model
    # trained on those labels would presumably output P(normal) instead --
    # confirm against the training script before relying on this branch.
    p_ivdd   = tf.math.sigmoid(logits).numpy().reshape(-1)
    p_normal = 1.0 - p_ivdd
    probs    = np.stack([p_ivdd, p_normal], axis=1)   # column order matches ["ivdd","normal"]
    # Selects the ivdd index where p_ivdd >= 0.5 and the normal index otherwise.
    y_pred   = (p_ivdd >= 0.5).astype(int) * CLASS_TO_IDX["ivdd"] + (p_ivdd < 0.5).astype(int) * CLASS_TO_IDX["normal"]
else:
    # Two (or more) logits: softmax over the class axis, prediction = argmax.
    probs    = tf.nn.softmax(logits, axis=1).numpy()
    p_ivdd   = probs[:, CLASS_TO_IDX["ivdd"]]
    p_normal = probs[:, CLASS_TO_IDX["normal"]]
    y_pred   = probs.argmax(axis=1)

# ========= Output paths (every file name carries RUN_ID) =========
paths = {
    "win_csv":           os.path.join(OUT_DIR, f"window_predictions_{RUN_ID}.csv"),
    "roc_png":           os.path.join(OUT_DIR, f"roc_window_{RUN_ID}.png"),
    "cm_window_png":     os.path.join(OUT_DIR, f"cm_window_{RUN_ID}.png"),
    "file_pred_csv":     os.path.join(OUT_DIR, f"file_level_predictions_{RUN_ID}.csv"),
    "file_errors_csv":   os.path.join(OUT_DIR, f"file_level_errors_{RUN_ID}.csv"),
    "cm_file_major_png": os.path.join(OUT_DIR, f"cm_file_majority_{RUN_ID}.png"),
    "cm_file_mean_png":  os.path.join(OUT_DIR, f"cm_file_meanprob_{RUN_ID}.png"),
}

# ========= Prediction CSV (one row per window) =========
# Rows are sorted by file and window start; df_win is reused further below
# for the file-level aggregation.
df_win = pd.DataFrame({
    "file": file_ids, "start": starts, "true": y, "pred": y_pred,
    "p_ivdd": p_ivdd, "p_normal": p_normal,
}).sort_values(["file","start"])
df_win.to_csv(paths["win_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["win_csv"])

# ========= Report (window level) =========
print("\n[Window-level] classification_report:")
print(classification_report(y, y_pred, target_names=CLASS_NAMES, digits=4))
# Rows = true class, columns = predicted class, both ordered ivdd, normal.
cm = confusion_matrix(y, y_pred, labels=[CLASS_TO_IDX["ivdd"], CLASS_TO_IDX["normal"]])
print("[Window-level] confusion matrix:\n", cm)

# ========= ROC (window level; ivdd is the positive class) =========
# Best effort: any failure in this section is reported as a warning and the
# script continues with the remaining outputs.
try:
    y_ivdd = (y == CLASS_TO_IDX["ivdd"]).astype(int)  # 1 where the true class is ivdd
    auc_val = roc_auc_score(y_ivdd, p_ivdd)
    fpr, tpr, _ = roc_curve(y_ivdd, p_ivdd, pos_label=1)
    plt.figure(figsize=(6,6))
    plt.plot(fpr, tpr, label=f"AUC = {auc_val:.3f}")
    plt.plot([0,1], [0,1], linestyle="--")  # chance diagonal
    plt.title("ROC (window-level)")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.tight_layout()
    plt.savefig(paths["roc_png"], dpi=150)
    plt.close()
    print("[INFO] 保存:", paths["roc_png"])
except Exception as e:
    print(f"[WARN] ROC プロット失敗: {e}")

# ========= 混同行列プロット（共通関数） =========
def plot_cm(cm_mat, labels, title, path_png):
    """
    Draw a confusion matrix as a heat map and save it to `path_png`.

    Cell colours show each cell's share of the grand total, while the text
    in each cell is the raw count. Rows are labelled as true classes and
    columns as predicted classes.

    Args:
        cm_mat: 2-D array of counts.
        labels: Tick labels, used for both axes.
        title: Figure title.
        path_png: Output image path (saved at 150 dpi).
    """
    # Share of the grand total; the small constant avoids division by zero.
    cmn = cm_mat.astype(np.float32) / (cm_mat.sum() + 1e-6)
    tick_positions = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(6,6))
    im = ax.imshow(cmn, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(im, ax=ax)
    ax.set(
        xticks=tick_positions,
        yticks=tick_positions,
        xticklabels=labels,
        yticklabels=labels,
        ylabel='True label',
        xlabel='Predicted label',
        title=title,
    )
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Raw counts on top of the cells; white text on the darker cells.
    half_peak = cmn.max()/2
    for row, col in np.ndindex(cm_mat.shape[0], cm_mat.shape[1]):
        text_color = "white" if cmn[row, col] > half_peak else "black"
        ax.text(col, row, int(cm_mat[row, col]), ha="center",
                color=text_color, fontsize=12)

    plt.tight_layout()
    plt.savefig(path_png, dpi=150)
    plt.close()

# Window-level confusion-matrix image
plot_cm(cm, CLASS_NAMES, "Confusion Matrix (window-level)", paths["cm_window_png"])
print("[INFO] 保存:", paths["cm_window_png"])

# ========= File-level aggregation =========
# Majority vote (most frequent window prediction per file). On a tie,
# argmax returns the first maximum, i.e. the lower class index.
major_pred = df_win.groupby("file")["pred"].agg(lambda a: np.bincount(a).argmax())
true_file  = df_win.groupby("file")["true"].first()

# Mean probability per file -> argmax (column 0 = p_ivdd, column 1 = p_normal)
mean_prob = df_win.groupby("file")[["p_ivdd", "p_normal"]].mean()
mean_pred = mean_prob.values.argmax(axis=1)

# Prediction CSV (one row per file). All three aggregates come from
# groupby("file") on the same frame, so their rows share the same file order.
df_file = pd.DataFrame({
    "file": mean_prob.index,
    "true": [CLASS_NAMES[t] for t in true_file.values],
    "pred_majority": [CLASS_NAMES[p] for p in major_pred.values],
    "pred_meanprob": [CLASS_NAMES[p] for p in mean_pred],
    "p_ivdd_mean": mean_prob["p_ivdd"].values,
    "p_normal_mean": mean_prob["p_normal"].values,
})
df_file.to_csv(paths["file_pred_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_pred_csv"])

# Misclassified files (judged by the mean-probability prediction only)
df_errors = df_file[df_file["true"] != df_file["pred_meanprob"]].copy()
df_errors.to_csv(paths["file_errors_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_errors_csv"])

# ========= ファイル単位の混同行列（画像保存） =========
def _to_idx(series, name2idx={"ivdd":0, "normal":1}):
    return series.map(name2idx).values

# Convert the class-name columns of df_file back to indices (0=ivdd, 1=normal).
y_true_f   = _to_idx(df_file["true"])
y_pred_maj = _to_idx(df_file["pred_majority"])
y_pred_mean= _to_idx(df_file["pred_meanprob"])

# File-level confusion matrices for both aggregation rules
# (rows = true class, columns = predicted class).
cm_major = confusion_matrix(y_true_f, y_pred_maj, labels=[0,1])
cm_mean  = confusion_matrix(y_true_f, y_pred_mean, labels=[0,1])

plot_cm(cm_major, CLASS_NAMES, "Confusion Matrix (File-level, Majority Vote)", paths["cm_file_major_png"])
plot_cm(cm_mean,  CLASS_NAMES, "Confusion Matrix (File-level, Mean Probability)", paths["cm_file_mean_png"])
print("[INFO] 保存:", paths["cm_file_major_png"])
print("[INFO] 保存:", paths["cm_file_mean_png"])

# Final message: report the directory that holds all outputs of this run.
print("\n[Done] すべての出力は以下に保存されました：")
print(OUT_DIR)


TF: 2.19.0
[WARN] ivdd1_case1DLC_resnet50_IvddOct30shuffle1_100000.csv: フレーム不足（60未満）でスキップ
[INFO] 使用キーポイント: ['left back paw', 'right back paw', 'left front paw', 'right front paw', 'tail set']
X: (231, 60, 10) y: (231,) files: 38
[WARN] load_model 失敗: <class '__main__.LSTMWithL2'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': None, 'class_name': 'LSTMWithL2', 'config': {'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'LSTMWithL2', 'build_config': {'input_shape': [None, 60, 10]}, 'compile_config': None}.

Exception encountered: Unable to revive model from config. When overriding the `get_config()` method, make sure that the returned config contains all items used as arguments in the  constructor to