In [None]:
# -*- coding: utf-8 -*-
"""
IVDD binary evaluation with 3 keypoints (left back paw, right back paw, tail set)

- 学習と同じ NORM_MODE を選択（"zscore" / "tailset_minmax"）
- 入力: test/eval_csv/*.csv
- 出力: test/eval_outputs/YYYYMMDD-HHMMSS/ 配下に一式
- モデル出力が 1ロジットでも2ロジットでも自動で処理
"""

import os
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

import re, glob
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

print("TF:", tf.__version__)
# Seed both random number generators so repeated evaluation runs are reproducible.
# NumPy's global seeding function is `np.random.seed`; `np.random.set_seed`
# does not exist and raised AttributeError before anything else could run.
tf.random.set_seed(42)
np.random.seed(42)

# ====== Path settings (change as needed) ======
TEST_ROOT = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\ivdd\test"
CSV_DIR   = os.path.join(TEST_ROOT, "eval_csv")
CSV_GLOB  = os.path.join(CSV_DIR, "*.csv")

OUT_BASE  = os.path.join(TEST_ROOT, "eval_outputs")
# Timestamp naming this run's output folder, formatted YYYYMMDD-HHMMSS.
# The month and day must be the strftime directives %m and %d; the previous
# literal "MMDD" (later lower-cased by a replace) produced e.g. "2025mmDD-120000".
RUN_ID    = datetime.now().strftime("%Y%m%d-%H%M%S")
OUT_DIR   = os.path.join(OUT_BASE, RUN_ID)
os.makedirs(OUT_DIR, exist_ok=True)

# Trained model checkpoint (.keras) to evaluate; point this at the output of
# the training script.
CKPT_PATH = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\ivdd\train\train3_model\ivdd_lstm_20251211-120000_best.keras"

# ====== Settings ======
# Body parts read from every CSV; each one contributes an x and a y column.
KEYPOINTS = ["left back paw", "right back paw", "tail set"]

# Likelihood-based masking of low-confidence coordinates (off by default).
USE_LIKELIHOOD      = False
MIN_KEEP_LIKELIHOOD = 0.6

# Window length, hop between windows (both in frames), and the feature count
# expected per frame.
SEQ_LEN, STRIDE, DIMS = 60, 30, 6

# Must be the same normalization that was used for training.
NORM_MODE = "zscore"           # corresponds to train1
# NORM_MODE = "tailset_minmax" # corresponds to train2

# Display order; AUC and figures treat "ivdd" as the positive class.
CLASS_NAMES  = ["ivdd", "normal"]
CLASS_TO_IDX = {label: idx for idx, label in enumerate(CLASS_NAMES)}
NAME_TO_CLASSIDX = dict(CLASS_TO_IDX)

# ====== 前処理系（3KP版） ======
def infer_label_from_filename(path: str) -> int:
    """Infer the class index of a CSV file from its name, then its folder.

    The lower-cased file stem is split on every run of characters other than
    ``a-z0-9``. The rules below are tried in order and the first one that
    yields a single class wins:

    1. exactly one of the tokens ``ivdd`` / ``normal`` is present;
    2. exactly one of the label-plus-digits forms (``ivdd1``, ``normal02``,
       ...) is present;
    3. the first token is itself a class name (used when both labels occur);
    4. the name of the *immediate* parent folder contains exactly one of
       ``ivdd`` / ``normal``.

    Rule 4 deliberately looks at the immediate parent only. Tokenising the
    whole directory path lets any ancestor folder decide the label: a dataset
    root such as ``.../data/ivdd/test/eval_csv`` would silently label every
    otherwise unlabelled file as ivdd instead of reporting it.

    Args:
        path: Path of the CSV file.

    Returns:
        The class index from ``CLASS_TO_IDX``.

    Raises:
        ValueError: If no rule determines a single class.
    """
    def _tokens(text):
        return [t for t in re.split(r'[^a-z0-9]+', text) if t]

    def _exclusive(has_ivdd, has_normal):
        # Class index when exactly one label is present, otherwise None.
        if has_ivdd and not has_normal:
            return CLASS_TO_IDX["ivdd"]
        if has_normal and not has_ivdd:
            return CLASS_TO_IDX["normal"]
        return None

    def _looks_like(label, tok):
        return re.fullmatch(rf"{label}\d*", tok) is not None

    name = os.path.basename(path).lower()
    tokens = _tokens(os.path.splitext(name)[0])
    token_set = set(tokens)

    # Rule 1: exact label token.
    label = _exclusive('ivdd' in token_set, 'normal' in token_set)
    if label is not None:
        return label

    # Rule 2: label followed by digits, e.g. "ivdd1".
    label = _exclusive(
        any(_looks_like("ivdd", t) for t in tokens),
        any(_looks_like("normal", t) for t in tokens),
    )
    if label is not None:
        return label

    # Rule 3: both labels occur somewhere; trust the leading token.
    if tokens and tokens[0] in CLASS_TO_IDX:
        return CLASS_TO_IDX[tokens[0]]

    # Rule 4: immediate parent folder only (see docstring).
    parent_name = os.path.basename(os.path.dirname(path)).lower()
    pset = set(_tokens(parent_name))
    label = _exclusive('ivdd' in pset, 'normal' in pset)
    if label is not None:
        return label

    raise ValueError(f"ラベル不明: {name}（'ivdd_' / 'normal_' または 'ivdd1_' 等の形式を推奨）")

def _norm_name(s: str) -> str:
    return "".join(ch for ch in s.lower() if ch not in " _-")

def _resolve_keypoints(all_bodyparts, requested):
    """Map requested keypoint names onto the spellings found in the CSV.

    Names are compared through ``_norm_name``. When several body parts share
    one normalized form, the first one in ``all_bodyparts`` is used.

    Args:
        all_bodyparts: Body-part names available in the CSV.
        requested: Keypoint names wanted by the caller.

    Returns:
        The CSV spellings matching ``requested``, in the order requested.

    Raises:
        ValueError: If any requested keypoint has no match.
    """
    canonical = {}
    for original in all_bodyparts:
        key = _norm_name(original)
        if key not in canonical:
            canonical[key] = original

    found = []
    missing = []
    for wanted in requested:
        key = _norm_name(wanted)
        if key not in canonical:
            missing.append(wanted)
            continue
        found.append(canonical[key])

    if missing:
        raise ValueError(f"指定キーポイントがCSVで見つかりません: {missing}\n利用可能: {all_bodyparts}")
    return found

def read_dlc_3kp_xy(csv_path, keypoints, use_likelihood=True, min_keep_likelihood=0.6):
    """Load the x/y tracks of the requested keypoints from a pose CSV.

    The CSV is read with a three-row column header and its first column as
    the index; the second header level is taken as the body-part name and the
    third as ``x`` / ``y`` / ``likelihood``.
    NOTE(review): this looks like a DeepLabCut export, judging by the
    function name - confirm.

    Args:
        csv_path: Path of the CSV file.
        keypoints: Keypoint names to extract; spelling is matched through
            ``_resolve_keypoints``.
        use_likelihood: When true, coordinates whose likelihood is below
            ``min_keep_likelihood`` are discarded and re-filled by
            interpolation. Keypoints without a likelihood column are left
            untouched.
        min_keep_likelihood: Likelihood threshold used when
            ``use_likelihood`` is true.

    Returns:
        A tuple ``(X, use_kps)``: ``X`` is a float32 array with one row per
        CSV row and the columns ``<kp>_x, <kp>_y`` for each resolved
        keypoint, in the order requested; ``use_kps`` holds the keypoint
        names as spelled in the CSV.

    Raises:
        ValueError: If a requested keypoint is not present in the CSV.
    """
    df = pd.read_csv(csv_path, header=[0,1,2], index_col=0)

    # dict.fromkeys de-duplicates while keeping the CSV column order. A set
    # made the order (and so error messages and tie-breaks) nondeterministic.
    bodyparts = list(dict.fromkeys(bp for (_, bp, _) in df.columns))
    use_kps = _resolve_keypoints(bodyparts, keypoints)

    cols = {}
    for bp in use_kps:
        for coord in ("x", "y"):
            cols[f"{bp}_{coord}"] = df.xs((bp, coord), level=[1,2], axis=1)
    X_df = pd.concat(cols.values(), axis=1)
    X_df.columns = list(cols.keys())

    if use_likelihood:
        for bp in use_kps:
            # Only the lookup can legitimately fail, so keep the try minimal.
            try:
                lik = df.xs((bp,"likelihood"), level=[1,2], axis=1)
            except KeyError:
                continue  # no likelihood column for this keypoint: keep all frames
            low = lik.to_numpy().ravel() < min_keep_likelihood
            for c in (f"{bp}_x", f"{bp}_y"):
                # Series.mask writes NaN through pandas itself. Mutating the
                # array behind `.values` fails when that array is read-only
                # (pandas Copy-on-Write) or has an integer dtype.
                X_df[c] = X_df[c].mask(low)

    # Fill masked or missing samples: interpolate linearly, extend the edges,
    # and fall back to 0 for columns that are empty throughout.
    X_df = X_df.interpolate(method="linear", limit_direction="both", axis=0)
    X_df = X_df.bfill().ffill().fillna(0.0)
    return X_df.values.astype(np.float32), use_kps

def zscore_per_file(X: np.ndarray, eps: float=1e-6) -> np.ndarray:
    """Standardize each column of *X* with its own mean and standard deviation.

    Args:
        X: Array whose first axis is reduced when computing the statistics.
        eps: Added to the standard deviation so constant columns do not
            divide by zero.

    Returns:
        ``(X - mean) / (std + eps)``, computed column-wise.
    """
    centered = X - X.mean(axis=0, keepdims=True)
    spread = X.std(axis=0, keepdims=True) + eps
    return centered / spread

def normalize_tailset_minmax(X: np.ndarray, used_kps: list[str], ref_name="tail set", eps: float=1e-6):
    """Re-centre coordinates on a reference keypoint, then min-max scale them.

    Columns of *X* are read as ``x0, y0, x1, y1, ...`` in the order of
    ``used_kps``. The reference keypoint's x/y is subtracted from every
    keypoint's x/y frame by frame, and each column is then scaled with its
    own minimum and maximum.

    The reference is located ignoring case, spaces, underscores and hyphens.
    This is the same tolerance ``_norm_name`` applies during keypoint
    resolution, so a CSV that spells the keypoint ``tail_set`` is accepted
    here as well instead of being rejected after it was resolved.

    Args:
        X: Array of shape ``(frames, >= 2 * len(used_kps))``.
        used_kps: Keypoint names in column order.
        ref_name: Name of the keypoint used as the origin.
        eps: Added to the column range so constant columns do not divide by
            zero.

    Returns:
        A new array with the same shape as *X*; *X* itself is not modified.

    Raises:
        ValueError: If ``ref_name`` matches none of ``used_kps``.
    """
    def _canon(name):
        # Kept local so the function has no dependency on sibling helpers.
        return "".join(ch for ch in name.lower() if ch not in " _-")

    canon_names = [_canon(s) for s in used_kps]
    target = _canon(ref_name)
    if target not in canon_names:
        raise ValueError(f"'{ref_name}' が used_kps に見つかりません: {used_kps}")
    ref_idx = canon_names.index(target)

    # Views into the untouched input, so shifting Xc below cannot alter them.
    cx = X[:, 2*ref_idx]
    cy = X[:, 2*ref_idx + 1]

    Xc = X.copy()
    x_cols = 2 * np.arange(len(used_kps))
    Xc[:, x_cols]     -= cx[:, None]
    Xc[:, x_cols + 1] -= cy[:, None]

    x_min = Xc.min(axis=0, keepdims=True)
    x_max = Xc.max(axis=0, keepdims=True)
    return (Xc - x_min) / (x_max - x_min + eps)

def apply_normalization(X_raw, used_kps):
    """Normalize one file's coordinates according to the global ``NORM_MODE``.

    Args:
        X_raw: Raw coordinate array for one file.
        used_kps: Keypoint names in column order; only used by the
            ``"tailset_minmax"`` mode.

    Returns:
        The normalized array.

    Raises:
        ValueError: If ``NORM_MODE`` is neither ``"zscore"`` nor
            ``"tailset_minmax"``.
    """
    if NORM_MODE == "tailset_minmax":
        return normalize_tailset_minmax(X_raw, used_kps, ref_name="tail set")
    if NORM_MODE != "zscore":
        raise ValueError(f"未知の NORM_MODE: {NORM_MODE}")
    return zscore_per_file(X_raw)

def make_windows(X: np.ndarray, seq_len: int, stride: int):
    """Cut *X* into fixed-length windows along its first axis.

    Windows start at ``0, stride, 2*stride, ...``; trailing rows that do not
    fill a whole window are dropped.

    Args:
        X: Array whose first axis is the frame axis.
        seq_len: Number of frames per window.
        stride: Offset between the starts of consecutive windows.

    Returns:
        A tuple ``(windows, starts)``: the stacked windows and the list of
        their start offsets. When *X* has fewer than ``seq_len`` rows, an
        empty ``(0, seq_len, X.shape[1])`` array and an empty list.
    """
    total = X.shape[0]
    if total >= seq_len:
        offsets = [*range(0, total - seq_len + 1, stride)]
        stacked = np.stack([X[o:o + seq_len] for o in offsets], axis=0)
        return stacked, offsets
    return np.empty((0, seq_len, X.shape[1]), dtype=X.dtype), []

def build_dataset(csv_paths, seq_len=SEQ_LEN, stride=STRIDE):
    """Turn a list of CSV files into windowed model inputs with labels.

    Each file is labelled from its name, read, normalized and cut into
    windows. Files too short to yield a window are reported and skipped.

    Args:
        csv_paths: Paths of the CSV files to load.
        seq_len: Frames per window.
        stride: Offset between consecutive windows.

    Returns:
        A tuple ``(X, y, file_ids, starts)`` with one entry per window: the
        stacked windows, the integer labels, the source file's base name and
        the window's start frame.

    Raises:
        ValueError: If a file does not provide ``DIMS`` feature columns.
        RuntimeError: If no file produced any window.
    """
    window_chunks = []
    label_chunks = []
    window_files = []
    window_starts = []

    for csv_path in csv_paths:
        fname = os.path.basename(csv_path)
        label = infer_label_from_filename(csv_path)
        raw, kps = read_dlc_3kp_xy(csv_path, KEYPOINTS, USE_LIKELIHOOD, MIN_KEEP_LIKELIHOOD)

        n_dims = raw.shape[1]
        if n_dims != DIMS:
            raise ValueError(f"{fname}: 次元 {n_dims} != 期待 {DIMS}")

        windows, offsets = make_windows(apply_normalization(raw, kps), seq_len, stride)
        n_win = windows.shape[0]
        if n_win == 0:
            print(f"[WARN] {fname}: フレーム不足（{seq_len}未満）でスキップ")
            continue

        window_chunks.append(windows)
        label_chunks.append(np.full((n_win,), label, dtype=np.int64))
        window_files.extend([fname] * n_win)
        window_starts.extend(offsets)

    if not window_chunks:
        raise RuntimeError("評価用CSVがありません。test/eval_csv に CSV を置き、命名規則（ivdd/normal）を確認してください。")

    return (
        np.concatenate(window_chunks, axis=0),
        np.concatenate(label_chunks, axis=0),
        np.array(window_files),
        np.array(window_starts),
    )

# ====== Collect inputs ======
csv_files = glob.glob(CSV_GLOB)
csv_files.sort()
if len(csv_files) == 0:
    raise FileNotFoundError(f"評価用CSVが見つかりません: {CSV_GLOB}")

# Window every file; file_ids / starts record where each window came from.
X, y, file_ids, starts = build_dataset(csv_files, SEQ_LEN, STRIDE)
print("X:", X.shape, "y:", y.shape, "files:", np.unique(file_ids).size)

# ====== Load model & run inference ======
model = keras.models.load_model(CKPT_PATH, compile=False)
logits = np.asarray(model.predict(X, batch_size=64))

# Handle both a single-logit and a two-logit output head.
if logits.ndim == 1 or logits.shape[1] == 1:
    # NOTE(review): sigmoid(logit) is taken to be P(ivdd), as before. If the
    # model was trained on labels ivdd=0 / normal=1 with a single sigmoid unit,
    # this is P(normal) instead - confirm against the training script.
    p_ivdd   = tf.math.sigmoid(logits).numpy().reshape(-1)
    p_normal = 1.0 - p_ivdd
    probs    = np.stack([p_ivdd, p_normal], axis=1)  # column order: [ivdd, normal]
else:
    probs    = tf.nn.softmax(logits, axis=1).numpy()
    p_ivdd   = probs[:, CLASS_TO_IDX["ivdd"]]
    p_normal = probs[:, CLASS_TO_IDX["normal"]]

# In both branches the columns of `probs` follow CLASS_TO_IDX (0=ivdd,
# 1=normal), so argmax yields predictions in the same encoding as `y`.
# The single-logit branch used to emit 1=ivdd / 0=normal, which inverted every
# window-level metric and the majority vote relative to the true labels.
# A tie at 0.5 still resolves to ivdd.
y_pred = probs.argmax(axis=1)

# ====== Output paths ======
# Every artefact is written to OUT_DIR and carries the run id in its name.
paths = {
    key: os.path.join(OUT_DIR, f"{stem}_{RUN_ID}.{ext}")
    for key, stem, ext in (
        ("win_csv",           "window_predictions",     "csv"),
        ("roc_png",           "roc_window",             "png"),
        ("cm_window_png",     "cm_window",              "png"),
        ("file_pred_csv",     "file_level_predictions", "csv"),
        ("file_errors_csv",   "file_level_errors",      "csv"),
        ("cm_file_major_png", "cm_file_majority",       "png"),
        ("cm_file_mean_png",  "cm_file_meanprob",       "png"),
    )
}

# ====== Window-level output ======
df_win = pd.DataFrame({
    "file": file_ids, "start": starts, "true": y, "pred": y_pred,
    "p_ivdd": p_ivdd, "p_normal": p_normal,
}).sort_values(["file","start"])
df_win.to_csv(paths["win_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["win_csv"])

# Pin the label set explicitly. Without `labels`, classification_report infers
# the classes from the data and raises when only one class occurs in
# y / y_pred, because two target_names are supplied.
window_labels = [CLASS_TO_IDX["ivdd"], CLASS_TO_IDX["normal"]]

print("\n[Window-level] classification_report:")
print(classification_report(y, y_pred, labels=window_labels,
                            target_names=["ivdd","normal"], digits=4))
cm = confusion_matrix(y, y_pred, labels=window_labels)
print("[Window-level] confusion matrix:\n", cm)

# ====== ROC (ivdd treated as the positive class) ======
# Best effort: any failure here is reported and the rest of the evaluation
# still runs.
try:
    y_pos = np.asarray(y == CLASS_TO_IDX["ivdd"], dtype=int)
    auc_val = roc_auc_score(y_pos, p_ivdd)
    fpr, tpr, _ = roc_curve(y_pos, p_ivdd, pos_label=1)

    roc_fig, roc_ax = plt.subplots(figsize=(6,6))
    roc_ax.plot(fpr, tpr, label=f"AUC = {auc_val:.3f}")
    roc_ax.plot([0,1], [0,1], linestyle="--")  # chance diagonal
    roc_ax.set_title("ROC (window-level)")
    roc_ax.set_xlabel("False Positive Rate")
    roc_ax.set_ylabel("True Positive Rate")
    roc_ax.legend()
    roc_fig.tight_layout()
    roc_fig.savefig(paths["roc_png"], dpi=150)
    plt.close(roc_fig)
    print("[INFO] 保存:", paths["roc_png"])
except Exception as e:
    print(f"[WARN] ROC プロット失敗: {e}")

# ====== 可視化共通 ======
def plot_cm(cm_mat, labels, title, path_png):
    """Draw a confusion matrix as a heat map and save it as a PNG.

    Cell colour encodes each count's share of the grand total; the text in
    each cell is the raw count.

    Args:
        cm_mat: 2-D array of counts (rows: true class, columns: predicted).
        labels: Tick labels, used for both axes.
        title: Figure title.
        path_png: Destination file.
    """
    share = cm_mat.astype(np.float32) / (cm_mat.sum() + 1e-6)
    ticks = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(6,6))
    heat = ax.imshow(share, interpolation='nearest', cmap=plt.cm.Blues)
    fig.colorbar(heat, ax=ax)

    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    ax.set_title(title)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # White text on the darker half of the colour range, black elsewhere.
    half_peak = share.max() / 2
    for (row, col), count in np.ndenumerate(cm_mat):
        text_color = "white" if share[row, col] > half_peak else "black"
        ax.text(col, row, int(count), ha="center", color=text_color, fontsize=12)

    plt.tight_layout()
    plt.savefig(path_png, dpi=150)
    plt.close()

# Window-level confusion matrix figure.
plot_cm(
    cm,
    ["ivdd","normal"],
    title="Confusion Matrix (window-level)",
    path_png=paths["cm_window_png"],
)
print("[INFO] 保存:", paths["cm_window_png"])

# ====== File-level aggregation ======
# Two ways of reducing a file's windows to one prediction: the majority vote
# over window predictions, and the argmax of the mean window probabilities.
_per_file  = df_win.groupby("file")
major_pred = _per_file["pred"].agg(lambda votes: np.bincount(votes).argmax())
true_file  = _per_file["true"].first()
mean_prob  = _per_file[["p_ivdd","p_normal"]].mean()
mean_pred  = np.argmax(mean_prob.values, axis=1)

# Class index 0 is "ivdd"; every other value is reported as "normal".
df_file = pd.DataFrame({
    "file": mean_prob.index,
    "true": np.where(true_file.values == 0, "ivdd", "normal").tolist(),
    "pred_majority": np.where(major_pred.values == 0, "ivdd", "normal").tolist(),
    "pred_meanprob": np.where(mean_pred == 0, "ivdd", "normal").tolist(),
    "p_ivdd_mean": mean_prob["p_ivdd"].values,
    "p_normal_mean": mean_prob["p_normal"].values,
})
df_file.to_csv(paths["file_pred_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_pred_csv"])

# Files whose mean-probability prediction disagrees with the true label.
df_errors = df_file.loc[df_file["true"].ne(df_file["pred_meanprob"])].copy()
df_errors.to_csv(paths["file_errors_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_errors_csv"])

# ファイル単位CM
def _to_idx(series):
    return series.map({"ivdd":0, "normal":1}).values
# File-level confusion matrices, one per aggregation rule.
_true_idx = _to_idx(df_file["true"])
cm_major = confusion_matrix(_true_idx, _to_idx(df_file["pred_majority"]), labels=[0,1])
cm_mean  = confusion_matrix(_true_idx, _to_idx(df_file["pred_meanprob"]), labels=[0,1])

plot_cm(cm_major, ["ivdd","normal"],
        "Confusion Matrix (File-level, Majority Vote)", paths["cm_file_major_png"])
plot_cm(cm_mean, ["ivdd","normal"],
        "Confusion Matrix (File-level, Mean Probability)", paths["cm_file_mean_png"])
for _saved_key in ("cm_file_major_png", "cm_file_mean_png"):
    print("[INFO] 保存:", paths[_saved_key])

print("\n[Done] 出力先:", OUT_DIR)
