In [None]:
# -*- coding: utf-8 -*-
"""
Eval (3 keypoints) with a trained .keras model
- NORMALIZE_MODE must match training: "zscore" or "tail_minmax"
- outputs go to: data/test/eval_outputs/<TS>/
"""

import os
# Process-wide environment defaults, applied before TensorFlow is imported
# further down. setdefault() only fills in a value when the variable is not
# already set, so anything exported by the caller takes precedence.
# CUDA_VISIBLE_DEVICES="-1": presumably forces a CPU-only run by hiding GPUs.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")
# TF_CPP_MIN_LOG_LEVEL="2": presumably silences TensorFlow INFO/WARNING logs.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
# Thread-count variables for OpenMP / MKL, both defaulted to "1".
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

import re, glob
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(42)
np.random.seed(42)

# ====== Path settings ======
# Machine-specific absolute path to the repository; edit when running elsewhere.
REPO_ROOT = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master"
TRAIN_ROOT = os.path.join(REPO_ROOT, "data", "train")
TEST_ROOT  = os.path.join(REPO_ROOT, "data", "test")

# Evaluation CSVs are read from data/test/eval_csv/*.csv.
EVAL_CSV_DIR = os.path.join(TEST_ROOT, "eval_csv")
EVAL_GLOB    = os.path.join(EVAL_CSV_DIR, "*.csv")
OUT_BASE     = os.path.join(os.path.dirname(EVAL_CSV_DIR), "eval_outputs")  # sibling of eval_csv
# Guard so that eval_outputs is never created inside eval_csv.
assert os.path.basename(EVAL_CSV_DIR) == "eval_csv", "eval_outputsがeval_csvの内側に作られないよう防御"
os.makedirs(OUT_BASE, exist_ok=True)

# Each run writes into its own timestamped folder: eval_outputs/<YYYYmmdd-HHMMSS>/.
RUN_ID   = datetime.now().strftime("%Y%m%d-%H%M%S")
OUT_DIR  = os.path.join(OUT_BASE, RUN_ID)
os.makedirs(OUT_DIR, exist_ok=True)

# ====== Model location ======
# Edit this to pin a specific model (e.g. the full path of the best checkpoint).
CKPT_PATH = ""  # "" means: auto-discover the most recent model

# Must match the normalization used at training time.
NORMALIZE_MODE = "tail_minmax"  # or "zscore"

# ====== モデル自動探索（CKPT_PATHが空のとき） ======
def _latest_keras_in(dir_path: str) -> str | None:
    files = glob.glob(os.path.join(dir_path, "*.keras"))
    if not files:
        return None
    files.sort(key=lambda p: os.path.getmtime(p), reverse=True)
    return files[0]

if CKPT_PATH == "":
    # No explicit checkpoint: look through the training output folders in
    # priority order and stop at the first one that contains a .keras file.
    for _model_subdir in ("train1_model", "train2_model"):
        cand = _latest_keras_in(os.path.join(TRAIN_ROOT, _model_subdir))
        if cand is not None:
            break
    else:
        # Neither folder yielded a model.
        raise FileNotFoundError("最新モデル(.keras)が見つかりませんでした。CKPT_PATHを明示してください。")
    CKPT_PATH = cand

print("[INFO] using model:", CKPT_PATH)

# ====== Data settings ======
# Body parts requested from every CSV. Names are matched case-insensitively,
# ignoring spaces, "_" and "-" (see _resolve_keypoints / _norm_name).
KEYPOINTS = ["left back paw", "right back paw", "tail set"]
# When True, coordinates whose likelihood is below MIN_KEEP_LIKELIHOOD are
# blanked and re-filled by interpolation in read_dlc_3kp_xy.
USE_LIKELIHOOD      = False
MIN_KEEP_LIKELIHOOD = 0.6
# Sliding-window length and step, in frames (see make_windows).
SEQ_LEN  = 60
STRIDE   = 30
# Expected feature count per frame; build_dataset rejects files that differ.
# 6 = 3 keypoints x (x, y).
DIMS     = 6

# Class index <-> name mapping; list position in CLASS_NAMES equals the index in NAME2IDX.
CLASS_NAMES = ["normal", "ivdd"]
NAME2IDX    = {"normal":0, "ivdd":1}

# ====== 前処理 ======
def _norm_name(s: str) -> str:
    """Return *s* lower-cased with every space, underscore and hyphen removed."""
    return s.lower().translate(str.maketrans("", "", " _-"))

def _resolve_keypoints(all_bodyparts, requested):
    """Map each requested keypoint name onto the matching name in *all_bodyparts*.

    Names are compared after ``_norm_name`` normalization. When several body
    parts normalize to the same key, the first one in *all_bodyparts* is used.

    Returns:
        The matched original names, in the order of *requested*.

    Raises:
        ValueError: if any requested name has no match.
    """
    canonical = {}
    for name in all_bodyparts:
        # setdefault keeps the first body part seen for a normalized key.
        canonical.setdefault(_norm_name(name), name)

    lookup = [(req, canonical.get(_norm_name(req))) for req in requested]
    missing = [req for req, hit in lookup if hit is None]
    if missing:
        raise ValueError(f"指定KPが見つかりません: {missing}\n利用可能: {all_bodyparts}")
    return [hit for _, hit in lookup]

def infer_label_from_filename(path: str) -> int:
    """Derive the class index for a CSV from its file name, then its folder.

    Decision order:
      1. File-name tokens: a token starting with "ivdd" or a token equal to
         "normal" decides the label, provided only one of the two is present.
      2. Otherwise, the first file-name token if it is a key of ``NAME2IDX``.
      3. Otherwise, the same vote as step 1 on the tokens of the directory path.

    Raises:
        ValueError: when none of the steps yields a label.
    """
    def _tokenize(text):
        # Split on every run of characters other than a-z / 0-9; drop empties.
        return [tok for tok in re.split(r'[^a-z0-9]+', text) if tok]

    def _vote(tokens):
        # Returns a label only when exactly one of the two classes is mentioned.
        mentions_ivdd = any(tok.startswith("ivdd") for tok in tokens)
        mentions_normal = "normal" in tokens
        if mentions_ivdd == mentions_normal:
            return None
        return NAME2IDX["ivdd"] if mentions_ivdd else NAME2IDX["normal"]

    name = os.path.basename(path).lower()
    stem_tokens = _tokenize(os.path.splitext(name)[0])

    label = _vote(stem_tokens)
    if label is not None:
        return label

    if stem_tokens and stem_tokens[0] in NAME2IDX:
        return NAME2IDX[stem_tokens[0]]

    label = _vote(_tokenize(os.path.dirname(path).lower()))
    if label is not None:
        return label

    raise ValueError(f"ラベル不明: {name}")

def read_dlc_3kp_xy(csv_path: str, keypoints, use_likelihood=True, min_keep_likelihood=0.6):
    """Load the x/y tracks of the requested keypoints from a pose CSV.

    The CSV is read with a three-row column header and the first column as
    the index. Header level 1 is used as the body-part name and level 2 as
    the coordinate name ("x", "y", "likelihood"). Presumably this is the
    DeepLabCut export layout -- confirm against the data source.

    Args:
        csv_path: Path of the CSV file.
        keypoints: Requested body-part names (resolved by ``_resolve_keypoints``).
        use_likelihood: When True, coordinates whose likelihood is below
            *min_keep_likelihood* are blanked before gap filling.
        min_keep_likelihood: Likelihood threshold used for blanking.

    Returns:
        ``(X, use_kps)``: ``X`` is a float32 array with one row per frame and
        columns ordered ``kp0_x, kp0_y, kp1_x, ...``; ``use_kps`` holds the
        body-part names as they appear in the CSV.

    Raises:
        ValueError: if a requested keypoint is missing (from ``_resolve_keypoints``).
    """
    df = pd.read_csv(csv_path, header=[0, 1, 2], index_col=0)
    # dict.fromkeys de-duplicates while keeping the CSV's column order, so name
    # resolution and error messages are deterministic (a set is not).
    bodyparts = list(dict.fromkeys(bp for (_, bp, _) in df.columns))
    use_kps = _resolve_keypoints(bodyparts, keypoints)

    cols = {}
    for bp in use_kps:
        cols[f"{bp}_x"] = df.xs((bp, "x"), level=[1, 2], axis=1)
        cols[f"{bp}_y"] = df.xs((bp, "y"), level=[1, 2], axis=1)
    X_df = pd.concat(cols.values(), axis=1)
    X_df.columns = list(cols.keys())

    if use_likelihood:
        for bp in use_kps:
            try:
                lik = df.xs((bp, "likelihood"), level=[1, 2], axis=1)
            except KeyError:
                # Best effort: without a likelihood column the track is kept as-is.
                print(f"[WARN] {os.path.basename(csv_path)}: no likelihood column for "
                      f"'{bp}'; low-confidence masking skipped")
                continue
            low = lik.to_numpy().ravel() < min_keep_likelihood
            for c in (f"{bp}_x", f"{bp}_y"):
                # Series.mask returns a new, NaN-capable series. Writing NaN into
                # the array behind `.values` fails on read-only (Copy-on-Write)
                # arrays and on integer-typed columns.
                X_df[c] = X_df[c].mask(low)

    # Fill gaps: linear interpolation in both directions, then edge fill,
    # and 0.0 for columns that are entirely missing.
    X_df = X_df.interpolate(method="linear", limit_direction="both", axis=0)
    X_df = X_df.bfill().ffill().fillna(0.0)
    return X_df.values.astype(np.float32), use_kps  # (T, 2 * len(use_kps))

def zscore_per_file(X: np.ndarray, eps=1e-6) -> np.ndarray:
    """Standardize every column of *X* with that column's own mean and std.

    Statistics are taken along axis 0; *eps* is added to the standard
    deviation so constant columns do not divide by zero.
    """
    col_mean = np.mean(X, axis=0, keepdims=True)
    col_std = np.std(X, axis=0, keepdims=True)
    centered = X - col_mean
    return centered / (col_std + eps)

def normalize_tailset_minmax(X: np.ndarray, used_kps: list[str], ref_name="tail set", eps=1e-6) -> np.ndarray:
    """Center all keypoints on a reference keypoint, then min-max scale each column.

    Columns of *X* are laid out as ``kp0_x, kp0_y, kp1_x, kp1_y, ...`` in the
    order of *used_kps*. Per frame, the reference keypoint's (x, y) is
    subtracted from every keypoint; each column is then scaled with its own
    min and max over all frames.

    Args:
        X: 2-D array with at least ``2 * len(used_kps)`` columns. Not modified.
        used_kps: Keypoint names in column order.
        ref_name: Name of the reference keypoint. An exact case-insensitive
            match is preferred; otherwise names are compared ignoring spaces,
            "_" and "-" (the same rule ``_norm_name`` applies), so "tail_set"
            or "tailset" in the CSV still matches "tail set".
        eps: Added to the column range to avoid division by zero.

    Returns:
        The normalized array, same shape as *X*.

    Raises:
        ValueError: if *ref_name* matches none of *used_kps*.
    """
    def _key(name: str) -> str:
        return "".join(ch for ch in name.lower() if ch not in " _-")

    lowered = [s.lower() for s in used_kps]
    if ref_name.lower() in lowered:
        r = lowered.index(ref_name.lower())
    else:
        # Fall back to the looser matching used when the keypoints were resolved.
        keys = [_key(s) for s in used_kps]
        if _key(ref_name) not in keys:
            raise ValueError(f"'{ref_name}' が used_kps にありません: {used_kps}")
        r = keys.index(_key(ref_name))

    n_kp = len(used_kps)
    Xc = X.copy()
    # Reference coordinates are read from the untouched input, so centering
    # the reference's own columns does not affect the other keypoints.
    cx, cy = X[:, 2*r], X[:, 2*r+1]
    Xc[:, 0:2*n_kp:2] -= cx[:, None]
    Xc[:, 1:2*n_kp:2] -= cy[:, None]

    mn = Xc.min(axis=0, keepdims=True)
    mx = Xc.max(axis=0, keepdims=True)
    return (Xc - mn) / (mx - mn + eps)

def make_windows(X: np.ndarray, seq_len: int, stride: int):
    """Cut *X* into fixed-length windows along axis 0.

    Windows start at 0, stride, 2*stride, ... as long as a full window of
    *seq_len* rows fits; leftover rows at the end are not used.

    Returns:
        ``(windows, starts)``: the stacked windows with shape
        ``(n_windows, seq_len, n_features)`` and the list of start rows.
        When *X* has fewer than *seq_len* rows, an empty array of shape
        ``(0, seq_len, n_features)`` and an empty list are returned.
    """
    n_frames = X.shape[0]
    if n_frames < seq_len:
        too_short = np.empty((0, seq_len, X.shape[1]), dtype=X.dtype)
        return too_short, []

    last_start = n_frames - seq_len
    starts = list(range(0, last_start + 1, stride))
    windows = [X[begin:begin + seq_len] for begin in starts]
    return np.stack(windows, axis=0), starts

def build_dataset(csv_paths, seq_len=SEQ_LEN, stride=STRIDE):
    """Turn a list of pose CSVs into windowed arrays for evaluation.

    For each file: infer the label from its name, load the keypoint tracks,
    normalize them according to ``NORMALIZE_MODE`` and cut them into windows.
    Files too short for a single window are skipped with a warning.

    Returns:
        ``(X, y, fids, starts)``: the windows, one label per window, the
        source file name per window and the start frame per window.

    Raises:
        ValueError: on an unexpected feature count or unknown ``NORMALIZE_MODE``.
        RuntimeError: if no file produced any window.
    """
    window_chunks, label_chunks = [], []
    file_names, window_starts = [], []
    first_kps = None  # keypoint names of the first file, reported at the end

    for csv_path in csv_paths:
        base = os.path.basename(csv_path)
        label = infer_label_from_filename(csv_path)
        raw, kps = read_dlc_3kp_xy(csv_path, KEYPOINTS, USE_LIKELIHOOD, MIN_KEEP_LIKELIHOOD)
        if first_kps is None:
            first_kps = kps

        n_dims = raw.shape[1]
        if n_dims != DIMS:
            raise ValueError(f"{base}: 次元{n_dims} != 期待{DIMS}")

        if NORMALIZE_MODE == "tail_minmax":
            normed = normalize_tailset_minmax(raw, kps)
        elif NORMALIZE_MODE == "zscore":
            normed = zscore_per_file(raw)
        else:
            raise ValueError("NORMALIZE_MODE は 'zscore' or 'tail_minmax'")

        windows, offsets = make_windows(normed, seq_len, stride)
        n_win = windows.shape[0]
        if n_win == 0:
            # Not enough frames for even one window.
            print(f"[WARN] {base}: フレーム不足でスキップ")
            continue

        window_chunks.append(windows)
        label_chunks.append(np.full((n_win,), label, dtype=np.int64))
        file_names.extend([base] * n_win)
        window_starts.extend(offsets)

    if not window_chunks:
        raise RuntimeError("評価用データが作れませんでした。eval_csv内のCSV名を確認してください。")

    X_all = np.concatenate(window_chunks, axis=0)
    y_all = np.concatenate(label_chunks, axis=0)
    fid_arr = np.array(file_names)
    start_arr = np.array(window_starts)
    print(f"[INFO] 使用キーポイント: {first_kps}")
    return X_all, y_all, fid_arr, start_arr

# ====== Load evaluation CSVs ======
# Sorted so the file order (and therefore the window order) is reproducible.
eval_csvs = sorted(glob.glob(EVAL_GLOB))
if not eval_csvs:
    raise FileNotFoundError(f"評価用CSVが見つかりません: {EVAL_GLOB}")

X, y, file_ids, starts = build_dataset(eval_csvs, SEQ_LEN, STRIDE)
print("X:", X.shape, "y:", y.shape, "files:", len(np.unique(file_ids)))

# ====== Load the model and run inference ======
# compile=False: the model is only used for predict() here.
model = keras.models.load_model(CKPT_PATH, compile=False)
logits = model.predict(X, batch_size=64)

# Convert the model output to P(ivdd) depending on its shape, so that both a
# Dense(1) head and a Dense(2) head are supported.
# NOTE(review): this assumes the model emits raw logits. If the saved model
# already ends in a sigmoid/softmax activation, applying it again here would
# distort the probabilities -- confirm against the training script.
if logits.ndim == 2 and logits.shape[1] == 1:
    p_ivdd = tf.math.sigmoid(logits).numpy().ravel()
elif logits.ndim == 2 and logits.shape[1] == 2:
    probs = tf.nn.softmax(logits, axis=1).numpy()
    # "ivdd" is assumed to be index 1 (matching 0=normal, 1=ivdd at training time).
    p_ivdd = probs[:, 1]
else:
    raise ValueError(f"未知のlogits形状: {logits.shape}")

# Binary decision at a fixed 0.5 threshold on P(ivdd).
p_normal = 1.0 - p_ivdd
y_pred = (p_ivdd >= 0.5).astype(int)

# ====== Output paths ======
# Every artifact of this run lives in OUT_DIR and carries RUN_ID in its name.
paths = dict(
    win_csv=os.path.join(OUT_DIR, f"window_predictions_{RUN_ID}.csv"),
    roc_png=os.path.join(OUT_DIR, f"roc_window_{RUN_ID}.png"),
    cm_win_png=os.path.join(OUT_DIR, f"cm_window_{RUN_ID}.png"),
    file_pred_csv=os.path.join(OUT_DIR, f"file_level_predictions_{RUN_ID}.csv"),
    file_errors_csv=os.path.join(OUT_DIR, f"file_level_errors_{RUN_ID}.csv"),
    cm_file_major_png=os.path.join(OUT_DIR, f"cm_file_majority_{RUN_ID}.png"),
    cm_file_mean_png=os.path.join(OUT_DIR, f"cm_file_meanprob_{RUN_ID}.png"),
)

# ====== Window-level report ======
# labels=[0, 1] pins both classes. Without it, classification_report raises
# ValueError whenever only one class occurs in y / y_pred, because the number
# of classes found no longer matches the two target_names.
print("\n[Window-level] classification_report:")
print(classification_report(y, y_pred, labels=[0, 1], target_names=CLASS_NAMES, digits=4))
cm_win = confusion_matrix(y, y_pred, labels=[0,1])
print("[Window-level] confusion matrix:\n", cm_win)

# One row per window, ordered by file and start frame.
df_win = (
    pd.DataFrame({
        "file": file_ids, "start": starts, "true": y, "pred": y_pred,
        "p_ivdd": p_ivdd, "p_normal": p_normal
    })
    .sort_values(["file","start"])
)
df_win.to_csv(paths["win_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["win_csv"])

# ====== ROC (window-level) ======
# Best effort: any failure here is reported as a warning and the script goes on.
try:
    auc_val = roc_auc_score(y, p_ivdd)
    fpr, tpr, _ = roc_curve(y, p_ivdd, pos_label=1)
    roc_fig, roc_ax = plt.subplots(figsize=(6, 6))
    roc_ax.plot(fpr, tpr, label=f"AUC={auc_val:.3f}")
    roc_ax.plot([0, 1], [0, 1], "--")  # diagonal reference line
    roc_ax.set_title("ROC (window-level)")
    roc_ax.set_xlabel("FPR")
    roc_ax.set_ylabel("TPR")
    roc_ax.legend()
    roc_fig.tight_layout()
    roc_fig.savefig(paths["roc_png"], dpi=150)
    plt.close(roc_fig)
    print("[INFO] 保存:", paths["roc_png"])
except Exception as e:
    print("[WARN] ROC失敗:", e)

# ====== 混同行列（ウィンドウ） ======
def plot_cm(cm, labels, title, out_png):
    """Draw a confusion matrix as a heat map and save it to *out_png*.

    Cell colour encodes the row-normalized rate; the number written in each
    cell is the raw count from *cm*.

    Args:
        cm: 2-D array of counts (rows = true class, columns = predicted class).
        labels: Tick labels for both axes.
        title: Figure title.
        out_png: Destination image path.
    """
    # Rows that sum to zero are divided by 1 instead, which leaves them at 0.
    row_totals = np.maximum(cm.sum(axis=1, keepdims=True), 1)
    rates = cm.astype(np.float32) / row_totals
    ticks = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(6, 6))
    heat = ax.imshow(rates, interpolation="nearest", cmap=plt.cm.Blues)
    fig.colorbar(heat, ax=ax)
    ax.set(xticks=ticks, yticks=ticks,
           xticklabels=labels, yticklabels=labels, ylabel="True", xlabel="Pred", title=title)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    # White text on dark cells, black text on light ones.
    half_peak = rates.max() / 2
    for i, j in np.ndindex(*cm.shape):
        text_color = "white" if rates[i, j] > half_peak else "black"
        ax.text(j, i, int(cm[i, j]), ha="center", color=text_color, fontsize=12)

    fig.tight_layout()
    fig.savefig(out_png, dpi=150)
    plt.close(fig)

plot_cm(cm_win, CLASS_NAMES, "Confusion Matrix (window-level)", paths["cm_win_png"])
print("[INFO] 保存:", paths["cm_win_png"])

# ====== File-level aggregation (majority vote / mean probability) ======
# Majority vote over a file's windows; a tie resolves to class 0 (argmax picks the first).
major_pred = df_win.groupby("file")["pred"].agg(lambda a: np.bincount(a, minlength=2).argmax())
true_file  = df_win.groupby("file")["true"].first()  # all windows of a file share one label

# Mean window probability per file, thresholded at 0.5.
mean_prob = df_win.groupby("file")[["p_normal","p_ivdd"]].mean()
mean_pred = (mean_prob["p_ivdd"].values >= 0.5).astype(int)

# labels=[0, 1] keeps classification_report from raising ValueError when the
# evaluated files (or the predictions) contain a single class only, which is
# likely with a small number of files.
print("\n[File-level Majority] report:")
print(classification_report(true_file.values, major_pred.values, labels=[0, 1], target_names=CLASS_NAMES, digits=4))
print("[File-level Majority] CM:\n", confusion_matrix(true_file.values, major_pred.values, labels=[0,1]))

print("\n[File-level MeanProb] report:")
print(classification_report(true_file.values, mean_pred, labels=[0, 1], target_names=CLASS_NAMES, digits=4))
print("[File-level MeanProb] CM:\n", confusion_matrix(true_file.values, mean_pred, labels=[0,1]))

# All three groupby results are indexed by "file" in the same sorted order,
# so their values line up row by row.
df_file = pd.DataFrame({
    "file": mean_prob.index,
    "true": [CLASS_NAMES[t] for t in true_file.values],
    "pred_majority": [CLASS_NAMES[p] for p in major_pred.values],
    "pred_meanprob": [CLASS_NAMES[p] for p in mean_pred],
    "p_normal_mean": mean_prob["p_normal"].values,
    "p_ivdd_mean": mean_prob["p_ivdd"].values,
})
df_file.to_csv(paths["file_pred_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_pred_csv"])

# Misclassified files only (judged by the mean-probability prediction).
df_err = df_file[df_file["true"] != df_file["pred_meanprob"]].copy()
df_err.to_csv(paths["file_errors_csv"], index=False, encoding="utf-8-sig")
print("[INFO] 保存:", paths["file_errors_csv"])

# ファイル単位 CM 画像
def plot_cm_filelevel(y_true, y_pred, labels, out_png, title):
    """Compute a confusion matrix from label arrays and save it as a heat map.

    Drawing is delegated to ``plot_cm`` so window-level and file-level
    figures share one implementation.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        labels: Class display names. Class indices are taken to be
            ``0 .. len(labels) - 1`` in this order, which matches the
            two-class ``[0, 1]`` case used in this script.
        out_png: Destination image path.
        title: Figure title.
    """
    class_ids = list(range(len(labels)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    plot_cm(cm, labels, title, out_png)

y_true_f = true_file.values
y_pred_major = major_pred.values
y_pred_mean  = mean_pred

# One confusion-matrix image per file-level aggregation strategy.
for _preds, _path_key, _title in (
    (y_pred_major, "cm_file_major_png", "Confusion Matrix (File-level, Majority Vote)"),
    (y_pred_mean, "cm_file_mean_png", "Confusion Matrix (File-level, Mean Probability)"),
):
    plot_cm_filelevel(y_true_f, _preds, CLASS_NAMES, paths[_path_key], _title)
print("[INFO] すべて保存完了:", OUT_DIR)
