In [19]:
# -*- coding: utf-8 -*-
"""
Binary IVDD validation (ivdd vs normal) from DeepLabCut CSV (3-level header)
- trainで使ったのと同じ前処理/ネットワーク構造
- 5 keypoints × (x,y) = 10 dims
- 窓長 SEQ_LEN=30, STRIDE=5（要件通り）
- 学習済み .keras モデルを読み込んで、評価・混同行列・ROC・各種CSV出力
"""

import os
# 安定運用（GPU隠蔽/ログ控えめ/スレッド制御は任意）
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

import glob
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import (
    classification_report, confusion_matrix,
    roc_auc_score, roc_curve, auc
)
import matplotlib.pyplot as plt
import os
import re




# ========= Parameters =========
# Use the same values as in training.
DATA_DIR   = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\ivdd\test"          # folder holding the evaluation CSVs (e.g. the test split)
CSV_GLOB   = os.path.join(DATA_DIR, "*.csv")

# Best model saved during training.
CKPT_DIR   = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\ivdd\train\checkpoints"
CKPT_PATH  = os.path.join(CKPT_DIR, "ivdd_lstm_final copy.keras")    # keep in sync with the path used in ivdd_train.ipynb

# The five keypoints (names as specified by the user). They are matched against
# the CSV body-part names after normalization (case, spaces, '_' and '-' ignored).
KEYPOINTS = [
    "left back paw",
    "right back paw",
    "left front paw",
    "right front paw",
    "tail set",
]

# When enabled, coordinates whose likelihood is below MIN_KEEP_LIKELIHOOD are
# treated as missing and filled by interpolation during CSV loading.
USE_LIKELIHOOD       = True
MIN_KEEP_LIKELIHOOD  = 0.6

SEQ_LEN  = 60    # frames per window
STRIDE   = 30    # frames between consecutive window starts
DIMS     = 10    # 5 keypoints × (x, y)
N_HIDDEN = 30    # must match training
CLASS_NAMES  = ["ivdd", "normal"]
# Class name -> integer label (position in CLASS_NAMES).
CLASS_TO_IDX = {c:i for i,c in enumerate(CLASS_NAMES)}
N_CLASSES    = 2

# Output directory for plots and CSVs (created on import if missing).
OUT_DIR = os.path.join(DATA_DIR, "eval_outputs")
os.makedirs(OUT_DIR, exist_ok=True)

# Fix the TensorFlow and NumPy random seeds.
tf.random.set_seed(42)
np.random.seed(42)

# ========= ラベル推定（ファイル名から） =========
def infer_label_from_filename(path: str) -> int:
    """
    Infer the class index for a CSV file from its path.

    Resolution order (the first rule that yields a single class wins):
      1) Split the lower-cased file stem on non-alphanumeric characters. If
         exactly one class name appears as a whole token, use it.
      2) Otherwise look at the first token only, ignoring a trailing numeric
         suffix, so that 'ivdd1_case1DLC_...' resolves to 'ivdd' and
         'normal12_...' resolves to 'normal'. Without this step such files
         fell through to rule 3 and were labelled by whatever class name
         happened to appear in the directory path.
      3) Otherwise tokenize the directory portion of the path (all ancestor
         components, not only the immediate parent) and use the class name
         found there, again only if exactly one class name is present.

    Args:
        path: Path to the CSV file.

    Returns:
        The integer label from CLASS_TO_IDX.

    Raises:
        ValueError: If none of the rules identifies a single class.
    """
    name = os.path.basename(path).lower()
    stem = os.path.splitext(name)[0]

    def _single_class(token_set):
        # Index of the only class name present in token_set, or None when
        # zero or several class names are present.
        hits = [cls for cls in CLASS_TO_IDX if cls in token_set]
        return CLASS_TO_IDX[hits[0]] if len(hits) == 1 else None

    # e.g. "normal_10dlc_resnet50_ivddoct30"
    #      -> ["normal", "10dlc", "resnet50", "ivddoct30"]
    tokens = [t for t in re.split(r'[^a-z0-9]+', stem) if t]

    # Rule 1: an unambiguous exact token anywhere in the file name.
    label = _single_class(set(tokens))
    if label is not None:
        return label

    # Rule 2: leading token, tolerating a numeric suffix ("ivdd1" -> "ivdd").
    if tokens:
        lead = tokens[0].rstrip("0123456789")
        if lead in CLASS_TO_IDX:
            return CLASS_TO_IDX[lead]

    # Rule 3: fall back to the directory part of the path.
    parent_tokens = [t for t in re.split(r'[^a-z0-9]+', os.path.dirname(path).lower()) if t]
    label = _single_class(set(parent_tokens))
    if label is not None:
        return label

    # Still undecided: refuse to guess.
    raise ValueError(
        f"ラベルを特定できません: {name} "
        f"(推奨: ファイル名の先頭を 'ivdd_' か 'normal_' にしてください)"
    )

# ========= 前処理（train と同じ流儀） =========
def _norm_name(s: str) -> str:
    return "".join(ch for ch in s.lower() if ch not in " _-")

def _resolve_keypoints(all_bodyparts, requested):
    """
    Map each requested keypoint name to the matching body-part name in the CSV.

    Names are compared after normalization with _norm_name. When several body
    parts normalize to the same key, the first one encountered is kept.

    Args:
        all_bodyparts: Body-part names available in the CSV.
        requested: Keypoint names to look up.

    Returns:
        The original body-part names, in the order of *requested*.

    Raises:
        ValueError: If any requested keypoint has no match.
    """
    lookup = {}
    for original in all_bodyparts:
        # setdefault only inserts when the key is new, so the first hit wins.
        lookup.setdefault(_norm_name(original), original)

    found = []
    missing = []
    for wanted in requested:
        match = lookup.get(_norm_name(wanted))
        if match is None:
            missing.append(wanted)
        else:
            found.append(match)

    if missing:
        raise ValueError(f"指定キーポイントがCSVで見つかりません: {missing}\n利用可能: {all_bodyparts}")
    return found

def read_dlc_5kp_xy(csv_path: str,
                    keypoints,
                    use_likelihood=True,
                    min_keep_likelihood=0.6) -> tuple[np.ndarray, list[str]]:
    """
    Read a DeepLabCut CSV with a 3-row header and extract (x, y) per keypoint.

    Coordinates whose likelihood is below the threshold are replaced by NaN and
    then filled: linear interpolation, then backward/forward fill, then 0.0 for
    columns that are still empty.

    Args:
        csv_path: Path to the CSV file (first column is used as the index).
        keypoints: Keypoint names to extract; matched via _resolve_keypoints.
        use_likelihood: Whether to mask low-likelihood coordinates.
        min_keep_likelihood: Likelihood below which a coordinate is masked.

    Returns:
        A tuple ``(X, use_kps)``. ``X`` is a float32 array with one row per
        frame and the columns ordered x, y for each keypoint in turn.
        ``use_kps`` holds the body-part names as spelled in the CSV.

    Raises:
        ValueError: If a requested keypoint is not present in the CSV.
    """
    df = pd.read_csv(csv_path, header=[0, 1, 2], index_col=0)

    # dict.fromkeys de-duplicates while keeping CSV column order. A set would
    # make the order (and therefore tie-breaking between body parts that
    # normalize to the same name, and the error message) vary between runs.
    bodyparts = list(dict.fromkeys(bp for (_, bp, _) in df.columns))
    use_kps = _resolve_keypoints(bodyparts, keypoints)

    # Extract the (x, y) columns, keeping x before y for every keypoint.
    cols = {}
    for bp in use_kps:
        for coord in ("x", "y"):
            cols[f"{bp}_{coord}"] = df.xs((bp, coord), level=[1, 2], axis=1)
    X_df = pd.concat(cols.values(), axis=1)
    X_df.columns = list(cols.keys())

    # Mask coordinates with low likelihood.
    if use_likelihood:
        for bp in use_kps:
            try:
                likelihood = df.xs((bp, "likelihood"), level=[1, 2], axis=1)
            except KeyError:
                # Best effort: a keypoint without a likelihood column is
                # simply left unmasked.
                continue
            low = likelihood.to_numpy().ravel() < min_keep_likelihood
            for c in (f"{bp}_x", f"{bp}_y"):
                # Series.mask returns a new Series, so this neither depends on
                # `.values` being writable nor fails on integer columns.
                X_df[c] = X_df[c].mask(low)

    # Linear interpolation -> backward/forward fill -> zero fill.
    X_df = X_df.interpolate(method="linear", limit_direction="both", axis=0)
    X_df = X_df.bfill().ffill().fillna(0.0)

    X = X_df.to_numpy().astype(np.float32)
    return X, use_kps

def zscore_per_file(X: np.ndarray, eps: float=1e-6) -> np.ndarray:
    """
    Standardize every column of *X* using that column's own mean and std.

    *eps* is added to the standard deviation so constant columns do not cause a
    division by zero.
    """
    centered = X - X.mean(axis=0, keepdims=True)
    scale = X.std(axis=0, keepdims=True) + eps
    return centered / scale

def make_windows(X: np.ndarray, seq_len: int, stride: int) -> np.ndarray:
    """
    Cut *X* along its first axis into windows of *seq_len* rows.

    Window starts are 0, stride, 2*stride, ... and only complete windows are
    kept. When *X* has fewer than *seq_len* rows an empty array of shape
    (0, seq_len, X.shape[1]) is returned.
    """
    total = X.shape[0]
    if total >= seq_len:
        windows = []
        for begin in range(0, total - seq_len + 1, stride):
            windows.append(X[begin:begin + seq_len])
        return np.stack(windows, axis=0)
    # Too short for even one window.
    return np.empty((0, seq_len, X.shape[1]), dtype=X.dtype)

def build_dataset(csv_paths, seq_len=SEQ_LEN, stride=STRIDE):
    """
    Turn a list of DeepLabCut CSVs into windowed evaluation arrays.

    For every file the label is inferred from the path, the keypoint
    coordinates are loaded, z-scored per file and cut into windows. Files that
    are too short to yield a window are skipped with a warning.

    Args:
        csv_paths: Paths of the CSV files to load.
        seq_len: Number of frames per window.
        stride: Number of frames between consecutive window starts.

    Returns:
        ``(X, y, file_ids, starts)``: the stacked windows, one label per
        window, the source file name per window and the start frame per window.

    Raises:
        ValueError: If a file does not yield DIMS columns (or its label cannot
            be inferred).
        RuntimeError: If no file produced any window.
    """
    window_chunks = []
    label_chunks = []
    file_ids = []
    starts = []
    used_kps_any = None

    for p in csv_paths:
        label = infer_label_from_filename(p)

        X_raw, kps = read_dlc_5kp_xy(
            p,
            keypoints=KEYPOINTS,
            use_likelihood=USE_LIKELIHOOD,
            min_keep_likelihood=MIN_KEEP_LIKELIHOOD
        )
        # Remember the keypoints of the first file for the summary line below.
        if used_kps_any is None:
            used_kps_any = kps

        if X_raw.shape[1] != DIMS:
            raise ValueError(f"{os.path.basename(p)}: 取り出し次元 {X_raw.shape[1]} != 期待 {DIMS}")

        X_raw = zscore_per_file(X_raw)
        windows = make_windows(X_raw, seq_len, stride)
        n_windows = windows.shape[0]
        if n_windows == 0:
            print(f"[WARN] {os.path.basename(p)}: フレーム不足（{seq_len}未満）でスキップ")
            continue

        window_chunks.append(windows)
        label_chunks.append(np.full((n_windows,), label, dtype=np.int64))
        file_ids.extend([os.path.basename(p)] * n_windows)
        # Same start positions that make_windows used for this file.
        starts.extend(range(0, X_raw.shape[0] - seq_len + 1, stride))

    if len(window_chunks) == 0:
        raise RuntimeError("評価用データが作れませんでした。CSV名に 'ivdd' または 'normal' を含めてください。")

    X = np.concatenate(window_chunks, axis=0)
    y = np.concatenate(label_chunks, axis=0)
    print(f"[INFO] 使用キーポイント: {used_kps_any}")
    return X, y, np.array(file_ids), np.array(starts)

# ========= モデル定義（学習時と同一） =========
class LSTM_RNN(keras.Model):
    """
    Two-layer LSTM classifier over keypoint sequences.

    Layer stack: TimeDistributed(Dense(n_hidden, relu)) -> LSTM(n_hidden,
    return_sequences=True) -> LSTM(n_hidden) -> Dense(n_classes). The output is
    raw logits (no softmax).

    The constructor arguments are stored and reported by get_config() so that a
    model saved with this class can be rebuilt by keras.models.load_model().
    Checkpoints written before get_config() existed do not contain these
    arguments and therefore still cannot be revived as a full model; for those
    only the weights can be loaded into a freshly built instance.

    Args:
        n_input: Feature dimension per time step. It is recorded for
            serialization only; no layer is built from it.
        n_hidden: Units of the input Dense layer and of both LSTM layers.
        n_classes: Number of output logits.
        **kwargs: Standard keras.Model arguments (e.g. name, trainable, dtype),
            needed so from_config() can pass the base config back in.
    """

    def __init__(self, n_input, n_hidden, n_classes, **kwargs):
        super().__init__(**kwargs)
        # Plain Python values kept only for get_config().
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        # NOTE: attribute names and creation order of the layers are kept as-is
        # so that previously saved weights still map onto this model.
        self.input_dense = keras.layers.Dense(n_hidden, activation='relu')
        self.time_dist   = keras.layers.TimeDistributed(self.input_dense)
        self.lstm1 = keras.layers.LSTM(n_hidden, return_sequences=True)
        self.lstm2 = keras.layers.LSTM(n_hidden)
        self.out   = keras.layers.Dense(n_classes)  # logits

    def call(self, x, training=False):
        """Run the forward pass and return the logits."""
        x = self.time_dist(x)
        x = self.lstm1(x, training=training)
        x = self.lstm2(x, training=training)
        x = self.out(x)
        return x

    def get_config(self):
        """Return the base config plus every constructor argument."""
        config = super().get_config()
        config.update({
            "n_input": self.n_input,
            "n_hidden": self.n_hidden,
            "n_classes": self.n_classes,
        })
        return config

class LSTMWithL2(LSTM_RNN):
    """
    LSTM_RNN variant that additionally carries an L2 coefficient.

    l2_lambda is only stored on the instance here; no regularizer is attached
    to any layer in this class. NOTE(review): presumably the training loop
    reads it to add the penalty — confirm against the training notebook.
    """

    def __init__(self, n_input, n_hidden, n_classes, l2_lambda=1e-4, **kwargs):
        super().__init__(n_input, n_hidden, n_classes, **kwargs)
        self.l2_lambda = l2_lambda

    def get_config(self):
        """Extend the parent config with l2_lambda."""
        config = super().get_config()
        config["l2_lambda"] = self.l2_lambda
        return config

# ========= Load data =========
# Collect the evaluation CSVs in sorted order so runs are reproducible.
csv_files = glob.glob(CSV_GLOB)
csv_files.sort()
if len(csv_files) == 0:
    raise FileNotFoundError(f"評価用CSVが見つかりません: {CSV_GLOB}")

X, y, file_ids, starts = build_dataset(csv_files, seq_len=SEQ_LEN, stride=STRIDE)
print("X:", X.shape, "y:", y.shape, "files:", np.unique(file_ids).size)

# ========= モデル読込 =========
def load_trained_model():
    """
    Load the trained model stored at CKPT_PATH.

    First tries to restore the complete model with keras.models.load_model. If
    that fails for any reason, a warning is printed and a fresh LSTMWithL2 is
    built (using DIMS, N_HIDDEN, N_CLASSES) and only the weights are loaded
    from the same file.

    Returns:
        The loaded Keras model.

    Raises:
        RuntimeError: If loading the weights into the fresh model also fails.
    """
    known_classes = {"LSTMWithL2": LSTMWithL2, "LSTM_RNN": LSTM_RNN}
    try:
        # Simplest path: the file contains the full model.
        net = keras.models.load_model(
            CKPT_PATH,
            custom_objects=known_classes,
            compile=False
        )
        print(f"[INFO] Loaded full model: {CKPT_PATH}")
        return net
    except Exception as e:
        print(f"[WARN] load_model 失敗: {e}")
        # Fallback: rebuild the same architecture and load the weights only.
        net = LSTMWithL2(n_input=DIMS, n_hidden=N_HIDDEN, n_classes=N_CLASSES, l2_lambda=1e-4)
        # One forward pass on zeros creates the variables before loading.
        net(tf.zeros([1, SEQ_LEN, DIMS]))
        try:
            net.load_weights(CKPT_PATH)
            print(f"[INFO] Loaded weights into fresh model from: {CKPT_PATH}")
            return net
        except Exception as e2:
            raise RuntimeError(f"モデルの読み込みに失敗しました。: {e2}")

model = load_trained_model()

# ========= Inference (per window) =========
logits = model.predict(X, batch_size=64)
# The model's last layer is Dense(n_classes) without activation, so the output
# is treated as logits and converted to probabilities here.
probs = tf.nn.softmax(logits, axis=1).numpy()
y_pred = probs.argmax(axis=1)

# ========= Report (per window) =========
# `labels` is passed explicitly so the report and the matrix always have one
# row per class. Without it, an evaluation set in which only one class occurs
# (in both y and y_pred) makes classification_report raise because
# target_names no longer matches, and shrinks the confusion matrix below 2x2.
print("\n[Window-level] classification_report:")
print(classification_report(
    y, y_pred,
    labels=list(range(len(CLASS_NAMES))),
    target_names=CLASS_NAMES,
    digits=4,
))
cm = confusion_matrix(y, y_pred, labels=list(range(len(CLASS_NAMES))))
print("[Window-level] confusion matrix:\n", cm)

# ========= File level (majority vote / mean probability) =========
# One row per window; saved as-is for later inspection.
df_win = pd.DataFrame({
    "file": file_ids,
    "start": starts,
    "true": y,
    "pred": y_pred,
    "p_ivdd": probs[:, CLASS_TO_IDX["ivdd"]],
    "p_normal": probs[:, CLASS_TO_IDX["normal"]],
})
df_win.sort_values(["file","start"], inplace=True)
df_win.to_csv(os.path.join(OUT_DIR, "window_predictions.csv"), index=False, encoding="utf-8-sig")
print("[INFO] 保存: window_predictions.csv")

# Majority vote over the windows of each file (ties go to the lower index).
major_pred = df_win.groupby("file")["pred"].agg(lambda a: np.bincount(a).argmax())
true_file  = df_win.groupby("file")["true"].first()  # every window of a file shares one label
# `labels` is given explicitly so that a test set containing files of a single
# class does not make classification_report raise on the target_names length.
print("\n[File-level Majority Vote] classification_report:")
print(classification_report(
    true_file.values, major_pred.values,
    labels=list(range(len(CLASS_NAMES))),
    target_names=CLASS_NAMES,
    digits=4,
))
print("[File-level Majority Vote] confusion matrix:\n",
      confusion_matrix(true_file.values, major_pred.values,
                       labels=list(range(len(CLASS_NAMES)))))

# Mean probability per file -> argmax. The column order matches CLASS_NAMES,
# so the argmax position is the class index.
mean_prob = df_win.groupby("file")[["p_ivdd", "p_normal"]].mean()
mean_pred = mean_prob.values.argmax(axis=1)
print("\n[File-level Mean Probability] classification_report:")
print(classification_report(
    true_file.values, mean_pred,
    labels=list(range(len(CLASS_NAMES))),
    target_names=CLASS_NAMES,
    digits=4,
))
print("[File-level Mean Probability] confusion matrix:\n",
      confusion_matrix(true_file.values, mean_pred,
                       labels=list(range(len(CLASS_NAMES)))))

# ========= ROC (per window) =========
# Best effort: a failure here (e.g. only one class present) must not abort the
# rest of the evaluation, so any error is reported as a warning.
try:
    y_score_ivdd = probs[:, CLASS_TO_IDX["ivdd"]]
    # Binarize the target with ivdd as the positive class. roc_auc_score treats
    # the greater label as positive; with ivdd=0 / normal=1 the raw labels made
    # it score P(ivdd) against "normal", so the AUC in the legend was
    # 1 - (AUC of the plotted curve).
    y_is_ivdd = (y == CLASS_TO_IDX["ivdd"]).astype(int)
    auc_val = roc_auc_score(y_is_ivdd, y_score_ivdd)
    fpr, tpr, _ = roc_curve(y_is_ivdd, y_score_ivdd)
    plt.figure(figsize=(6,6))
    plt.plot(fpr, tpr, label=f"AUC = {auc_val:.3f}")
    plt.plot([0,1], [0,1], linestyle="--")  # chance diagonal
    plt.title("ROC (window-level)")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, "roc_window.png"), dpi=150)
    plt.close()
    print("[INFO] 保存: roc_window.png")
except Exception as e:
    print(f"[WARN] ROC プロット失敗: {e}")

# ========= 混同行列プロット（ウィンドウ単位） =========
def plot_cm(cm, labels, title, path_png):
    """
    Draw a confusion matrix as a heat map and save it as a PNG.

    Cell colours show each count as a fraction of the total; the number
    written in each cell is the raw count.

    Args:
        cm: Confusion matrix of counts (rows = true, columns = predicted).
        labels: Tick labels for both axes.
        title: Figure title.
        path_png: Destination file path.
    """
    fractions = cm.astype(np.float32) / (cm.sum() + 1e-6)
    ticks = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(6,6))
    image = ax.imshow(fractions, interpolation='nearest', cmap=plt.cm.Blues)
    fig.colorbar(image, ax=ax)
    ax.set(
        xticks=ticks, yticks=ticks,
        xticklabels=labels, yticklabels=labels,
        ylabel='True label', xlabel='Predicted label',
        title=title,
    )
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # White text on dark cells, black text on light cells.
    half_max = fractions.max()/2
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        text_color = "white" if fractions[row, col] > half_max else "black"
        ax.text(col, row, int(cm[row, col]), ha="center",
                color=text_color, fontsize=12)

    fig.tight_layout()
    fig.savefig(path_png, dpi=150)
    plt.close(fig)

plot_cm(
    cm,
    CLASS_NAMES,
    "Confusion Matrix (window-level)",
    os.path.join(OUT_DIR, "cm_window.png"),
)
print("[INFO] 保存: cm_window.png")

# ========= Prediction CSV (aggregated per file) =========
# true_file, major_pred and mean_prob all come from groupby("file") on df_win,
# so their rows are in the same file order.
df_file = pd.DataFrame({
    "file": mean_prob.index,
    "true": list(map(CLASS_NAMES.__getitem__, true_file.values)),
    "pred_majority": list(map(CLASS_NAMES.__getitem__, major_pred.values)),
    "pred_meanprob": list(map(CLASS_NAMES.__getitem__, mean_pred)),
    "p_ivdd_mean": mean_prob["p_ivdd"].to_numpy(),
    "p_normal_mean": mean_prob["p_normal"].to_numpy(),
})
df_file.to_csv(
    os.path.join(OUT_DIR, "file_level_predictions.csv"),
    index=False,
    encoding="utf-8-sig",
)
print("[INFO] 保存: file_level_predictions.csv")

print("\n[Done] 検証が完了しました。出力は:", OUT_DIR)


[WARN] ivdd1_case1DLC_resnet50_IvddOct30shuffle1_100000.csv: フレーム不足（60未満）でスキップ
[INFO] 使用キーポイント: ['left back paw', 'right back paw', 'left front paw', 'right front paw', 'tail set']
X: (288, 60, 10) y: (288,) files: 42
[WARN] load_model 失敗: <class '__main__.LSTMWithL2'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': None, 'class_name': 'LSTMWithL2', 'config': {'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'LSTMWithL2', 'build_config': {'input_shape': [None, 60, 10]}, 'compile_config': None}.

Exception encountered: Unable to revive model from config. When overriding the `get_config()` method, make sure that the returned config contains all items used as arguments in the  constructor to <class '__