# Grad-CAM 可视化与可解释性

In [3]:
# Alternative checkpoints (uncomment exactly one to switch models):
# model_path = "/workspace/models/runs_yolov11_pose/outputs_v4/yolo11m-pose/yolo11m-pose_swd_pose_split_0.7_0.2_0.1_bs16_sz640--x/weights/best.pt"
# model_path = "/workspace/models/runs_yolov11_pose/outputs_v4/yolo11n-pose/yolo11n-pose_swd_pose_split_0.7_0.2_0.1_bs32_sz640--x/weights/best.pt"

# YOLOv11-pose weights: a single file, a glob pattern, or a directory of *.pt files.
MODEL = model_path = "/workspace/models/runs_yolov11_pose/outputs_v4/yolo11s-pose/yolo11s-pose_swd_pose_split_0.6_0.2_0.2_bs8_sz640--x/weights/best.pt"

# Alternative image source:
# img_path = "/workspace/models/runs_yolov11_pose/cropped_objects/swd"

# Input images: a single file, a glob pattern, or a directory.
IMGS = img_path = "/workspace/models/runs_yolov11_pose/datasets_v1/swd_pose_split_0.7_0.2_0.1/images/train"

# Root directory that receives the rendered CAM overlays.
OUT_DIR = out_path = "/workspace/models/runs_yolov11_pose/output_grad_cam"

## 批量执行

In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os, glob, cv2, torch, numpy as np
from pathlib import Path
from ultralytics import YOLO

# grad-cam 家族
from pytorch_grad_cam import GradCAM, GradCAMPlusPlus, EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# ==================== 配置参数 ====================
# Side length (pixels) of the square, letterboxed network input.
IMG_SIZE = 512

# CAM variants to run, by name.
DO_METHODS = (
    "gradcam",
    "gradcam++",
    "eigen",
)

# None -> average over every keypoint output; int -> restrict to that index.
TARGET_KEYPOINT = None

# When True, additionally save an overlay rendered at the source resolution.
SAVE_ORIG_OVERLAY = False

# Blend factor handed to show_cam_on_image as `image_weight`.
ALPHA = 0.7
# =================================================

# ---------- 关键点目标类 ----------
class KeypointOutputTarget:
    """CAM target that reduces a model output tensor to a score to differentiate.

    Two input layouts are accepted:

    * rank >= 3, treated as batched (axis 0 is the batch): axes 2 and then 1
      are averaged, giving one score per batch element for a rank-3 input.
    * rank <= 2, treated as a single sample: everything is averaged into one
      scalar.  This is the shape a target receives when the CAM library
      iterates over the batch axis before calling it.

    NOTE(review): when ``keypoint_idx`` is set it indexes the first non-batch
    axis of the output.  Whether index ``k`` on that axis really is keypoint
    ``k`` depends on the layout produced by the wrapped model -- confirm.
    """

    def __init__(self, keypoint_idx=None):
        # None means "average over the whole output"; an int selects one
        # entry along the first non-batch axis before averaging.
        self.keypoint_idx = keypoint_idx

    def __call__(self, model_output):
        """Return the score tensor for ``model_output``.

        Args:
            model_output: tensor produced by the wrapped model, either batched
                (rank >= 3) or a single sample (rank <= 2).

        Returns:
            A tensor with one score per batch element for rank-3 input, or a
            0-dim tensor for single-sample input.
        """
        batched = model_output.dim() >= 3

        if self.keypoint_idx is None:
            if batched:
                # Same reduction as before: collapse axis 2, then axis 1.
                return model_output.mean(dim=2).mean(dim=1)
            # Single sample: no batch axis to preserve.
            return model_output.mean()

        if batched:
            return model_output[:, self.keypoint_idx].mean(dim=1)
        return model_output[self.keypoint_idx].mean()

# ---------- 预处理函数 ----------
def build_preprocess(imgsz: int):
    """Create a preprocessing callable for square inputs of side ``imgsz``.

    The returned function takes a BGR image array and returns a 3-tuple:

    * the letterboxed image converted to RGB, cast to float32 and divided
      by 255;
    * the same data as a ``[1, 3, H, W]`` torch tensor;
    * a dict describing the letterbox geometry (``mode``, ``scale``, ``pad``
      as ``(top, bottom, left, right)`` and ``resized_hw``).
    """
    def _fit_to_square(image, side, color=(114, 114, 114)):
        # Scale so the longer edge fits `side` while keeping the aspect ratio.
        src_h, src_w = image.shape[:2]
        ratio = min(side / src_h, side / src_w)
        new_h = int(round(src_h * ratio))
        new_w = int(round(src_w * ratio))
        shrunk = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Split the leftover space between both sides; any odd pixel goes to
        # the bottom / right border.
        pad_top = (side - new_h) // 2
        pad_left = (side - new_w) // 2
        pad_bottom = side - new_h - pad_top
        pad_right = side - new_w - pad_left
        canvas = cv2.copyMakeBorder(
            shrunk, pad_top, pad_bottom, pad_left, pad_right,
            borderType=cv2.BORDER_CONSTANT, value=color,
        )

        info = {
            "mode": "letterbox",
            "scale": ratio,
            "pad": (pad_top, pad_bottom, pad_left, pad_right),
            "resized_hw": (new_h, new_w),
        }
        return canvas, info

    def _preprocess(img_bgr):
        square_bgr, info = _fit_to_square(img_bgr, imgsz)
        square_rgb = cv2.cvtColor(square_bgr, cv2.COLOR_BGR2RGB)
        rgb_unit = square_rgb.astype(np.float32) / 255.0
        # HWC -> CHW, then add the leading batch axis: [1, 3, H, W].
        batch = torch.from_numpy(rgb_unit).permute(2, 0, 1).unsqueeze(0)
        return rgb_unit, batch, info

    return _preprocess

# ---------- 找到最后一个卷积层 ----------
def find_last_conv_layer(module: torch.nn.Module):
    """Return the final ``torch.nn.Conv2d`` in ``module.modules()`` order.

    Raises:
        RuntimeError: if ``module`` contains no ``Conv2d`` layer at all.
    """
    conv_layers = [
        layer for layer in module.modules()
        if isinstance(layer, torch.nn.Conv2d)
    ]
    if not conv_layers:
        raise RuntimeError("未在模型中找到 Conv2d 层")
    return conv_layers[-1]

# ---------- 姿态估计输出适配器 ----------
class PoseOutputAdapter(torch.nn.Module):
    """Wrap a pose model and pick the output that the CAM code differentiates.

    The wrapped module's result is inspected in this order:

    1. a ``keypoint_heatmaps`` attribute, returned whenever it exists;
    2. a ``keypoints`` attribute, returned when it exists and is not ``None``;
    3. otherwise the raw result is passed through unchanged.
    """

    def __init__(self, base: torch.nn.Module):
        super().__init__()
        self.base = base

    def forward(self, x):
        """Run the wrapped model on ``x`` and return the selected output."""
        raw = self.base(x)

        # A private sentinel distinguishes "attribute absent" from an
        # attribute that is present but set to None.
        absent = object()
        heatmaps = getattr(raw, "keypoint_heatmaps", absent)
        if heatmaps is not absent:
            return heatmaps

        keypoints = getattr(raw, "keypoints", None)
        if keypoints is not None:
            return keypoints

        # Plain tensors / tuples carry neither attribute: hand them back as is.
        return raw

# ---------- CAM回投影到原图 ----------
def cam_to_original(grayscale_cam, orig_shape, meta):
    """Map a CAM computed on the preprocessed image back to the source image size.

    Args:
        grayscale_cam: 2-D CAM array for the preprocessed input.
        orig_shape: shape of the source image; only the first two entries
            (height, width) are used.
        meta: preprocessing metadata.  When ``meta["mode"]`` is
            ``"letterbox"`` the ``"pad"`` and ``"resized_hw"`` entries are
            used to cut away the padded border before resizing.

    Returns:
        The CAM resized to the source height and width.
    """
    orig_h, orig_w = orig_shape[:2]
    region = grayscale_cam

    if meta.get("mode") == "letterbox":
        pad_top, pad_bottom, pad_left, _pad_right = meta["pad"]
        content_h, content_w = meta["resized_hw"]
        # Bring the CAM to the padded square's size, then keep only the part
        # that covers real image content.
        side = pad_top + content_h + pad_bottom
        square_cam = cv2.resize(grayscale_cam, (side, side), interpolation=cv2.INTER_LINEAR)
        region = square_cam[pad_top:pad_top + content_h, pad_left:pad_left + content_w]

    return cv2.resize(region, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)

# ---------- 处理单个模型 ----------
def process_model(model_path, img_paths, output_dir, device):
    """Render CAM overlays for every image using a single pose checkpoint.

    For each readable image and each method named in ``DO_METHODS`` a
    ``<stem>_square.jpg`` overlay of the letterboxed input is written to
    ``<output_dir>/<checkpoint stem>/<method>/``.  When ``SAVE_ORIG_OVERLAY``
    is true, a ``<stem>_orig.jpg`` overlay at the source resolution is also
    written to the ``original_overlay`` sub-directory of that method.

    Args:
        model_path: path of the checkpoint handed to ``YOLO``.
        img_paths: iterable of image file paths; unreadable files are skipped.
        output_dir: root directory for the results.
        device: torch device the model and inputs are moved to.
    """
    print(f"正在处理模型: {model_path}")

    # Load the checkpoint; CAM hooks are attached to the underlying torch module.
    wrapper = YOLO(model_path)
    base: torch.nn.Module = wrapper.model.eval().to(device)
    net = PoseOutputAdapter(base).eval().to(device)
    target_layer = find_last_conv_layer(base)

    preprocess = build_preprocess(IMG_SIZE)

    method_map = {
        "gradcam": GradCAM,
        "gradcam++": GradCAMPlusPlus,
        "eigen": EigenCAM,
    }
    methods = [m for m in DO_METHODS if m in method_map]
    unknown = [str(m) for m in DO_METHODS if m not in method_map]
    if unknown:
        # Previously dropped without notice, which hides typos in DO_METHODS.
        print(f"[警告] 忽略未知的CAM方法: {', '.join(unknown)}")
    if not methods:
        print("[警告] 没有可用的CAM方法，跳过该模型")
        return

    # NOTE(review): the directory is named after the checkpoint file stem, so
    # several checkpoints that are all called "best.pt" end up in the same
    # "best" directory and overwrite each other -- confirm this is intended.
    model_name = Path(model_path).stem
    model_output_dir = os.path.join(output_dir, model_name)
    os.makedirs(model_output_dir, exist_ok=True)

    method_dirs = {}
    for method in methods:
        method_dir = os.path.join(model_output_dir, method)
        os.makedirs(method_dir, exist_ok=True)
        method_dirs[method] = method_dir

        if SAVE_ORIG_OVERLAY:
            orig_dir = os.path.join(method_dir, "original_overlay")
            os.makedirs(orig_dir, exist_ok=True)
            method_dirs[f"{method}_orig"] = orig_dir

    # Per-model setup that does not depend on the image: do it once instead
    # of once per image.
    for param in net.parameters():
        param.requires_grad_(True)
    cam_targets = [KeypointOutputTarget(TARGET_KEYPOINT)]

    # Gradients are required for the gradient-based CAMs.  The context manager
    # restores the caller's grad mode on exit instead of changing it globally.
    with torch.enable_grad():
        for img_path in img_paths:
            img_bgr = cv2.imread(img_path)
            if img_bgr is None:
                print(f"[跳过] 无法读取 {img_path}")
                continue

            rgb01_square, x, meta = preprocess(img_bgr)
            x = x.to(device)

            net.zero_grad(set_to_none=True)
            x.requires_grad_(True)

            stem = Path(img_path).stem
            # Source-resolution RGB image, converted once and shared by all methods.
            rgb01_orig = None
            if SAVE_ORIG_OVERLAY:
                rgb01_orig = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

            for mname in methods:
                cam_cls = method_map[mname]
                # The context manager releases the CAM's hooks on exit.
                with cam_cls(model=net, target_layers=[target_layer]) as cam:
                    grayscale_cam = cam(input_tensor=x, targets=cam_targets)[0]

                # Overlay on the letterboxed square input.
                vis_square = show_cam_on_image(
                    rgb01_square.copy(), grayscale_cam, use_rgb=True, image_weight=ALPHA)
                square_output_path = os.path.join(method_dirs[mname], f"{stem}_square.jpg")
                if not cv2.imwrite(square_output_path, cv2.cvtColor(vis_square, cv2.COLOR_RGB2BGR)):
                    print(f"[警告] 写入失败 {square_output_path}")

                # Overlay on the source-resolution image.
                if SAVE_ORIG_OVERLAY:
                    cam_orig = cam_to_original(grayscale_cam, img_bgr.shape, meta)
                    cam_orig_rgb = show_cam_on_image(
                        rgb01_orig.copy(), cam_orig, use_rgb=True, image_weight=ALPHA)
                    orig_output_path = os.path.join(method_dirs[f"{mname}_orig"], f"{stem}_orig.jpg")
                    if not cv2.imwrite(orig_output_path, cv2.cvtColor(cam_orig_rgb, cv2.COLOR_RGB2BGR)):
                        print(f"[警告] 写入失败 {orig_output_path}")

            print(f"[OK] {Path(img_path).name} -> {model_output_dir}")

# ---------- 主流程 ----------
def _collect_paths(spec, dir_patterns):
    """Expand ``spec`` (directory, glob pattern or single path) into a path list.

    A directory is searched with each pattern in ``dir_patterns``; a string
    containing glob metacharacters is expanded as a pattern; anything else is
    returned as a one-element list without checking that it exists.
    """
    if os.path.isdir(spec):
        found = []
        for pattern in dir_patterns:
            found.extend(glob.glob(os.path.join(spec, pattern)))
        return sorted(found)
    if any(ch in spec for ch in "*?[]"):
        return sorted(glob.glob(spec))
    return [spec]


def main():
    """Resolve ``MODEL`` / ``IMGS`` and run ``process_model`` for every checkpoint.

    Raises:
        FileNotFoundError: if ``MODEL`` or ``IMGS`` expands to no paths.
            Explicit raises are used instead of ``assert`` so the checks
            still run under ``python -O``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs(OUT_DIR, exist_ok=True)

    # Checkpoints: directory of *.pt files, glob pattern, or a single file.
    model_paths = _collect_paths(MODEL, ("*.pt",))
    if not model_paths:
        raise FileNotFoundError(f"未找到模型文件：{MODEL}")

    # Images: directory (common raster extensions), glob pattern, or a single file.
    img_paths = _collect_paths(IMGS, ("*.jpg", "*.jpeg", "*.png", "*.bmp", "*.webp"))
    if not img_paths:
        raise FileNotFoundError(f"未找到图片：{IMGS}")

    print(f"找到 {len(model_paths)} 个模型")
    print(f"找到 {len(img_paths)} 张图片")
    print(f"使用的CAM方法: {', '.join(DO_METHODS)}")

    for i, model_path in enumerate(model_paths, 1):
        print(f"\n处理进度: {i}/{len(model_paths)}")
        process_model(model_path, img_paths, OUT_DIR, device)

    print(f"\n全部完成。结果保存在: {OUT_DIR}")

if __name__ == "__main__":
    main()

找到 1 个模型
找到 607 张图片
使用的CAM方法: gradcam, gradcam++, eigen

处理进度: 1/1
正在处理模型: /workspace/models/runs_yolov11_pose/outputs_v4/yolo11s-pose/yolo11s-pose_swd_pose_split_0.6_0.2_0.2_bs8_sz640--x/weights/best.pt
[OK] 0607_0731_700_obj6_swd_uuid_35af4292-800d-4e97-91e4-eca5e5229ec9.jpg -> /workspace/models/runs_yolov11_pose/output_grad_cam/best


KeyboardInterrupt: 