In [None]:
# Install the detection (ultralytics) and image-processing (opencv-python) packages into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may pull a different release — consider pinning.
%pip install ultralytics opencv-python 

In [None]:
from ultralytics import YOLO
import os
import cv2
from matplotlib import pyplot as plt
import yaml

In [None]:
# Load the trained weights saved by the fold-5 K-Fold training run.
# `model` is the detector called on each image in the visualisation loops.
model = YOLO("./_output_/K-Fold-Training/fold_5_exp/weights/best.pt")

In [None]:
# Dataset root, plus the test-split image and label directories beneath it.
# The visualisation loops look up each image's label file by matching stem
# (`<image stem>.txt`) inside `test_label_dir`.
base_path = "./ppe-detection-project-dataset-c/versions/1"
test_img_dir = os.path.join(base_path, "test/images")
test_label_dir = os.path.join(base_path, "test/labels")

In [None]:
import random

# Number of test images to visualise, and the seed that makes the draw
# repeatable across kernel restarts.
SAMPLE_SIZE = 50
SAMPLE_SEED = 42

# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the seeded draw reproducible. The extension check is case-insensitive
# so files such as "0001.JPG" are not silently dropped.
all_images = sorted(
    f for f in os.listdir(test_img_dir) if f.lower().endswith((".jpg", ".png"))
)

# random.sample raises ValueError when asked for more items than exist, so
# the request is clamped to the number of available images. A private
# Random instance is used so the global random state is left untouched.
sample_images = random.Random(SAMPLE_SEED).sample(
    all_images, min(SAMPLE_SIZE, len(all_images))
)


In [None]:
yaml_path = os.path.join("./", "config.yaml")

# The encoding is given explicitly so parsing does not depend on the
# platform's default text encoding (which is not UTF-8 everywhere).
with open(yaml_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# `names` is indexed by integer class id further down the notebook
# (`class_names[int(cls)]`) to obtain the label text for each box.
class_names = config["names"]

print(class_names)


In [None]:
import os
import yaml
import cv2
import matplotlib.pyplot as plt

# --- Read config.yaml ---
# The encoding is given explicitly so parsing does not depend on the
# platform's default text encoding.
yaml_path = os.path.join("./", "config.yaml")
with open(yaml_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

class_names = config["names"]

# --- Loop over the sampled images: ground truth next to predictions ---
for img_name in sample_images:
    img_path = os.path.join(test_img_dir, img_name)
    label_path = os.path.join(test_label_dir, os.path.splitext(img_name)[0] + ".txt")

    # Read the image. cv2.imread signals failure by returning None instead of
    # raising, which would otherwise crash cvtColor below.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Skipping unreadable image: {img_path}")
        continue
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h_img, w_img = img_rgb.shape[:2]

    # --- Ground Truth ---
    gt_img = img_rgb.copy()
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # blank line (e.g. trailing newline at end of file)
                if len(fields) != 5:
                    # Only "class x y w h" rows can be drawn as a box here.
                    print(f"Skipping malformed label line in {label_path}: {line.strip()!r}")
                    continue
                cls, x, y, w, h = map(float, fields)
                # x, y, w, h are fractions of the image size (centre + extent);
                # convert them to pixel corner coordinates.
                x1 = int((x - w / 2) * w_img)
                y1 = int((y - h / 2) * h_img)
                x2 = int((x + w / 2) * w_img)
                y2 = int((y + h / 2) * h_img)
                cls_name = class_names[int(cls)]
                # Draw the box in green (the image is RGB at this point)
                cv2.rectangle(gt_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Write the class name; the baseline is clamped so labels of
                # boxes touching the top edge stay inside the image.
                cv2.putText(gt_img, cls_name, (x1, max(y1 - 5, 12)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # --- Prediction ---
    results = model(img_path, imgsz=640, conf=0.25)
    pred_img = img_rgb.copy()
    for r in results:
        boxes = r.boxes.xyxy.cpu().numpy()
        scores = r.boxes.conf.cpu().numpy()
        pred_classes = r.boxes.cls.cpu().numpy()
        for box, cls_idx, conf in zip(boxes, pred_classes, scores):
            x1, y1, x2, y2 = map(int, box)
            cls_name = r.names[int(cls_idx)]
            text = f"{cls_name} {conf:.2f}"
            # Draw the box in red
            cv2.rectangle(pred_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
            # Write class name + confidence (same top-edge clamp as above)
            cv2.putText(pred_img, text, (x1, max(y1 - 5, 12)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # --- Show the image pair ---
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    axs[0].imshow(gt_img)
    axs[0].set_title("Ground Truth")
    axs[0].axis("off")

    axs[1].imshow(pred_img)
    axs[1].set_title("Prediction")
    axs[1].axis("off")

    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)


In [None]:
# Install the packages for the Grad-CAM cells into the kernel's environment.
# NOTE(review): ultralytics is also installed by the first cell, and neither
# YOLOv8-Explainer nor grad-cam is imported in the cells visible here —
# confirm they are still needed before keeping these installs.
%pip install ultralytics torch 
%pip install YOLOv8-Explainer 
%pip install grad-cam==1.4.8


In [None]:
import torch
import torch.nn as nn
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from ultralytics import YOLO


class YOLOv8GradCAM:
    """Grad-CAM over one layer of an indexable, callable stack of modules.

    A forward hook on the chosen layer stores that layer's output, and a
    tensor hook attached to the output stores the gradient of the target
    score with respect to it. ``generate_cam`` combines the two into a map.

    Args:
        model: Module stack that supports ``len()``, integer indexing and a
            direct call (for example an ``nn.Sequential``).
        target_layer_idx: Index of the layer to hook; negative values count
            from the end. If that layer's type is in the skip list, a
            neighbouring layer (offsets -1, +1, -2, +2) is used instead.

    Attributes:
        actual_layer_idx: Index of the layer that was finally hooked.
        hook_handles: Handles of the registered module hooks; ``cleanup``
            removes them.
        activations / gradients: Tensors captured during the most recent
            forward / backward pass (``None`` until captured).
    """

    # Layer types that are never chosen as the hook target.
    _SKIPPED_LAYER_TYPES = ("Upsample", "AdaptiveAvgPool2d", "Flatten", "Dropout")

    def __init__(self, model, target_layer_idx=-1):
        self.model = model
        self.target_layer_idx = target_layer_idx
        self.gradients = None
        self.activations = None

        # Register hooks
        self.hook_handles = []
        self._register_hooks()

    def _register_hooks(self):
        """Choose the layer to hook and attach the capture hooks to it."""

        def save_gradient(grad):
            self.gradients = grad

        def forward_hook(module, input, output):
            self.activations = output
            # The gradient w.r.t. the layer output is captured with a tensor
            # hook instead of Module.register_backward_hook, which PyTorch
            # has deprecated. No hook can be attached when the output does
            # not take part in autograd (e.g. under torch.no_grad()).
            if isinstance(output, torch.Tensor) and output.requires_grad:
                output.register_hook(save_gradient)

        layer_count = len(self.model)

        # Handle negative indexing
        if self.target_layer_idx < 0:
            layer_idx = layer_count + self.target_layer_idx
        else:
            layer_idx = self.target_layer_idx

        # Clamp the index into the valid range
        layer_idx = max(0, min(layer_idx, layer_count - 1))

        # Find a suitable layer, trying the requested one first and then its
        # nearest neighbours.
        suitable_layer_found = False
        for offset in (0, -1, 1, -2, 2):
            try_idx = layer_idx + offset
            if not 0 <= try_idx < layer_count:
                continue
            if type(self.model[try_idx]).__name__ not in self._SKIPPED_LAYER_TYPES:
                layer_idx = try_idx
                suitable_layer_found = True
                break

        if not suitable_layer_found:
            print(
                f"Warning: No suitable layer found near index {self.target_layer_idx}"
            )
            layer_idx = min(5, layer_count - 1)

        target_layer = self.model[layer_idx]
        self.hook_handles.append(target_layer.register_forward_hook(forward_hook))

        print(f"Registered hooks on layer {layer_idx}: {type(target_layer).__name__}")
        self.actual_layer_idx = layer_idx

    def forward(self, input_tensor):
        """Run ``input_tensor`` through the wrapped module stack."""
        return self.model(input_tensor)

    def generate_cam(self, input_tensor, class_idx=None):
        """Compute the class-activation map for ``input_tensor``.

        The target score is the mean of the stack's output, so the map is not
        specific to any class; ``class_idx`` is accepted but not used.

        Args:
            input_tensor: Input batch for the wrapped stack. It is switched
                to ``requires_grad=True`` in place so gradients flow even
                when the stack's parameters are frozen.
            class_idx: Unused.

        Returns:
            numpy.ndarray: The map for the first sample of the batch,
            min-max normalised to [0, 1] and then inverted (``1 - cam``).
            If the forward or backward pass fails, or nothing was captured,
            a random 64x64 array is returned instead.
            NOTE(review): callers cannot tell that random fallback apart
            from a real map other than through the printed message.
        """
        # Forget anything captured by a previous call so a failed pass can
        # never reuse stale tensors.
        self.gradients = None
        self.activations = None

        input_tensor.requires_grad_(True)

        try:
            output = self.forward(input_tensor)

            if isinstance(output, (list, tuple)):
                output = output[0]

            # Mean over every output element (what the per-channel pooling
            # followed by a second mean amounted to).
            target_score = output.mean()

        except Exception as e:
            print(f"Forward pass error: {e}")
            return np.random.rand(64, 64)

        try:
            self.model.zero_grad()
            target_score.backward()

            if self.gradients is None or self.activations is None:
                print("Warning: No gradients or activations captured")
                return np.random.rand(64, 64)

            gradients = self.gradients.detach()
            activations = self.activations.detach()

            if gradients.dim() == 4:
                # Work on the first sample of the batch: (C, H, W)
                gradients = gradients[0]
                activations = activations[0]

                # Per-channel weight = spatial mean of that channel's gradient
                weights = gradients.reshape(gradients.size(0), -1).mean(dim=1)

                # Weighted sum over channels. Computed on the activations'
                # own device so it also works when the model runs on CUDA.
                cam = (weights.view(-1, 1, 1) * activations).sum(dim=0).float()
            else:
                cam = (
                    torch.mean(activations[0], dim=0)
                    if activations.dim() > 2
                    else activations[0]
                )

            cam = F.relu(cam)
            cam_min, cam_max = cam.min(), cam.max()
            if cam_max > cam_min:
                cam = (cam - cam_min) / (cam_max - cam_min)
            else:
                cam = torch.zeros_like(cam)

            # .cpu() is required before .numpy() for tensors living on CUDA.
            cam_numpy = cam.cpu().numpy()
            cam_numpy = 1.0 - cam_numpy  # Invert

            return cam_numpy

        except Exception as e:
            print(f"Backward pass error: {e}")
            import traceback

            traceback.print_exc()
            return np.random.rand(64, 64)

    def cleanup(self):
        """Remove every registered module hook."""
        for handle in self.hook_handles:
            handle.remove()
        self.hook_handles = []


def generate_t8_gradcam(
    image_path,
    model_path="best.pt",
    device="cpu",
    class_names=None,
    output_dir="t8_output",
):
    """Generate only t_8 Grad-CAM visualization.

    Loads the weights at ``model_path``, prints the detections for the image,
    runs Grad-CAM on layer index 8 of the first 11 model layers and builds a
    three-panel matplotlib figure (original, heatmap, overlay).

    Args:
        image_path: Path of the image to explain.
        model_path: Path of the YOLO weights file to load.
        device: Device string (or ``torch.device``) used for both the
            prediction and the Grad-CAM passes.
        class_names: Optional index -> name lookup used only for the printed
            detection list; ``Class_<idx>`` is printed when it is missing or
            too short.
        output_dir: Folder that receives ``t8_overlay.jpg`` and
            ``t8_heatmap.jpg``; created if needed. The files are overwritten
            on every call.

    Returns:
        dict: ``"cam"`` (float map resized to the image size), ``"overlay"``,
        ``"heatmap"`` and ``"original"`` (RGB ``uint8`` images).

    Raises:
        ValueError: If the image cannot be read.
    """
    import os

    # Load model and image
    print(f"Loading model from: {model_path}")
    yolo = YOLO(model_path)
    device = torch.device(device)
    yolo.model.to(device)

    # Load and preprocess image
    print(f"Loading image from: {image_path}")
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        raise ValueError(f"Could not load image from {image_path}")

    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    print(f"Original image shape: {image_rgb.shape}")

    # Prepare input tensor: HWC uint8 -> 1xCxHxW float in [0, 1], resized to
    # 640x640 (the aspect ratio is not preserved).
    input_tensor = (
        torch.from_numpy(image_rgb.transpose(2, 0, 1)).float().unsqueeze(0) / 255.0
    )
    input_tensor = F.interpolate(
        input_tensor, size=(640, 640), mode="bilinear", align_corners=False
    )
    input_tensor = input_tensor.to(device)
    print(f"Input tensor shape: {input_tensor.shape}")

    # Get backbone layers
    try:
        backbone_layers = yolo.model.model[:11]
        print(f"Using backbone layers: {len(backbone_layers)}")
    except Exception as e:
        print(f"Error accessing backbone layers: {e}")
        # Wrapped in nn.Sequential because the Grad-CAM helper calls the
        # stack directly; a plain list is not callable.
        backbone_layers = nn.Sequential(*list(yolo.model.model.children())[:11])

    # Run initial prediction. The device is passed explicitly: otherwise the
    # predictor selects its own device and may move the shared module away
    # from the one the Grad-CAM input tensor lives on.
    with torch.no_grad():
        results = yolo(image_path, device=str(device))
        if results and len(results[0].boxes) > 0:
            detected_classes = results[0].boxes.cls.cpu().numpy()
            confidences = results[0].boxes.conf.cpu().numpy()
            print(f"\nDetected {len(detected_classes)} objects:")
            for i, (cls_idx, conf) in enumerate(zip(detected_classes, confidences)):
                if class_names and int(cls_idx) < len(class_names):
                    class_name = class_names[int(cls_idx)]
                else:
                    class_name = f"Class_{int(cls_idx)}"
                print(f"  {i + 1}. {class_name} (conf: {conf:.3f})")

    # Belt and braces: make sure the weights are on the requested device
    # before the Grad-CAM forward pass (a no-op when they already are).
    yolo.model.to(device)

    # Generate t_8 Grad-CAM
    print("\n" + "=" * 60)
    print("Generating t_8 (Layer 8) Grad-CAM...")
    print("=" * 60)

    gradcam_model = YOLOv8GradCAM(backbone_layers, target_layer_idx=8)

    try:
        with torch.enable_grad():
            cam = gradcam_model.generate_cam(input_tensor)
    finally:
        # Always remove the hooks, even if CAM generation raises.
        gradcam_model.cleanup()
        del gradcam_model

    # Ensure CAM is valid
    if cam is None or cam.size == 0:
        print("Warning: Empty CAM")
        cam = np.ones((64, 64)) * 0.5

    # Resize CAM to original image size (cv2.resize takes (width, height))
    cam_resized = cv2.resize(cam, (image_rgb.shape[1], image_rgb.shape[0]))
    cam_normalized = np.uint8(255 * np.clip(cam_resized, 0, 1))

    # Create heatmap (applyColorMap returns BGR, so convert for matplotlib)
    heatmap = cv2.applyColorMap(cam_normalized, cv2.COLORMAP_JET)
    heatmap_rgb = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Create overlay: 60% image, 40% heatmap
    overlay = cv2.addWeighted(image_rgb, 0.6, heatmap_rgb, 0.4, 0)

    # Build the result figure. It is deliberately left open and not shown
    # here; it is rendered by the caller's next plt.show() or, under the
    # notebook inline backend, when the calling cell finishes.
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Original image
    axes[0].imshow(image_rgb)
    axes[0].set_title("Original Image", fontsize=14, weight="bold")
    axes[0].axis("off")

    # Heatmap only
    axes[1].imshow(heatmap_rgb)
    axes[1].set_title("t_8 Heatmap\n(Semantic Features)", fontsize=14, weight="bold")
    axes[1].axis("off")

    # Overlay
    axes[2].imshow(overlay)
    axes[2].set_title(
        "t_8 Overlay\n(Layer 8 - High-Level Features)", fontsize=14, weight="bold"
    )
    axes[2].axis("off")

    plt.tight_layout()
    plt.suptitle("YOLOv8 Grad-CAM - Layer 8 (t_8) Visualization", fontsize=16, y=1.02)

    # Save result (cv2.imwrite expects BGR, hence the conversions back)
    os.makedirs(output_dir, exist_ok=True)
    cv2.imwrite(
        os.path.join(output_dir, "t8_overlay.jpg"),
        cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR),
    )
    cv2.imwrite(
        os.path.join(output_dir, "t8_heatmap.jpg"),
        cv2.cvtColor(heatmap_rgb, cv2.COLOR_RGB2BGR),
    )
    print(f"\nSaved t_8 results to '{output_dir}/' folder")

    return {
        "cam": cam_resized,
        "overlay": overlay,
        "heatmap": heatmap_rgb,
        "original": image_rgb,
    }

In [None]:
# Run the t_8 Grad-CAM once on a single test image.
# NOTE(review): the weights are read from ./best.pt, presumably a copy of
# _output_/K-Fold-Training/Fold-n/best.pt — confirm it is the same checkpoint
# as the one loaded into `model` earlier in the notebook.
model_path = "./best.pt"
image_path = "./ppe-detection-project-dataset-c/versions/1/test/images/0002.jpg"
device = "cpu"  # or "cuda" if GPU is available

# The result is bound to a name so the cell does not echo the returned dict
# of full-size image arrays as its output; the figure is still rendered.
t8_result = generate_t8_gradcam(image_path, model_path, device, class_names)




In [None]:
import os
import yaml
import cv2
import matplotlib.pyplot as plt
import torch

# --- Read config.yaml ---
# The encoding is given explicitly so parsing does not depend on the
# platform's default text encoding.
yaml_path = os.path.join("./", "config.yaml")
with open(yaml_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

class_names = config["names"]

# The device does not change between images, so it is chosen once.
gradcam_device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Loop over the sampled images: ground truth, prediction, Grad-CAM ---
for img_name in sample_images:
    img_path = os.path.join(test_img_dir, img_name)
    label_path = os.path.join(test_label_dir, os.path.splitext(img_name)[0] + ".txt")

    # Read the image. cv2.imread signals failure by returning None instead of
    # raising, which would otherwise crash cvtColor below.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Skipping unreadable image: {img_path}")
        continue
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h_img, w_img = img_rgb.shape[:2]

    # --- Ground Truth ---
    gt_img = img_rgb.copy()
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # blank line (e.g. trailing newline at end of file)
                if len(fields) != 5:
                    # Only "class x y w h" rows can be drawn as a box here.
                    print(f"Skipping malformed label line in {label_path}: {line.strip()!r}")
                    continue
                cls, x, y, w, h = map(float, fields)
                # x, y, w, h are fractions of the image size (centre + extent);
                # convert them to pixel corner coordinates.
                x1 = int((x - w / 2) * w_img)
                y1 = int((y - h / 2) * h_img)
                x2 = int((x + w / 2) * w_img)
                y2 = int((y + h / 2) * h_img)
                cls_name = class_names[int(cls)]
                # Draw the box in green (the image is RGB at this point)
                cv2.rectangle(gt_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Write the class name; the baseline is clamped so labels of
                # boxes touching the top edge stay inside the image.
                cv2.putText(gt_img, cls_name, (x1, max(y1 - 5, 12)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # --- Prediction ---
    results = model(img_path, imgsz=640, conf=0.25)
    pred_img = img_rgb.copy()
    for r in results:
        boxes = r.boxes.xyxy.cpu().numpy()
        scores = r.boxes.conf.cpu().numpy()
        pred_classes = r.boxes.cls.cpu().numpy()
        for box, cls_idx, conf in zip(boxes, pred_classes, scores):
            x1, y1, x2, y2 = map(int, box)
            cls_name = r.names[int(cls_idx)]
            text = f"{cls_name} {conf:.2f}"
            # Draw the box in red
            cv2.rectangle(pred_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
            # Write class name + confidence (same top-edge clamp as above)
            cv2.putText(pred_img, text, (x1, max(y1 - 5, 12)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # --- Grad-CAM Overlay (Layer t_8) ---
    # NOTE(review): the Grad-CAM weights come from "best.pt" while the boxes
    # above come from `model`; confirm both refer to the same checkpoint.
    # The weights file is reloaded on every call.
    gradcam_result = generate_t8_gradcam(
        img_path,
        model_path="best.pt",
        device=gradcam_device,
        class_names=class_names,
    )
    overlay_img = gradcam_result["overlay"]

    # --- Show the three-column figure ---
    fig, axs = plt.subplots(1, 3, figsize=(18, 6))

    axs[0].imshow(gt_img)
    axs[0].set_title("Ground Truth", fontsize=14, weight="bold")
    axs[0].axis("off")

    axs[1].imshow(pred_img)
    axs[1].set_title("Prediction", fontsize=14, weight="bold")
    axs[1].axis("off")

    axs[2].imshow(overlay_img)
    axs[2].set_title("t_8 Overlay\n(Layer 8 - High-Level Features)", fontsize=14, weight="bold")
    axs[2].axis("off")

    plt.tight_layout()
    plt.show()
    # Two figures are open per iteration (this one and the one created inside
    # generate_t8_gradcam); release both so they do not pile up over the loop.
    plt.close("all")
