In [1]:
%matplotlib inline

In [2]:
import torch
import cv2
import numpy as np
import os
import yaml
import logging
import math
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import torchvision
from utils.yolov7_compat import attempt_load

# --- Importaciones de Grad-CAM ---
try:
    from pytorch_grad_cam import EigenCAM
    from pytorch_grad_cam.utils.image import show_cam_on_image
    GRAD_CAM_AVAILABLE = True
    print("✅ Librería 'grad-cam' encontrada e importada correctamente.")
except ImportError:
    GRAD_CAM_AVAILABLE = False
    print("⚠️ ADVERTENCIA: La librería 'grad-cam' no está instalada.")

# Configure root logging at INFO level and create the logger used by this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Report the PyTorch build and whether CUDA is usable, so the saved output records the environment.
print(f"PyTorch versión: {torch.__version__}")
print(f"CUDA disponible: {torch.cuda.is_available()}")


✅ Librería 'grad-cam' encontrada e importada correctamente.
PyTorch versión: 2.5.1+cu121
CUDA disponible: True


In [7]:
### --- EDIT THESE SETTINGS IF NEEDED --- ###
# Model weights, the image to analyze, the YAML config and the output folder.
MODEL_PATH = 'weights/best.pt'
IMAGE_PATH = 'data/validation_set/images/WIN_20250321_15_02_33_Pro_jpg.rf.016b2ed93bc4b1b0387d0faf99879d06.jpg'
YAML_PATH = 'configs/models_config.yaml' # YAML file that provides the class names (read from data['yolov7-base']['class_names'])
OUTPUT_FOLDER = 'analysis_results' # Folder where the generated images are written

# --- Confidence threshold passed to non_max_suppression ---
CONF_THRESHOLD = 0.25

# --- Target layers to analyze with EigenCAM ---
# Maps a human-readable label to the integer layer index handed to generate_eigen_cam_image.
cam_targets = {
    # Disabled target, kept for reference:
    #"Backbone (Capa 3)": 3,
    "Backbone (Capa 24)": 24,
    "Backbone (Capa 50)": 50,
    "SPPCSPC (Capa 51)": 51,
    "Neck (Capa 75)": 75,
    "Neck (Capa 88)": 88,
    "Neck (Capa 101)": 101,
    "Head (Capa 104)": 104
}

# --- Device selection: first CUDA GPU when available, otherwise CPU ---
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

Usando dispositivo: cuda:0


In [4]:
# =================================================================================
# TUS FUNCIONES PARA DIBUJAR CAJAS Y UTILIDADES (Adaptadas para Jupyter)
# =================================================================================
import time
import math

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=()):
    """Run Non-Maximum Suppression (NMS) on raw detector output.

    Args:
        prediction: 3-D tensor indexed as (image, candidate, 5 + nc), where the last
            axis holds (cx, cy, w, h, objectness, per-class scores...).
        conf_thres: Threshold applied first to objectness and then to the combined
            objectness * class score.
        iou_thres: IoU threshold handed to ``torchvision.ops.nms``.
        classes: Optional collection of class ids; detections of other classes are dropped.
        agnostic: When True, boxes of different classes suppress each other
            (no per-class offset is applied before NMS).
        multi_label: When True (and nc > 1), a box may yield one detection per class
            whose score exceeds ``conf_thres``; otherwise only the best class is kept.
        labels: Optional per-image a-priori labels (rows of cls, x, y, w, h) that are
            appended as candidates with confidence 1.0.

    Returns:
        A list with one tensor per image, each of shape (n, 6) laid out as
        (x1, y1, x2, y2, conf, cls). Images without detections get an empty (0, 6) tensor.
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates whose objectness passes the threshold

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height; min_wh is not used below
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections (only consulted by the merge branch)
    multi_label &= nc > 1  # multiple labels per box only apply when there is more than one class
    merge = False  # use merge-NMS; hard-coded off, so the merge branch below never runs

    t = time.time()
    # Every slot initially refers to the same empty tensor; slots are replaced (not mutated) below.
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # keep only candidates that passed the objectness threshold

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # keep the max_nms most confident boxes

        # Batched NMS
        # Shifting each box by class_id * max_wh keeps boxes of different classes from overlapping,
        # so a single NMS call acts per class (the offset is 0 when agnostic).
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # NOTE(review): box_iou is not defined in the visible part of this notebook; this
            # branch would need it if merge were ever enabled.
            # update boxes as weighted mean
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded; remaining images keep the empty default

    return output

def xywh2xyxy(x):
    """Convert nx4 boxes from center format [x, y, w, h] to corner format [x1, y1, x2, y2].

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    Accepts a torch tensor or a NumPy array; the input is left untouched and a
    converted copy of the same kind is returned.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)

    center_x, center_y = x[:, 0], x[:, 1]
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2

    y[:, 0] = center_x - half_w  # top left x
    y[:, 1] = center_y - half_h  # top left y
    y[:, 2] = center_x + half_w  # bottom right x
    y[:, 3] = center_y + half_h  # bottom right y
    return y

# --- Coordinate rescaling and box-drawing helpers ---

def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy boxes from the letterboxed tensor size back to the original image size.

    Args:
        img1_shape: (height, width) of the letterboxed network input.
        coords: Float tensor whose first four columns are (x1, y1, x2, y2) in
            ``img1_shape`` pixels. It is modified in place.
        img0_shape: Shape of the original image; only the leading (height, width) are used.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))`` overriding the gain and
            padding that are otherwise derived from the two shapes.

    Returns:
        The same ``coords`` tensor, expressed in original-image pixels and clipped
        to the image bounds.
    """
    if ratio_pad is None:
        # Letterboxing scales by the smaller ratio and centers the image, so the
        # padding on each side is half of the leftover space.
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # remove horizontal padding
    coords[:, [1, 3]] -= pad[1]  # remove vertical padding
    coords[:, :4] /= gain

    # Clip on both sides: boxes predicted over the padded border would otherwise
    # extend past the right/bottom edge of the original image.
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(min=0, max=img0_shape[1])
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(min=0, max=img0_shape[0])
    return coords

def draw_prediction_boxes(image, predictions, tensor_shape, class_names):
    """Draw the detections of the first image of a batch onto a copy of ``image``.

    Args:
        image: Original image as an array of shape (height, width, channels).
        predictions: Per-image list of detection tensors with rows
            (x1, y1, x2, y2, conf, cls) in network-input pixels; only the first
            entry is drawn.
        tensor_shape: (height, width) of the network input the boxes refer to.
        class_names: Sequence of class names indexed by class id.

    Returns:
        A copy of ``image`` with one green rectangle and label per detection.
        The input image and the prediction tensors are not modified.
    """
    img_with_boxes = image.copy()
    dets = predictions[0] if len(predictions) else None
    if dets is None or not len(dets):
        return img_with_boxes

    # Work on a detached CPU copy: scale_coords edits its argument in place, and
    # iterating a GPU tensor element by element forces a device sync per value.
    dets_cpu = dets.detach().cpu().clone()
    dets_cpu[:, :4] = scale_coords(tensor_shape, dets_cpu[:, :4], image.shape).round()

    color = (0, 255, 0)
    for *xyxy, conf, cls in reversed(dets_cpu.tolist()):
        class_id = int(cls)
        # Reject negative ids as well; they would otherwise index from the end of the list.
        if 0 <= class_id < len(class_names):
            label = f'{class_names[class_id]} {conf:.2f}'
        else:
            label = f'ID Inesperado: {class_id}'
        x1, y1, x2, y2 = map(int, xyxy)
        cv2.rectangle(img_with_boxes, (x1, y1), (x2, y2), color, 2)
        # Keep the label visible when the box touches the top edge of the image.
        label_y = y1 - 10 if y1 >= 20 else y1 + 15
        cv2.putText(img_with_boxes, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return img_with_boxes

def draw_ground_truth_boxes(image, label_path, class_names):
    """Draw the ground-truth boxes of a YOLO-format label file onto a copy of ``image``.

    Each label line is expected to be ``class_id x_center y_center width height``
    with coordinates normalized to [0, 1]. Blank lines are ignored, lines that do
    not parse are reported and skipped, and boxes whose class id has no entry in
    ``class_names`` are skipped silently.

    Args:
        image: Image array whose first two dimensions are (height, width).
        label_path: Path to the label text file.
        class_names: Sequence of class names indexed by class id.

    Returns:
        A copy of ``image`` with the boxes drawn. If the label file does not
        exist a warning is printed and the unmodified copy is returned.
    """
    img_with_gt = image.copy()
    h, w = image.shape[:2]
    color = (255, 0, 0)
    try:
        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                parts = line.split()
                if not parts:
                    continue  # blank line, e.g. a trailing newline at the end of the file
                try:
                    if len(parts) != 5:
                        raise ValueError(f"expected 5 fields, got {len(parts)}")
                    cls_id = int(parts[0])
                    x_center, y_center, width, height = map(float, parts[1:])
                except ValueError:
                    # One bad line must not abort drawing of the remaining boxes.
                    print(f"⚠️ Advertencia: Línea {line_no} inválida en '{label_path}', se omite.")
                    continue
                if not 0 <= cls_id < len(class_names):
                    continue
                label = class_names[cls_id]
                x1, y1 = int((x_center - width / 2) * w), int((y_center - height / 2) * h)
                x2, y2 = int((x_center + width / 2) * w), int((y_center + height / 2) * h)
                cv2.rectangle(img_with_gt, (x1, y1), (x2, y2), color, 2)
                # Keep the label visible when the box touches the top edge of the image.
                label_y = y1 - 10 if y1 >= 20 else y1 + 15
                cv2.putText(img_with_gt, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    except FileNotFoundError:
        print(f"⚠️ Advertencia: Archivo de etiqueta no encontrado en '{label_path}'")
    return img_with_gt

print("✅ Funciones de utilidad y dibujo de cajas definidas.")

✅ Funciones de utilidad y dibujo de cajas definidas.


In [5]:
# =================================================================================
# CLASES Y FUNCIONES DE VISUALIZACIÓN DE CAM
# =================================================================================

class YOLOv7ModelWrapper(torch.nn.Module):
    """Adapter that exposes only the first element of the wrapped model's output.

    The wrapped model is called unchanged; whatever it returns is indexed with
    ``[0]`` so that callers receive a single object instead of a sequence.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        outputs = self.model(x)
        return outputs[0]

def _resolve_cam_target_layer(model, layer_index):
    """Return the module addressed by ``layer_index``, or None when it is out of range.

    When the model exposes its network as an indexable ``model`` attribute
    (``nn.Sequential`` / ``nn.ModuleList``), the index addresses that layer list,
    so "layer 51" is the 52nd top-level layer. Indexing the flattened
    ``model.modules()`` list instead would count every nested conv/activation
    and land on a much earlier module than the label suggests.
    NOTE(review): assumes the loaded model follows this layout — confirm against
    ``attempt_load``. Models without such an attribute fall back to the flattened list.
    """
    layers = getattr(model, 'model', None)
    if not isinstance(layers, (torch.nn.Sequential, torch.nn.ModuleList)):
        layers = list(model.modules())
    if not (0 <= layer_index < len(layers)):
        return None
    return layers[layer_index]


def generate_eigen_cam_image(model, img_rgb, img_tensor, geometry_info, target_layer_identifier, device):
    """Compute an EigenCAM heatmap for one layer and overlay it on the original image.

    Args:
        model: Detection model to explain.
        img_rgb: Original RGB image, uint8, shape (height, width, 3).
        img_tensor: Letterboxed network input of shape (1, C, H, W).
        geometry_info: ``(scale, (pad_w, pad_h))`` as returned by ``preprocess_image``.
        target_layer_identifier: Integer index of the layer to visualize.
        device: Torch device on which the CAM is computed.

    Returns:
        ``(label, overlay)`` where ``overlay`` is an RGB image the size of ``img_rgb``,
        or None when grad-cam is unavailable, the layer index is out of range, or
        no usable activation map is produced.
    """
    if not GRAD_CAM_AVAILABLE:
        return None

    target_module = _resolve_cam_target_layer(model, target_layer_identifier)
    if target_module is None:
        return None

    wrapped_model = YOLOv7ModelWrapper(model)
    wrapped_model.to(device)
    with EigenCAM(model=wrapped_model, target_layers=[target_module]) as cam:
        grayscale_cam = cam(input_tensor=img_tensor.to(device))
    if grayscale_cam is None:
        return None
    grayscale_cam = grayscale_cam[0, :]

    # Strip the letterbox border using the same rounding preprocess_image applies
    # when it pads, so a fractional padding (e.g. 80.5 -> 80 px top, 81 px bottom)
    # is cropped exactly instead of being truncated on both sides.
    _, pad = geometry_info
    pad_w, pad_h = float(pad[0]), float(pad[1])
    top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
    left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
    tensor_h, tensor_w = img_tensor.shape[2:]
    cam_cropped = grayscale_cam[top:tensor_h - bottom, left:tensor_w - right]
    if cam_cropped.size == 0:
        return None

    cam_resized_to_original = cv2.resize(cam_cropped, (img_rgb.shape[1], img_rgb.shape[0]))
    img_float = np.float32(img_rgb) / 255
    cam_image = show_cam_on_image(img_float, cam_resized_to_original, use_rgb=True)
    return (f"EigenCAM (Capa {target_layer_identifier})", cam_image)

def preprocess_image(img_bgr, img_size=640):
    """Letterbox a BGR image into a square network input.

    The image is converted to RGB, resized so its longer side equals ``img_size``
    (aspect ratio preserved) and padded with gray (114) to ``img_size`` x ``img_size``.

    Returns:
        ``(img_rgb, img_tensor, geometry_info)`` where ``img_rgb`` is the unresized
        RGB image, ``img_tensor`` is a float tensor of shape (1, 3, H, W) scaled to
        [0, 1], and ``geometry_info`` is ``(scale, (pad_w, pad_h))`` with the
        (possibly fractional) padding per side.
    """
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    orig_h, orig_w = img_rgb.shape[0], img_rgb.shape[1]

    scale = img_size / max(orig_h, orig_w)
    resized_w = int(orig_w * scale)
    resized_h = int(orig_h * scale)
    resized = cv2.resize(img_rgb, (resized_w, resized_h), interpolation=cv2.INTER_LINEAR)

    # Half of the leftover space goes on each side; the +/-0.1 nudge sends an odd
    # leftover pixel to the bottom/right border.
    pad_w = (img_size - resized_w) / 2
    pad_h = (img_size - resized_h) / 2
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))

    # HWC uint8 -> contiguous CHW -> float in [0, 1] with a leading batch axis.
    chw = np.ascontiguousarray(padded.transpose(2, 0, 1))
    batch = (torch.from_numpy(chw).float() / 255.0).unsqueeze(0)

    return img_rgb, batch, (scale, (pad_w, pad_h))

print("✅ Funciones de visualización de CAM definidas.")

✅ Funciones de visualización de CAM definidas.


In [8]:
# --- 1. Load configuration ---
# Class names are read from the 'yolov7-base' -> 'class_names' entry of the YAML file.
try:
    with open(YAML_PATH, 'r', errors='ignore') as f:
        data = yaml.safe_load(f)
        class_names = data['yolov7-base']['class_names']
    print(f"✅ Nombres de clase cargados desde '{YAML_PATH}': {class_names}")
except Exception as e:
    print(f"❌ Error al cargar el archivo YAML '{YAML_PATH}': {e}")
    class_names = None

# --- 2. Load model ---
try:
    model = attempt_load(MODEL_PATH, map_location=device)
    model.eval()
    print(f"✅ Modelo cargado exitosamente desde '{MODEL_PATH}' usando 'attempt_load'.")
except Exception as e:
    print(f"❌ Error al cargar el modelo con 'attempt_load': {e}")
    model = None


def _save_rgb_image(path, image_rgb):
    """Write an RGB image to ``path`` with OpenCV and report whether it succeeded."""
    # cv2.imwrite signals failure through its return value rather than an exception.
    if cv2.imwrite(path, cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)):
        print(f"  -> Guardado: {path}")
        return True
    print(f"  ❌ No se pudo guardar: {path}")
    return False


# --- 3. Process and analyze the image ---
# cv2.imread returns None (no exception) for unreadable or unsupported files,
# so the result is checked in addition to the path.
image_exists = os.path.exists(IMAGE_PATH)
img_bgr = cv2.imread(IMAGE_PATH) if image_exists else None

# Compare the model against None explicitly: truthiness of an nn.Module is not a
# reliable "was it loaded" signal.
if model is not None and class_names and img_bgr is not None:
    img_rgb, img_tensor, geometry_info = preprocess_image(img_bgr)

    with torch.no_grad():
        pred_raw = model(img_tensor.to(device))[0]
        predictions_list = non_max_suppression(pred_raw, conf_thres=CONF_THRESHOLD, iou_thres=0.45)

    # The label file mirrors the image path: .../images/<name>.<ext> -> .../labels/<name>.txt
    label_path = IMAGE_PATH.replace('/images/', '/labels/').rsplit('.', 1)[0] + '.txt'
    img_with_gt = draw_ground_truth_boxes(img_rgb, label_path, class_names)
    img_with_preds = draw_prediction_boxes(img_rgb, predictions_list, img_tensor.shape[2:], class_names)

    output_cam_images = []
    print("\nGenerando mapas de calor con EigenCAM...")
    for layer_name, layer_idx in cam_targets.items():
        try:
            print(f"  Procesando: {layer_name} (Índice {layer_idx})")
            result = generate_eigen_cam_image(model, img_rgb, img_tensor, geometry_info, layer_idx, device)
            if result:
                output_cam_images.append(result)
        except (MemoryError, torch.cuda.OutOfMemoryError):
            # GPU exhaustion raises torch.cuda.OutOfMemoryError, not MemoryError.
            print(f"  🔴 MEMORY ERROR: La capa {layer_idx} es demasiado grande. Saltando.")
            torch.cuda.empty_cache()  # release cached blocks so the next layer can still run
        except Exception as e:
            print(f"  🟡 ERROR INESPERADO en la capa {layer_idx}: {e}. Saltando.")

    # --- 4. EXPORT RESULTS TO FILES ---
    print(f"\n✅ Análisis completo. Exportando imágenes a la carpeta: '{OUTPUT_FOLDER}'...")

    # Create the output folder if it does not exist
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # Base name of the original image file, used as the prefix of every exported file
    base_filename = os.path.splitext(os.path.basename(IMAGE_PATH))[0]

    # Ground-truth overlay
    gt_path = os.path.join(OUTPUT_FOLDER, f"{base_filename}_ground_truth.jpg")
    _save_rgb_image(gt_path, img_with_gt)

    # Prediction overlay
    preds_path = os.path.join(OUTPUT_FOLDER, f"{base_filename}_predictions.jpg")
    _save_rgb_image(preds_path, img_with_preds)

    # One heatmap per analyzed layer
    for label, cam_img in output_cam_images:
        # Build a filesystem-friendly name from the label
        safe_label = label.replace(' ', '_').replace('(', '').replace(')', '')
        cam_path = os.path.join(OUTPUT_FOLDER, f"{base_filename}_{safe_label}.jpg")
        _save_rgb_image(cam_path, cam_img)

    print("\n🎉 Exportación finalizada.")

else:
    if model is None: print("El modelo no se pudo cargar.")
    if not class_names: print("Los nombres de las clases no se pudieron cargar.")
    if not image_exists:
        print(f"La imagen no se encontró en: {IMAGE_PATH}")
    elif img_bgr is None:
        print(f"La imagen no se pudo leer (archivo corrupto o formato no soportado): {IMAGE_PATH}")

✅ Nombres de clase cargados desde 'configs/models_config.yaml': ['paja', 'suciedad']
Fusing layers... 


INFO:utils.torch_utils:Model Summary: 314 layers, 36487166 parameters, 6194944 gradients, 103.2 GFLOPS


RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
✅ Modelo cargado exitosamente desde 'weights/best.pt' usando 'attempt_load'.

Generando mapas de calor con EigenCAM...
  Procesando: Backbone (Capa 24) (Índice 24)
  Procesando: Backbone (Capa 50) (Índice 50)
  Procesando: SPPCSPC (Capa 51) (Índice 51)
  Procesando: Neck (Capa 75) (Índice 75)
  Procesando: Neck (Capa 88) (Índice 88)
  Procesando: Neck (Capa 101) (Índice 101)
  Procesando: Head (Capa 104) (Índice 104)

✅ Análisis completo. Exportando imágenes a la carpeta: 'analysis_results'...
  -> Guardado: analysis_results\WIN_20250321_15_02_33_Pro_jpg.rf.016b2ed93bc4b1b0387d0faf99879d06_ground_truth.jpg
  -> Guardado: analysis_results\WIN_20250321_15_02_33_Pro_jpg.rf.016b2ed93bc4b1b0387d0faf99879d06_predictions.jpg
  -> Guardado: analysis_results\WIN_20250321_15_02_33_Pro_jpg.rf.016b2ed93bc4b1b0387d0faf99879d06_EigenCAM_Capa_24.jpg
  -> Guardado: analysis_results\WIN_20250321_15_02_33_Pro_jpg.