In [1]:
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt

import gc
import torch

In [2]:
# Batch size, followed by the per-image dimensions.
BATCH_SIZE = 1
C, H, W = 3, 640, 640  # channels, height and width of the input image

# mAP CHARACTERIZATION

# Base model

In [4]:
# Load the PyTorch segmentation checkpoint and validate it on the salmons dataset.
model = YOLO('../../weights/yolov8lsalmons.pt', task='segment')
metrics = model.val(
    data='../../datasets/salmons/salmons.yaml',
    task='segment',
    verbose=False,
    conf=0.4,
    device='cuda',
)

# Report the bounding-box mAP values collected by the validator.
print('base model box mAP50: ', metrics.box.map50)
print('base model box mAP50-95: ', metrics.box.map)

Ultralytics 8.3.55 🚀 Python-3.10.12 torch-2.3.0 CUDA:0 (Orin, 62841MiB)
YOLOv8l-seg summary (fused): 295 layers, 45,912,659 parameters, 0 gradients, 220.1 GFLOPs


[34m[1mval: [0mScanning /home/juam/Documents/SalmonsTRT/datasets/salmons/labels/val.cache... 52 images, 34 backgrounds, 0 corrupt: 100%|██████████| 86/86 [00:00<?, ?it/s]
  return F.conv2d(input, weight, bias, self.stride,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:07<00:00,  1.28s/it]


                   all         86        496      0.714      0.514      0.636       0.46      0.717      0.516       0.64      0.392
Speed: 0.8ms preprocess, 66.8ms inference, 0.0ms loss, 9.2ms postprocess per image
Results saved to [1m/home/juam/Documents/SalmonsTRT/runs/segment/val43[0m
base model box mAP50:  0.6362255099560289
base model box mAP50-95:  0.46046713532610867


# TRT fp32

In [None]:
# Validate the fp32 TensorRT engine on the salmons dataset.
model = YOLO('../../weights/yolov8lsalmons_fp32_bs32.engine', task='segment')
metrics = model.val(
    data='../../datasets/salmons/salmons.yaml',
    task='segment',
    verbose=False,
    conf=0.4,
    device='cuda',
)

# Label the metrics with the engine actually evaluated (previously printed as "base model").
print('TRT fp32 box mAP50: ', metrics.box.map50)
print('TRT fp32 box mAP50-95: ', metrics.box.map)

# TRT fp16

In [None]:
# Validate the fp16 TensorRT engine on the salmons dataset.
model = YOLO('../../weights/yolov8lsalmons_fp16_bs32.engine', task='segment')
metrics = model.val(
    data='../../datasets/salmons/salmons.yaml',
    task='segment',
    verbose=False,
    conf=0.4,
    device='cuda',
)

# Label the metrics with the engine actually evaluated (previously printed as "base model").
print('TRT fp16 box mAP50: ', metrics.box.map50)
print('TRT fp16 box mAP50-95: ', metrics.box.map)

# TRT int8

In [None]:
# Validate the int8 TensorRT engine on the salmons dataset.
model = YOLO('../../weights/yolov8lsalmons_int8_bs32.engine', task='segment')
metrics = model.val(
    data='../../datasets/salmons/salmons.yaml',
    task='segment',
    verbose=False,
    conf=0.4,
    device='cuda',
)

# Label the metrics with the engine actually evaluated (previously printed as "base model").
print('TRT int8 box mAP50: ', metrics.box.map50)
print('TRT int8 box mAP50-95: ', metrics.box.map)

# PIXEL-TO-PIXEL CHARACTERIZATION

## Compare output segmentation masks

In [None]:
# Run the PyTorch checkpoint on a single validation image.
# show_boxes=False leaves the boxes out of the rendered result; save=True writes it to disk.
base_model = YOLO('../../weights/yolov8lsalmons.pt', task='segment')
results_base = base_model.predict(
    "../../datasets/salmons/images/val/Img2.jpeg",
    show_boxes=False,
    save=True,
)

In [None]:
# Run the fp16 TensorRT engine on the same validation image, with the same rendering options.
trt_model = YOLO('../../weights/yolov8lsalmons_fp16_bs32.engine', task='segment')
results_trt = trt_model.predict(
    "../../datasets/salmons/images/val/Img2.jpeg",
    show_boxes=False,
    save=True,
)

In [26]:
def compare_images(image1, image2):
    """Compare two rendered segmentation images inside the blue region of the first.

    ``image1`` is treated as the reference ("groundtruth"): its pixels whose HSV
    value lies between ``[100, 100, 50]`` and ``[140, 255, 255]`` (blue) form the
    reference mask. Inside that mask the blue (B) channel of both images is
    compared.

    Args:
        image1: Path of the reference image.
        image2: Path of the image produced by the model under test.

    Returns:
        A tuple ``(img1_with_differences, closeness_percentage)``:
        ``img1_with_differences`` is ``image1`` with full red added on every
        pixel whose blue-channel difference is greater than 10, and
        ``closeness_percentage`` is the percentage (0-100) of mask pixels whose
        blue channel is exactly equal in both images (``0.0`` for an empty mask).

    Raises:
        FileNotFoundError: If either path cannot be read as an image.
        ValueError: If the two images differ in size or channel count.
    """
    img1 = cv2.imread(image1)  # reference ("groundtruth")
    img2 = cv2.imread(image2)  # model prediction

    # cv2.imread signals failure by returning None rather than raising, so
    # check explicitly instead of failing later with an opaque AttributeError.
    for path, img in ((image1, img1), (image2, img2)):
        if img is None:
            raise FileNotFoundError(f"No se pudo leer la imagen: {path}")

    if img1.shape != img2.shape:
        raise ValueError("Las imágenes deben tener el mismo tamaño y número de canales")

    # Build the reference mask from the blue pixels of the reference image (HSV space).
    hsv_img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
    lower_blue = np.array([100, 100, 50])
    upper_blue = np.array([140, 255, 255])
    groundtruth_mask = cv2.inRange(hsv_img1, lower_blue, upper_blue)
    inside_mask = groundtruth_mask > 0

    # Keep only the masked region of both images.
    gt_segment = cv2.bitwise_and(img1, img1, mask=groundtruth_mask)
    pred_segment = cv2.bitwise_and(img2, img2, mask=groundtruth_mask)

    # Compare on the blue channel (index 0 in BGR) instead of converting to grayscale.
    blue_gt = gt_segment[:, :, 0]
    blue_pred = pred_segment[:, :, 0]

    # Pixels outside the mask are zero in both segments, so restrict the
    # equality count to the mask to avoid counting them as matches.
    equal_pixels = np.sum((blue_gt == blue_pred) & inside_mask)
    total_pixels_in_mask = np.sum(inside_mask)

    if total_pixels_in_mask > 0:
        closeness_percentage = (equal_pixels / total_pixels_in_mask) * 100
    else:
        closeness_percentage = 0.0

    # Build a pure-red image and keep it only where the blue channels differ by more than 10.
    overlay = np.zeros_like(img1, dtype=np.uint8)
    overlay[:, :, 2] = 255  # red channel in BGR order

    diferencia = cv2.absdiff(blue_gt, blue_pred)
    _, diferencia_binaria = cv2.threshold(diferencia, 10, 255, cv2.THRESH_BINARY)
    mask_differences = cv2.bitwise_and(overlay, overlay, mask=diferencia_binaria)

    # Add the red marks at full weight so the original brightness is preserved.
    img1_with_differences = cv2.addWeighted(img1, 1, mask_differences, 1, 0.0)

    return img1_with_differences, closeness_percentage

In [None]:
# Compare the two rendered images (base vs. fp32, per the file names) and save the annotated result.
comparacion_binaria, closeness = compare_images(
    '../../outputs/segmentation/Img2_base.jpg',
    '../../outputs/segmentation/Img2_fp32.jpg',
)
cv2.imwrite('../../outputs/segmentation/Img2_fp32_compare.jpg', comparacion_binaria)

print("closeness: ", closeness, '%')

# Show the result with matplotlib; OpenCV stores BGR, so convert to RGB first.
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(cv2.cvtColor(comparacion_binaria, cv2.COLOR_BGR2RGB))
ax.axis('off')  # hide the axes
ax.set_title("Diferencias resaltadas en rojo")
plt.show()

In [48]:
# Drop the references to both models, then run the garbage collector.
del base_model, trt_model
gc.collect()

# Clear the GPU cache.
torch.cuda.empty_cache()

In [None]:
import cv2
import numpy as np

def combine_images_with_titles(image_paths, titles, output_path, columns=2):
    """Tile titled images into a grid and write the result to ``output_path``.

    Every image is resized to the size of the first one, its title is drawn
    centered at the top (white text on a black box), and the images are laid
    out row by row. With four images and the default ``columns=2`` this
    produces a 2x2 grid.

    Args:
        image_paths: Paths of the images to combine, in reading order.
        titles: One title per image, in the same order as ``image_paths``.
        output_path: Destination file for the combined image.
        columns: Number of images per row. An incomplete last row is padded
            with black tiles.

    Raises:
        ValueError: If no images are given, ``titles`` and ``image_paths``
            differ in length, or ``columns`` is smaller than 1.
        FileNotFoundError: If any input image cannot be read.
        OSError: If the combined image cannot be written.
    """
    image_paths = list(image_paths)
    titles = list(titles)
    if not image_paths:
        raise ValueError("Se necesita al menos una imagen")
    if len(image_paths) != len(titles):
        raise ValueError("Debe haber exactamente un título por imagen")
    if columns < 1:
        raise ValueError("El número de columnas debe ser al menos 1")

    # Load the images; cv2.imread returns None (no exception) on failure.
    images = []
    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"No se pudo leer la imagen: {img_path}")
        images.append(img)

    # Resize every image to the size of the first one so they can be stacked.
    base_height, base_width = images[0].shape[:2]
    images = [cv2.resize(img, (base_width, base_height)) for img in images]

    # Title rendering settings.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 2
    font_thickness = 3
    text_color = (255, 255, 255)  # white
    text_background = (0, 0, 0)  # black

    for img, title in zip(images, titles):
        (text_width, text_height), _ = cv2.getTextSize(title, font, font_scale, font_thickness)
        # Center the text horizontally at the top of the image.
        x = (img.shape[1] - text_width) // 2
        y = text_height + 10
        # Black box behind the text, then the text itself.
        cv2.rectangle(img, (x - 5, y - text_height - 5), (x + text_width + 5, y + 5), text_background, -1)
        cv2.putText(img, title, (x, y), font, font_scale, text_color, font_thickness)

    # Pad the last row with black tiles so every row has the same width.
    remainder = len(images) % columns
    if remainder:
        images.extend(np.zeros_like(images[0]) for _ in range(columns - remainder))

    rows = [np.hstack(images[start:start + columns]) for start in range(0, len(images), columns)]
    combined_image = np.vstack(rows)

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_path, combined_image):
        raise OSError(f"No se pudo guardar la imagen combinada en: {output_path}")
    print(f"Imagen combinada guardada en: {output_path}")

# Destination of the combined image.
output_path = "../../outputs/segmentation/Img3_combined_image.jpg"

# Titles, in the same order as the image paths below.
titles = ["Modelo base", "TRT fp32", "TRT fp16", "TRT int8"]

# Input images.
image_paths = [
    "../../outputs/segmentation/Img3_base.jpg",
    "../../outputs/segmentation/Img3_fp32_compare.jpg",
    "../../outputs/segmentation/Img3_fp16_compare.jpg",
    "../../outputs/segmentation/Img3_int8_compare.jpg",
]

# Build and save the grid.
combine_images_with_titles(
    image_paths=image_paths,
    titles=titles,
    output_path=output_path,
)
