# 🧪 Taller - Segmentación Semántica Multimodal: Qué hay en la Imagen

## 1. Instalación del modelo:

In [None]:
pip install git+https://github.com/facebookresearch/segment-anything.git

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-9ktko6v3
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-9ktko6v3
  Resolved https://github.com/facebookresearch/segment-anything.git to commit dca509fe793f601edb92606367a655c15ac00fdf
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
pip install supervision opencv-python matplotlib



In [None]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

--2025-05-09 02:33:04--  https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 18.173.166.48, 18.173.166.31, 18.173.166.51, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|18.173.166.48|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2564550879 (2.4G) [binary/octet-stream]
Saving to: ‚Äòsam_vit_h_4b8939.pth.1‚Äô


2025-05-09 02:33:23 (133 MB/s) - ‚Äòsam_vit_h_4b8939.pth.1‚Äô saved [2564550879/2564550879]



## 2. Cargar imagen de prueba y el modelo SAM:

In [10]:
from segment_anything import SamPredictor, sam_model_registry
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import imageio.v2 as imageio
import os

# Create the folder layout used for the deliverable (inputs, masks, results).
base_dir = "2025-04-29_taller_segmentacion_semantica_sam_deeplab"
for subdir in ("imagenes_entrada", "mascaras_salida", "resultados"):
    os.makedirs(f"{base_dir}/{subdir}", exist_ok=True)

# Load the test image. The None check below turns an unreadable or missing
# file into an explicit error instead of a failure further down.
image_path = f"{base_dir}/imagenes_entrada/imagen.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"No se pudo cargar la imagen en {image_path}")
# Reorder channels from BGR to RGB before the image is handed to the
# predictor and to matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Load the SAM model from the local checkpoint and attach the image to it.
checkpoint_path = "sam_vit_h_4b8939.pth"  # Download from: https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(f"No se encontró el checkpoint en {checkpoint_path}. Descarga 'sam_vit_h_4b8939.pth' desde el enlace proporcionado.")
sam = sam_model_registry["vit_h"](checkpoint=checkpoint_path).to("cpu")  # run on CPU
predictor = SamPredictor(sam)
predictor.set_image(image)

Original frame shape: (853, 640, 3)
Colored frame 0 shape: (853, 640, 3)
Binary frame 0 shape: (853, 640, 3)
Colored frame 1 shape: (853, 640, 3)
Binary frame 1 shape: (853, 640, 3)
Colored frame 2 shape: (853, 640, 3)
Binary frame 2 shape: (853, 640, 3)


## 3. Seleccionar punto o caja para segmentar y obtener máscara:

In [None]:
# Single-point prompt at (300, 300) in the image, with label 1.
# NOTE(review): (x, y) ordering and "1 = foreground" follow SAM's predictor
# API as I understand it — confirm against the installed version.
input_label = np.array([1])
input_point = np.array([[300, 300]])
prompt = {"point_coords": input_point, "point_labels": input_label}

# Ask for multiple candidate masks plus one score per mask; the third
# return value is not used in this notebook.
masks, scores, _ = predictor.predict(multimask_output=True, **prompt)

## 4. Visualizar resultados y guardar máscaras

In [None]:
# Outline colors for the masks; the drawing loop indexes this list modulo its length
colors = ['red', 'green', 'blue']  # One color per mask (up to 3 masks)

# Helper that outlines a mask on a matplotlib-style axes
def draw_mask_contours(image, mask, ax, color='red'):
    """Draw the external contours of ``mask`` as closed outlines on ``ax``.

    Args:
        image: Unused; kept so existing call sites keep working.
        mask: Array marking the object region; it is cast to ``uint8`` before
            being passed to ``cv2.findContours`` (presumably a 2-D
            boolean/0-1 mask — confirm against the caller).
        ax: Object exposing a matplotlib-style ``plot(x, y, **kwargs)``.
        color: Line color forwarded to ``ax.plot``.

    Returns:
        None. The outlines are drawn on ``ax`` as a side effect.
    """
    # Index with [-2] instead of unpacking two values: the contour list is the
    # second-to-last item whether findContours returns two or three values.
    contours = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in contours:
        # Contours arrive as (N, 1, 2); flatten to (N, 2) x/y pairs.
        points = contour.reshape(-1, 2)
        if len(points) < 2:
            # A lone point cannot form a visible outline.
            continue
        # Repeat the first vertex at the end so the outline is closed; without
        # it the segment joining the last and first vertices is never drawn.
        closed = np.vstack([points, points[:1]])
        ax.plot(closed[:, 0], closed[:, 1], color=color, linewidth=2)

# Helper that resizes a frame and forces it to 3-channel RGB
def prepare_image_for_gif(image_array, target_shape):
    """Return ``image_array`` resized to ``target_shape`` as an RGB array.

    Args:
        image_array: Array accepted by ``PIL.Image.fromarray``.
        target_shape: Sequence whose first two items are (height, width).

    Returns:
        ``numpy.ndarray`` built from the resized image after it has been
        converted to mode ``'RGB'`` (when it was not already in that mode).
    """
    height, width = target_shape[0], target_shape[1]
    # PIL's resize takes (width, height), the reverse of the array shape order.
    resized = Image.fromarray(image_array).resize((width, height), Image.LANCZOS)
    rgb = resized if resized.mode == 'RGB' else resized.convert('RGB')
    return np.array(rgb)

# Frames collected for the animated GIF; every frame is resized to the
# (height, width) of the source image.
gif_frames = []
target_shape = image.shape[:2]

# First frame: render the source image with a title, save it to disk, then
# read the saved PNG back as the frame.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
ax.set_title("Imagen Original")
ax.axis("off")
original_path = f"{base_dir}/resultados/imagen_original.png"
fig.savefig(original_path, bbox_inches='tight')
plt.close(fig)
original_frame = prepare_image_for_gif(imageio.imread(original_path), target_shape)
gif_frames.append(original_frame)
print(f"Original frame shape: {original_frame.shape}")

# Process each predicted mask: save it as a binary PNG, render its outline
# over the image, and append both views to the GIF frames.
for i, (mask, score) in enumerate(zip(masks, scores)):
    # Save the mask as an 8-bit image (0 = background, 255 = mask).
    mask_binary = mask.astype(np.uint8) * 255
    mask_pil = Image.fromarray(mask_binary)
    mask_binary_path = f"{base_dir}/mascaras_salida/mask_{i}.png"
    mask_pil.save(mask_binary_path)

    # Render the image with this mask's outline; colors cycle if there are
    # more masks than entries in `colors`.
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)
    draw_mask_contours(image, mask, ax, color=colors[i % len(colors)])
    ax.set_title(f"Máscara {i} (Score: {score:.3f})")
    ax.axis("off")
    mask_colored_path = f"{base_dir}/resultados/mask_colored_{i}.png"
    fig.savefig(mask_colored_path, bbox_inches='tight')
    # Close the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

    # Add two GIF frames per mask: the outlined rendering, then the binary mask.
    colored_frame = imageio.imread(mask_colored_path)
    colored_frame = prepare_image_for_gif(colored_frame, target_shape)
    gif_frames.append(colored_frame)
    print(f"Colored frame {i} shape: {colored_frame.shape}")

    binary_frame = imageio.imread(mask_binary_path)
    binary_frame = prepare_image_for_gif(binary_frame, target_shape)
    gif_frames.append(binary_frame)
    print(f"Binary frame {i} shape: {binary_frame.shape}")

# Save the raw RGB image to original_path. This replaces the matplotlib
# rendering written there earlier, which has already been read back into
# gif_frames.
Image.fromarray(image).save(original_path)

# Build the animated GIF from the collected frames
gif_path = f"{base_dir}/resultados/segmentation_process.gif"
# NOTE(review): the unit of `duration` for GIFs has differed between imageio
# releases (seconds vs. milliseconds) — confirm that 1000 yields one second
# per frame with the installed version.
imageio.mimsave(gif_path, gif_frames, duration=1000)  # intended: 1 second per frame