In [8]:
# Mount Google Drive at /content/gdrive (Colab only) so files stored on Drive can be opened by path.
from google.colab import drive

drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [18]:
# Switch the working directory to the project folder on the mounted Drive;
# the relative paths used by the cells below are resolved against it.
import os

os.chdir("/content/gdrive/My Drive/LabProject")

In [12]:
using_colab = 1
if using_colab:
    import torch
    import torchvision
    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)
    print("CUDA is available:", torch.cuda.is_available())
    import sys
    !{sys.executable} -m pip install opencv-python matplotlib
    !{sys.executable} -m pip install 'git+https://github.com/facebookresearch/sam2.git'

    !mkdir -p images
    !wget -P images https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/truck.jpg
    !wget -P images https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/groceries.jpg

    !mkdir -p ../checkpoints/
    !wget -P ../checkpoints/ https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt

PyTorch version: 2.5.1+cu121
Torchvision version: 0.20.1+cu121
CUDA is available: True
Collecting git+https://github.com/facebookresearch/sam2.git
  Cloning https://github.com/facebookresearch/sam2.git to /tmp/pip-req-build-bnxv5bic
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/sam2.git /tmp/pip-req-build-bnxv5bic
  Resolved https://github.com/facebookresearch/sam2.git to commit c2ec8e14a185632b0a5d8b161928ceb50197eddc
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting hydra-core>=1.3.2 (from SAM-2==1.0)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting iopath>=0.1.10 (from SAM-2==1.0)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ...

In [14]:
import os
# If running on Apple MPS, let PyTorch fall back to the CPU for operations MPS does not support.
# The variable is assigned here, ahead of the remaining imports of this cell.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [15]:
# Import torch explicitly: the setup cell only imports it inside its `if using_colab:` branch,
# so without this line the cell raises NameError whenever that branch is skipped.
import torch

# Select the device for computation: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"using device: {device}")

if device.type == "cuda":
    # use bfloat16 for the entire notebook
    # (the autocast context is entered here and deliberately never exited)
    torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    if torch.cuda.get_device_properties(0).major >= 8:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
elif device.type == "mps":
    print(
        "\nSupport for MPS devices is preliminary. SAM 2 is trained with CUDA and might "
        "give numerically different outputs and sometimes degraded performance on MPS. "
        "See e.g. https://github.com/pytorch/pytorch/issues/84936 for a discussion."
    )

using device: cuda


In [16]:
!pip install torchmetrics



In [19]:
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from utils.general import check_device
import torch
from torchmetrics import JaccardIndex
from PIL import Image
import numpy as np
import pandas as pd
from functions.creating_list import get_base_filename, load_boxes, load_points, load_three_points, exctract_paths
import pickle
import matplotlib
import matplotlib.pyplot as plt
import cv2
from functions.mask import extract_true_mask_json
from functions.sam2_functions import show_mask, show_points, show_box, show_masks

In [20]:
# Build the SAM 2.1 model from its config file and checkpoint on the selected device,
# then wrap it in the image predictor used by the evaluation cells.
sam2_checkpoint = "checkpoints/sam2.1_hiera_large.pt"
model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml"
sam2_model = build_sam2(
    config_file=model_cfg,
    ckpt_path=sam2_checkpoint,
    device=device,
)
predictor = SAM2ImagePredictor(sam2_model)

In [21]:
# Input folders for the test split: images, ground-truth labels, and the prompt annotations
# (centre points, class labels, bounding boxes, three-point prompts).
folder_images = "image/Cholect_dataset/images/test"
folder_points = "image/cholect_annotation/points/test"
folder_labels = "image/cholect_annotation/classes/test"
folder_bbox = "image/cholect_annotation/bbox/test"
folder_true = "image/Cholect_dataset/labels/test"
folder_three_pts = "image/cholect_annotation/three_points/test"

# Collect the per-folder path lists returned by exctract_paths
# (presumably aligned by base filename — verify against the helper).
(
    matching_image_paths,
    matching_bbox_paths,
    matching_pts_paths,
    matching_lbs_paths,
    matching_true_paths,
    matching_three_pts_paths,
) = exctract_paths(
    folder_images,
    folder_bbox,
    folder_points,
    folder_labels,
    folder_true,
    folder_three_pts,
)

Number of files in each folder: 876 876 876 876 876 876
Number of matching files: 876


In [22]:
# Use SAM 2 with bounding-box prompts: predict one mask per box and score each
# prediction against its ground-truth mask with the Jaccard index (IoU).
output_folder = 'image/cholect_annotation/IoU/iou_mask_bbox.csv'  # path of the CSV file, not a folder
# Create the destination directory up front so the final to_csv cannot fail on a
# missing folder after the whole (slow) loop has already run.
os.makedirs(os.path.dirname(output_folder), exist_ok=True)

# Binary Jaccard index. Built once: calling the metric returns the value for the pair
# passed in, so one instance can be reused for every mask of every image.
jaccard = JaccardIndex(task="binary", num_classes=2)

all_results = []
for box, lbs_path, image_path, true_contour in zip(matching_bbox_paths, matching_lbs_paths, matching_image_paths, matching_true_paths):
    print(f"Processing image: {image_path}")
    # Read the class-label file
    with open(lbs_path, 'r') as file:
        lbs = file.readlines()

    # Skip images whose label file is empty or contains only blank lines
    if not lbs or all(line.strip() == "" for line in lbs):
        print(f"Skipping {lbs_path} as it is empty.")
        continue

    # Load the image as an RGB array; the context manager releases the file handle
    with Image.open(image_path) as pil_image:
        image_width, image_height = pil_image.size
        image = np.array(pil_image.convert("RGB"))
    predictor.set_image(image)

    bbox = load_boxes(box, image_width, image_height, 0)

    pred_masks, _, _ = predictor.predict(
        point_coords=None,
        point_labels=None,
        box=bbox,
        multimask_output=False,
    )

    # Extract the ground-truth masks from the annotation file
    true_masks = extract_true_mask_json(true_contour, image_width, image_height)

    # predict may return an extra singleton axis at position 1; drop it so the
    # prediction array lines up with the ground-truth array
    if pred_masks.shape[1] == 1:
        pred_masks = pred_masks.squeeze(1)
    pred_mask_tensor = torch.tensor(pred_masks)
    true_mask_tensor = torch.tensor(true_masks)
    # Verify shapes match
    assert pred_mask_tensor.shape == true_mask_tensor.shape, "mask and true_mask must have the same shape"

    # Score every predicted/true mask pair and record one result row per mask.
    # Row i is labelled with line i of the label file (assumes labels and masks
    # are stored in the same order — TODO confirm).
    image_name = get_base_filename(image_path)
    for i in range(pred_mask_tensor.shape[0]):
        iou = jaccard(pred_mask_tensor[i], true_mask_tensor[i]).item()
        all_results.append({
            "image": image_name,
            "class": lbs[i].strip(),
            "iou": iou,
            "is_similar": iou > 0.5,
        })

# Build a DataFrame with the results
results_df = pd.DataFrame(all_results)

# Save the DataFrame to a CSV file
results_df.to_csv(output_folder, index=False)

Processing image: image/Cholect_dataset/images/test/seg8k_video12_015750.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015765.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015780.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015795.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015810.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015825.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015840.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015855.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015870.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015885.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015900.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019505.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019520.png

In [24]:
# Use SAM 2 with point prompts: for each annotated point, predict a mask with that
# point as the positive prompt and all other points as negative prompts, then score
# the best mask against the ground truth with the Jaccard index (IoU).
output_folder = 'image/cholect_annotation/IoU/iou_center_pts_bbox.csv'  # path of the CSV file, not a folder
# Create the destination directory up front so the final to_csv cannot fail on a
# missing folder after the whole (slow) loop has already run.
os.makedirs(os.path.dirname(output_folder), exist_ok=True)

# Binary Jaccard index. Built once: calling the metric returns the value for the pair
# passed in, so one instance can be reused for every mask of every image.
jaccard = JaccardIndex(task="binary", num_classes=2)

all_results = []
for pts_path, image_path, lbs_path, true_contour in zip(matching_pts_paths, matching_image_paths, matching_lbs_paths, matching_true_paths):
    print(f"Processing image: {image_path}")
    # Read the class-label file
    with open(lbs_path, 'r') as file:
        lbs = file.readlines()

    # Skip images whose label file is empty or contains only blank lines
    if not lbs or all(line.strip() == "" for line in lbs):
        print(f"Skipping {lbs_path} as it is empty.")
        continue

    # Load the image as an RGB array; the context manager releases the file handle
    with Image.open(image_path) as pil_image:
        image_width, image_height = pil_image.size
        image = np.array(pil_image.convert("RGB"))
    # set_image is called once per image; every predict call below reuses it
    # (the original called it a second time with the same image, which was redundant)
    predictor.set_image(image)

    # Load the points (coordinates)
    points = load_points(pts_path, image_width, image_height, 0)
    input_point = np.array(points)

    pred_masks = []
    # One prediction per point: label 1 for the current point, 0 for all the others
    for idx in range(len(input_point)):
        input_label = np.zeros(len(input_point))
        input_label[idx] = 1

        masks, scores, _ = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=True
        )

        # Keep only the candidate mask with the highest predicted score
        best_mask = masks[np.argmax(scores)]
        pred_masks.append(best_mask)

    # Convert the list to a NumPy array for further processing
    pred_masks = np.stack(pred_masks)

    # Extract the ground-truth masks from the annotation file
    true_masks = extract_true_mask_json(true_contour, image_width, image_height)

    # Convert masks to PyTorch tensors
    pred_mask_tensor = torch.tensor(pred_masks)
    true_mask_tensor = torch.tensor(true_masks)
    # Verify shapes match
    assert pred_mask_tensor.shape == true_mask_tensor.shape, "mask and true_mask must have the same shape"

    # Score every predicted/true mask pair and record one result row per mask.
    # Row i is labelled with line i of the label file (assumes labels and points
    # are stored in the same order — TODO confirm).
    image_name = get_base_filename(image_path)
    for i in range(pred_mask_tensor.shape[0]):
        iou = jaccard(pred_mask_tensor[i], true_mask_tensor[i]).item()
        all_results.append({
            "image": image_name,
            "class": lbs[i].strip(),
            "iou": iou,
            "is_similar": iou > 0.5,
        })

# Build a DataFrame with the results
results_df = pd.DataFrame(all_results)

# Save the DataFrame to a CSV file
results_df.to_csv(output_folder, index=False)

Processing image: image/Cholect_dataset/images/test/seg8k_video12_015750.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015765.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015780.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015795.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015810.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015825.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015840.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015855.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015870.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015885.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015900.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019505.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019520.png

In [25]:
# Use SAM 2 with three-point prompts: the points are consumed in consecutive groups of
# three; each group is the positive prompt for one prediction while all other points
# act as negative prompts. The best mask per group is scored against the ground truth
# with the Jaccard index (IoU).
output_folder = 'image/cholect_annotation/IoU/iou_three_pts_bbox.csv'  # path of the CSV file, not a folder
# Create the destination directory up front so the final to_csv cannot fail on a
# missing folder after the whole (slow) loop has already run.
os.makedirs(os.path.dirname(output_folder), exist_ok=True)

# Binary Jaccard index. Built once: calling the metric returns the value for the pair
# passed in, so one instance can be reused for every mask of every image.
jaccard = JaccardIndex(task="binary", num_classes=2)

all_results = []
for pts_path, image_path, lbs_path, true_contour in zip(matching_three_pts_paths, matching_image_paths, matching_lbs_paths, matching_true_paths):
    print(f"Processing image: {image_path}")

    # Read the class-label file
    with open(lbs_path, 'r') as file:
        lbs = file.readlines()

    # Skip images whose label file is empty or contains only blank lines
    if not lbs or all(line.strip() == "" for line in lbs):
        print(f"Skipping {lbs_path} as it is empty.")
        continue

    # Load the image as an RGB array; the context manager releases the file handle
    with Image.open(image_path) as pil_image:
        image_width, image_height = pil_image.size
        image = np.array(pil_image.convert("RGB"))
    # set_image is called once per image; every predict call below reuses it
    # (the original called it a second time with the same image, which was redundant)
    predictor.set_image(image)

    # Load the three points
    points = load_three_points(pts_path, image_width, image_height, 0)
    input_point = np.array(points)

    pred_masks = []
    # Walk over the points in steps of 3: points idx, idx+1, idx+2 get label 1,
    # every other point gets label 0
    for idx in range(0, len(input_point), 3):
        input_label = np.zeros(len(input_point))
        input_label[idx:idx+3] = 1

        masks, scores, _ = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=True
        )

        # Keep only the candidate mask with the highest predicted score
        best_mask = masks[np.argmax(scores)]
        pred_masks.append(best_mask)

    # Convert the list to a NumPy array for further processing.
    # No singleton-axis squeeze is applied here: each stacked entry is a single mask picked
    # by index from predict's output, exactly as in the single-point cell.
    pred_masks = np.stack(pred_masks)

    # Extract the ground-truth masks from the annotation file
    true_masks = extract_true_mask_json(true_contour, image_width, image_height)

    # Convert masks to PyTorch tensors
    pred_mask_tensor = torch.tensor(pred_masks)
    true_mask_tensor = torch.tensor(true_masks)

    # Verify shapes match
    assert pred_mask_tensor.shape == true_mask_tensor.shape, "mask and true_mask must have the same shape"

    # Score every predicted/true mask pair and record one result row per mask.
    # Row i is labelled with line i of the label file (assumes labels and point
    # groups are stored in the same order — TODO confirm).
    image_name = get_base_filename(image_path)
    for i in range(pred_mask_tensor.shape[0]):
        iou = jaccard(pred_mask_tensor[i], true_mask_tensor[i]).item()
        all_results.append({
            "image": image_name,
            "class": lbs[i].strip(),
            "iou": iou,
            "is_similar": iou > 0.5,
        })

# Build a DataFrame with the results
results_df = pd.DataFrame(all_results)

# Save the DataFrame to a CSV file
results_df.to_csv(output_folder, index=False)

Processing image: image/Cholect_dataset/images/test/seg8k_video12_015750.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015765.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015780.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015795.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015810.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015825.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015840.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015855.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015870.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015885.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_015900.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019505.png
Processing image: image/Cholect_dataset/images/test/seg8k_video12_019520.png