In [None]:
import os
# Directory the kernel is running in; later cells join the weights path onto it.
HOME = os.getcwd()
print("HOME:", HOME)


# !pip install -q 'git+https://github.com/facebookresearch/segment-anything.git'

# !pip install -q jupyter_bbox_widget roboflow dataclasses-json supervision==0.23.0

# !mkdir -p {HOME}/weights
# !wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -P {HOME}/weights




In [None]:
import os

# Expected location of the SAM checkpoint file under HOME/weights.
CHECKPOINT_PATH = os.path.join(HOME, "weights", "sam_vit_h_4b8939.pth")
# Report the path and whether a regular file actually exists there.
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))

Download example data

In [None]:
# !mkdir -p {HOME}/data

# !wget -q https://media.roboflow.com/notebooks/examples/dog.jpeg -P {HOME}/data
# !wget -q https://media.roboflow.com/notebooks/examples/dog-2.jpeg -P {HOME}/data
# !wget -q https://media.roboflow.com/notebooks/examples/dog-3.jpeg -P {HOME}/data
# !wget -q https://media.roboflow.com/notebooks/examples/dog-4.jpeg -P {HOME}/data

Load Model

In [None]:
import torch

from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Registry key of the model variant to build.
MODEL_TYPE = "vit_h"

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

# Build the model from the checkpoint file, then move it onto the chosen device.
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH)
sam = sam.to(device=DEVICE)

Automated Mask Generation

In [None]:
# Automatic mask generator wrapping the loaded model; no extra options are passed.
mask_generator = SamAutomaticMaskGenerator(sam)


import os

# IMAGE_NAME = "/home/stud1/Desktop/PlantDoc-Object-Detection-Dataset/TEST/1684.jpg"
IMAGE_NAME = "/home/stud1/Desktop/data/Leaves_sunlight/2025-02-03_011/2025-02-03_011.png"
# IMAGE_NAME is an absolute path. os.path.join(HOME, "data", IMAGE_NAME) would drop the
# HOME/"data" components and return IMAGE_NAME unchanged, so assign it directly instead
# of implying that the file lives under HOME/data.
IMAGE_PATH = IMAGE_NAME

Generate Masks With SAM

In [None]:
import cv2
import supervision as sv

# cv2.imread returns None (it does not raise) when the file is missing or cannot be
# decoded, so stop here with a clear message instead of failing inside cvtColor.
image_bgr = cv2.imread(IMAGE_PATH)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# OpenCV loads pixels in BGR order; the mask generator is given the RGB version.
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# One record (dict) per generated mask.
sam_result = mask_generator.generate(image_rgb)

OUTPUT FORMAT


SamAutomaticMaskGenerator returns a list of masks, where each mask is a dict containing various information about the mask:

segmentation - [np.ndarray] - the mask with (H, W) shape, and bool type
area - [int] - the area of the mask in pixels
bbox - [List[int]] - the bounding box of the mask in xywh format
predicted_iou - [float] - the model's own prediction for the quality of the mask
point_coords - [List[List[float]]] - the sampled input point that generated this mask
stability_score - [float] - an additional measure of mask quality
crop_box - [List[int]] - the crop of the image used to generate this mask in xywh format



In [None]:
# Show which fields the first mask record carries.
print(sam_result[0].keys())

Results visualisation with Supervision
As of version 0.5.0 Supervision has native support for SAM.

In [None]:
# Convert the raw SAM records into supervision detections.
detections = sv.Detections.from_sam(sam_result=sam_result)

# Colour each mask according to its index in the detections.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
annotated_image = mask_annotator.annotate(
    scene=image_bgr.copy(),  # draw on a copy rather than on image_bgr itself
    detections=detections,
)

# Side by side: the source on the left, the mask overlay on the right.
sv.plot_images_grid(
    images=[image_bgr, annotated_image],
    titles=['source image', 'segmented image'],
    grid_size=(1, 2),
)

Interaction with segmentation results

In [None]:
import math

# Segmentation arrays of every mask, ordered from largest to smallest area.
masks = [
    mask['segmentation']
    for mask in sorted(sam_result, key=lambda x: x['area'], reverse=True)
]

# Fixed 8-row grid; ceil gives enough columns for every mask.
# Clamp to at least one column so an empty result still yields a valid grid shape
# (0 columns would make the plotting call fail).
num_masks = len(masks)
rows = 8
cols = max(1, math.ceil(num_masks / rows))

sv.plot_images_grid(
    images=masks,
    grid_size=(rows, cols),
    size=(16, 16)
)


For Automatic Grid Size

In [None]:
import math

# Near-square layout: rows ~ sqrt(n), columns sized to hold all masks.
num_masks = len(masks)
# max(1, ...) keeps rows from being 0, which would divide by zero on the next line.
rows = max(1, math.ceil(math.sqrt(num_masks)))
cols = max(1, math.ceil(num_masks / rows))

# NOTE(review): a 1x1 grid makes plt.subplots return a bare Axes instead of an array,
# which presumably breaks the grid plotter's `axes.flat` iteration - confirm against
# the installed supervision version. Pad to two cells to stay on the array path.
if rows * cols < 2:
    cols = 2

sv.plot_images_grid(
    images=masks,
    grid_size=(rows, cols),
    size=(16, 16)
)


With Bounding Boxes

In [None]:
!pip install imageio[freeimage]
!pip install opencv-python-headless matplotlib imageio[freeimage] torch torchvision supervision segment-anything



In [None]:
import os
import cv2
import numpy as np
import imageio.v3 as iio
import matplotlib.pyplot as plt
import torch

# Import SAM libraries and supervision (for annotation)
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
import supervision as sv

# ------------------ File Paths ------------------
HDR_PATH = "/home/stud1/Desktop/data/Leaves_sunlight/2025-02-03_011/capture/2025-02-03_011.hdr"


def normalize_to_uint8(data):
    """Linearly rescale an array to the 0-255 range and cast it to uint8.

    Args:
        data: Numeric array-like of any shape.

    Returns:
        np.ndarray of dtype uint8 with the same shape as ``data``. The minimum
        maps to 0 and the maximum to 255 (fractions are truncated). A constant
        input maps to all zeros instead of dividing by zero.
    """
    # Work in float64 so the subtraction and range cannot overflow integer dtypes.
    values = np.asarray(data, dtype=np.float64)
    low = values.min()
    span = values.max() - low
    if span == 0:
        return np.zeros(values.shape, dtype=np.uint8)
    return ((values - low) / span * 255).astype(np.uint8)


# ------------------ 1. Load & Normalize HDR Image ------------------
# NOTE(review): a `capture/*.hdr` file may be an ENVI header rather than a Radiance
# HDR image - confirm imageio can actually decode this file.
hdr_image = None
if os.path.exists(HDR_PATH):
    try:
        # Load HDR data using imageio
        hdr_data = iio.imread(HDR_PATH)
        # Normalize the values to 0-255 for display
        hdr_image = normalize_to_uint8(hdr_data)
    except Exception as e:
        # Best effort: report the problem and continue without an image.
        print("Error loading HDR:", e)
        hdr_image = None
else:
    print("HDR file not found at:", HDR_PATH)

# ------------------ 2. Prepare HDR Image for SAM ------------------
if hdr_image is not None:
    # SAM expects an RGB image; if the HDR is single-channel, convert it to 3-channel.
    # NOTE(review): arrays with any other channel count are passed through unchanged -
    # confirm the mask generator accepts them.
    if len(hdr_image.shape) == 2 or hdr_image.shape[-1] == 1:
        hdr_image_rgb = cv2.cvtColor(hdr_image, cv2.COLOR_GRAY2RGB)
    else:
        hdr_image_rgb = hdr_image.copy()
else:
    hdr_image_rgb = None

# ------------------ 3. Load SAM Model & Generate Masks ------------------
model_type = "vit_h"  # Using the "vit_h" variant
device = "cuda" if torch.cuda.is_available() else "cpu"

if hdr_image_rgb is not None:
    try:
        # Build the model WITH the downloaded checkpoint. Calling the registry entry
        # without one leaves the network randomly initialised, so its masks are
        # meaningless. This also rebinds the notebook-level `sam` and
        # `mask_generator`, which later cells reuse.
        sam = sam_model_registry[model_type](checkpoint=CHECKPOINT_PATH)
        sam.to(device=device)

        # Create the SAM automatic mask generator
        mask_generator = SamAutomaticMaskGenerator(sam)

        # Generate segmentation masks for the HDR image
        sam_results = mask_generator.generate(hdr_image_rgb)

        if sam_results:
            # Convert SAM output to a supervision detection object and annotate the image
            detections = sv.Detections.from_sam(sam_result=sam_results)
            mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
            annotated_hdr = mask_annotator.annotate(scene=hdr_image_rgb.copy(), detections=detections)
        else:
            print("SAM did not produce any masks for the HDR image.")
            annotated_hdr = hdr_image_rgb
    except Exception as e:
        # Best effort: fall back to showing the un-annotated image.
        print("Error during SAM segmentation:", e)
        annotated_hdr = hdr_image_rgb
else:
    annotated_hdr = None

# ------------------ 4. Display the Segmented HDR Image ------------------
if annotated_hdr is not None:
    plt.figure(figsize=(10, 10))
    plt.imshow(annotated_hdr)
    plt.title("HDR Image with SAM Segmentation")
    plt.axis("off")
    plt.show()
else:
    print("No HDR image available for segmentation/display.")


In [None]:
# Segmented Area

In [None]:
import os
import cv2
import numpy as np
import supervision as sv
import matplotlib.pyplot as plt

# Define dataset path
DATASET_PATH = "/home/stud1/Desktop/PlantDoc-Object-Detection-Dataset/TEST"

# Get list of image files. os.listdir returns entries in arbitrary order, so sort to
# make the "first 5" selection repeatable; extensions are matched case-insensitively.
image_files = sorted(
    f for f in os.listdir(DATASET_PATH) if f.lower().endswith(('.jpg', '.png'))
)
image_files = image_files[:5]  # Process only first 5 images

# The annotator does not depend on the image, so build it once outside the loop.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)

for image_name in image_files:
    image_path = os.path.join(DATASET_PATH, image_name)

    # Read and preprocess image; cv2.imread returns None for unreadable files.
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        print(f"Could not read image, skipping: {image_path}")
        continue
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Generate masks (uses the mask_generator created in an earlier cell)
    sam_result = mask_generator.generate(image_rgb)

    if not sam_result:
        print(f"No masks detected for {image_name}")
        continue

    # Pick the record with the largest area -- the full dictionary, not just the mask
    largest_mask_data = max(sam_result, key=lambda x: x['area'])

    # Convert segmentation mask to displayable format (0 or 255)
    segmentation_mask = largest_mask_data['segmentation'].astype(np.uint8) * 255

    # from_sam() expects a list of records, so wrap the single dictionary in a list
    detections = sv.Detections.from_sam(sam_result=[largest_mask_data])
    annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)

    # Display original and segmented image using Supervision
    sv.plot_images_grid(
        images=[image_bgr, annotated_image],
        grid_size=(1, 2),
        titles=['Source Image', 'Segmented Image']
    )

    # Display the single mask directly with matplotlib to avoid axes.flat issues
    plt.figure(figsize=(8, 8))
    plt.imshow(segmentation_mask, cmap='gray')
    plt.axis('off')
    plt.title('Largest Mask')
    plt.show()


In [None]:
import os
import cv2
import math
import supervision as sv

# Define dataset path
DATASET_PATH = "/home/stud1/Desktop/PlantDoc-Object-Detection-Dataset/TEST"

# Get list of image files. os.listdir returns entries in arbitrary order, so sort to
# make the "first 5" selection repeatable; extensions are matched case-insensitively.
image_files = sorted(
    f for f in os.listdir(DATASET_PATH) if f.lower().endswith(('.jpg', '.png'))
)
image_files = image_files[:5]  # Process only first 5 images

# The annotator does not depend on the image, so build it once outside the loop.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)

for image_name in image_files:
    image_path = os.path.join(DATASET_PATH, image_name)

    # Read and preprocess image; cv2.imread returns None for unreadable files.
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        print(f"Could not read image, skipping: {image_path}")
        continue
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Generate masks
    sam_result = mask_generator.generate(image_rgb)

    # Nothing to annotate or lay out in a grid when no mask was produced.
    if not sam_result:
        print(f"No masks detected for {image_name}")
        continue

    # Annotate masks
    detections = sv.Detections.from_sam(sam_result=sam_result)
    annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)

    # Display original and segmented image
    sv.plot_images_grid(
        images=[image_bgr, annotated_image],
        grid_size=(1, 2),
        titles=['Source Image', 'Segmented Image']
    )

    # Extract masks sorted by area, largest first
    masks = [mask['segmentation'] for mask in sorted(sam_result, key=lambda x: x['area'], reverse=True)]

    # Determine grid size: fixed 8 rows, enough columns to hold every mask
    num_masks = len(masks)
    rows = 8
    cols = max(1, math.ceil(num_masks / rows))

    # Display masks
    sv.plot_images_grid(
        images=masks,
        grid_size=(rows, cols),
        size=(16, 16)
    )


Generate Segmentation with Bounding Box

The SamPredictor class provides an easy interface to the model for prompting the model. It allows the user to first set an image using the set_image method, which calculates the necessary image embeddings. Then, prompts can be provided via the predict method to efficiently predict masks from those prompts. The model can take as input both point and box prompts, as well as masks from the previous iteration of prediction.

In [None]:
!pip install jupyter_bbox_widget ipywidgets
!jupyter nbextension enable --py widgetsnbextension
!jupyter nbextension enable --py jupyter_bbox_widget


In [None]:
# Prompt-driven predictor wrapping whichever model `sam` currently refers to in the kernel.
mask_predictor = SamPredictor(sam)

In [None]:
# import os

# IMAGE_NAME = "/home/stud1/Desktop/PlantDoc-Object-Detection-Dataset/TEST/07c.jpg"
# IMAGE_PATH = os.path.join(HOME, "data", IMAGE_NAME)

In [None]:
# # helper function that loads an image before adding it to the widget

# import base64

# def encode_image(filepath):
#     with open(filepath, 'rb') as f:
#         image_bytes = f.read()
#     encoded = str(base64.b64encode(image_bytes), 'utf-8')
#     return "data:image/jpg;base64,"+encoded

In [None]:
import os
import base64
from jupyter_bbox_widget import BBoxWidget

# Full path of the image to annotate, as a plain string (not a tuple)
IMAGE_NAME = "/home/stud1/Desktop/PlantDoc-Object-Detection-Dataset/TEST/1684.jpg"

# Define encode_image function correctly
def encode_image(filepath):
    """Read the file at ``filepath`` and return it as a base64 data URI string.

    The result always carries the ``data:image/jpg;base64,`` prefix, whatever
    the file's real format is.
    """
    with open(filepath, 'rb') as image_file:
        payload = base64.b64encode(image_file.read())
    return "data:image/jpg;base64," + str(payload, 'utf-8')

# IMAGE_NAME already holds the full path, so it is used directly (no os.path.join needed)
IMAGE_PATH = IMAGE_NAME  

# Create the widget, give it the image as a data URI, and show it as the cell output
widget = BBoxWidget()
widget.image = encode_image(IMAGE_PATH)
widget


In [None]:
# Show the boxes currently held by the widget.
widget.bboxes

Generate masks with SAM

NOTE: SamPredictor.predict method takes np.ndarray box argument in [x_min, y_min, x_max, y_max] format. Let's reorganise your data first

In [None]:
import numpy as np

# Fallback prompt, used only when no box has been drawn on the image above
default_box = {'x': 168, 'y': 1247, 'width': 555, 'height': 678, 'label': ''}

# Prefer the first box held by the widget, if there is one
if widget.bboxes:
    box = widget.bboxes[0]
else:
    box = default_box

# Convert x / y / width / height into [x_min, y_min, x_max, y_max]
box = np.array([box['x'], box['y'], box['x'] + box['width'], box['y'] + box['height']])


import cv2
import numpy as np
import supervision as sv

# cv2.imread returns None (it does not raise) when the file is missing or cannot be
# decoded, so stop here with a clear message instead of failing inside cvtColor.
image_bgr = cv2.imread(IMAGE_PATH)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# Give the predictor the image first, then prompt it with the box.
mask_predictor.set_image(image_rgb)

# multimask_output=True asks for several candidate masks for the one box.
masks, scores, logits = mask_predictor.predict(
    box=box,
    multimask_output=True
)

Results visualisation with Supervision

In [None]:
# Wrap the predicted masks as detections, deriving each box from its mask.
detections = sv.Detections(mask=masks, xyxy=sv.mask_to_xyxy(masks=masks))
# Keep only the candidate(s) whose area equals the maximum.
detections = detections[detections.area == np.max(detections.area)]

# Both annotators draw in red.
mask_annotator = sv.MaskAnnotator(color=sv.Color.RED, color_lookup=sv.ColorLookup.INDEX)
box_annotator = sv.BoxAnnotator(color=sv.Color.RED, color_lookup=sv.ColorLookup.INDEX)

# Each view is drawn on its own copy of the image.
segmented_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)
source_image = box_annotator.annotate(scene=image_bgr.copy(), detections=detections)

sv.plot_images_grid(
    images=[source_image, segmented_image],
    titles=['source image', 'segmented image'],
    grid_size=(1, 2),
)

Interaction with segmentation results 

In [None]:
import supervision as sv  # the alias must be `sv` to match the call below

# Show every candidate mask returned for the box prompt in a single row.
sv.plot_images_grid(
    images=masks,
    grid_size=(1, 4),
    size=(16, 4)
)