In [None]:
from sam_tool import SAMTool

# How to Use SAMTool

## 1. Initialize the Tool

In [2]:
from sam_tool import SAMTool
import cv2
import numpy as np

# Initialize with default settings (vit_b model, auto device)
tool = SAMTool()

# Or with specific configuration.
# NOTE: this assignment rebinds `tool`, so the default-settings instance created
# above is discarded. When adapting this cell, keep only the form you need so
# that a single model is loaded.
tool = SAMTool(
    model_type="vit_l",  # "vit_b", "vit_l", or "vit_h"
    device="auto",       # "auto", "mps", "cuda", or "cpu"; "auto" keeps the notebook runnable on machines without MPS
    checkpoint_path=None # Will auto-download if None
)

Using device: mps
Using cached checkpoint: /Users/prudhvivuda/.cache/sam/sam_vit_b_01ec64.pth
Loading SAM vit_b model...
✓ Model loaded successfully
Using device: mps
Downloading SAM vit_l checkpoint...
This is a one-time download (~1.2GB)
Progress: 100.0%
✓ Downloaded to /Users/prudhvivuda/.cache/sam/sam_vit_l_0b3195.pth
Loading SAM vit_l model...
✓ Model loaded successfully


## 2. Single Image Processing

In [3]:
# Load and prepare image
image_path = "dataset/bag.jpg"
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images in BGR channel order; convert to RGB before handing to the tool
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Set image for processing
tool.set_image(image_rgb)

# Select object with points
result = tool.select_with_points(
    points=[(100, 200), (150, 250)],  # (x, y) coordinates
    labels=[1, 1]  # 1 = positive (object), 0 = negative (background)
)

# Access the mask
mask = result['mask']  # Boolean numpy array
score = result['score']  # Confidence score
bbox = result['bbox']  # [x1, y1, x2, y2]
area = result['area']  # Number of pixels

# Save the mask (scaled to 0/255 so it is viewable as an 8-bit image).
# cv2.imwrite returns a success flag; as the last expression it is shown as the cell output.
cv2.imwrite("mask.png", (mask * 255).astype(np.uint8))

True

## 3. Batch Processing with Interactive Selection

In [4]:
# Run the interactive selection UI over the input and collect a summary
interactive_options = {
    "input_path": "dataset/fan.jpg",  # Folder or single image
    "output_dir": "output/",
    "auto_skip": False,               # Don't skip images with no selections
}
results = tool.process_interactive(**interactive_options)

# The returned summary holds:
#   total_images -> number of images processed
#   total_masks  -> total masks created
#   results      -> list of per-image results
print(f"Processed {results['total_images']} images")
print(f"Created {results['total_masks']} masks")


Starting interactive selection for 1 image(s)


[1/1] Processing: fan.jpg

INTERACTIVE SELECTION CONTROLS:
--------------------------------------------------
🖱️  Left Click        : Add object point (green)
🖱️  Right Click       : Add background point (red)
🖱️  Ctrl+Drag        : Draw bounding box
⌨️  'a' / 'Enter'    : Accept and save current mask
⌨️  'r'              : Reset current selection
⌨️  'u'              : Undo last saved mask
⌨️  's'              : Skip this image
⌨️  'q' / 'ESC'      : Next image / Finish

  ✓ Saved mask 1 (area: 1643495 pixels)
✓ Saved 1 masks

✅ Interactive selection complete!
📊 Total masks selected: 1
📁 Output saved to: output

Processed 1 images
Created 1 masks


## 4. Automatic Segmentation

In [None]:
# Set image first
tool.set_image(image_rgb)

# Automatically detect all objects
masks = tool.auto_generate_masks(
    points_per_side=32,  # Sampling density
    pred_iou_thresh=0.88,
    stability_score_thresh=0.95,
    min_mask_region_area=100,  # Minimum object size
    max_objects=10  # Limit number of objects
)

# Process each detected object
for i, mask_data in enumerate(masks):
    mask = mask_data['segmentation']
    area = mask_data['area']
    # [x, y, w, h] format -- note this differs from the [x1, y1, x2, y2]
    # boxes returned by select_with_points.
    bbox = mask_data['bbox']

    # Save individual mask. cv2.imwrite returns False when the file could not
    # be written, so check it rather than reporting the object as saved.
    output_path = f"object_{i}.png"
    if not cv2.imwrite(output_path, (mask * 255).astype(np.uint8)):
        raise OSError(f"Failed to write mask image: {output_path}")
    print(f"Object {i}: Area={area} pixels")

## 5. Box Selection


In [None]:
tool.set_image(image_rgb)

# Box prompt given as [x1, y1, x2, y2]; the tool selects the object within it
box_prompt = [100, 100, 400, 400]
result = tool.select_with_box(box=box_prompt)

mask = result['mask']

# Export Formats

## Export to YOLO Detection Format

In [None]:
# After getting a mask, export to YOLO format
tool.set_image(image_rgb)
# Pass the prompt explicitly, in the same keyword form as the point-selection
# example, so the point's meaning does not depend on an unseen default.
result = tool.select_with_points(
    points=[(200, 300)],  # (x, y) coordinates
    labels=[1]  # 1 = positive (object)
)

# Export single mask to YOLO detection format
yolo_line = tool.export_to_yolo_detection(
    mask_or_bbox=result['mask'],
    class_id=0  # Object class
)
print(yolo_line)  # "0 0.453125 0.567890 0.234375 0.345678"

# Save to file
with open("labels.txt", "w") as f:
    f.write(yolo_line)

## Export to YOLO Segmentation Format

In [None]:
# Turn the selected mask into a polygon label line for YOLO segmentation
segmentation_options = {
    "mask": result['mask'],
    "class_id": 0,
    "simplify": True,  # Simplify polygon
}
yolo_seg = tool.export_to_yolo_segmentation(**segmentation_options)
print(yolo_seg)  # "0 0.23 0.45 0.34 0.56 ..." (polygon points)

## Export to COCO RLE Format

In [None]:
# Encode the selected mask as COCO Run-Length Encoding, then show its fields:
#   'size'   -> [height, width]
#   'counts' -> RLE string
rle = tool.export_to_coco_rle(result['mask'])
for rle_field in ('size', 'counts'):
    print(rle[rle_field])

## Batch Export to YOLO

In [None]:
# Convert every mask from a processing session into a YOLO dataset.
# NOTE(review): earlier cells read images from "dataset/" and wrote to "output/";
# confirm that "images/" and "output/masks" match your actual folder layout.
batch_export_options = {
    "masks_dir": "output/masks",
    "original_images_dir": "images/",
    "output_dir": "yolo_dataset/",
    "format_type": "detection",    # or "segmentation"
    "use_mask_id_as_class": True,  # Different class for each mask
}
export_result = tool.export_masks_to_yolo(**batch_export_options)

print(f"Exported {export_result['total_labels']} labels")
print(f"Number of classes: {export_result['num_classes']}")