In [1]:
from ultralytics import SAM

# Load the SAM checkpoint "sam2_b.pt" through the ultralytics SAM wrapper.
model = SAM("sam2_b.pt")

# Optional: print the model summary. Because this is the cell's last expression,
# the tuple returned by info() is echoed as the cell output as well.
model.info()

Model summary: 566 layers, 80,833,666 parameters, 80,833,666 gradients


(566, 80833666, 80833666, 0.0)

In [2]:
import os
# Directory that holds the test images for the text segmenter.
path = "/storage3/vadim/HTR-historical/data/processed/4 Segmenter test/text_segmenter/test/images/"

# os.listdir() returns entries in arbitrary order; sort the full paths so that
# out[0] (used by the cells below) refers to the same image on every run.
out = sorted(os.path.join(path, name) for name in os.listdir(path))

In [3]:
# Run the SAM model on the first listed image. No bboxes/points prompt is passed
# in this call, so this is not a box-prompted segmentation.
results = model(out[0])


image 1/1 /storage3/vadim/HTR-historical/data/processed/4 Segmenter test/text_segmenter/test/images/080951f6-13.png: 1024x1024 1 0, 1 1, 1 2, 1 3, 3713.2ms
Speed: 19.7ms preprocess, 3713.2ms inference, 1.1ms postprocess per image at shape (1, 3, 1024, 1024)


In [10]:
# Save the first result as an image file; the returned file name is echoed as the cell output.
results[0].save()

'results_080951f6-13.png'

In [6]:
# Inspect the results list. NOTE(review): the repr includes the full orig_img array,
# so this output is very long; consider inspecting results[0].masks instead.
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: None
 keypoints: None
 masks: ultralytics.engine.results.Masks object
 names: {0: '0'}
 obb: None
 orig_img: array([[[ 8,  8,  8],
         [ 8,  8,  8],
         [ 8,  8,  8],
         ...,
         [13, 13, 13],
         [14, 13, 13],
         [14, 11, 10]],
 
        [[ 8,  8,  8],
         [ 8,  8,  8],
         [ 8,  8,  8],
         ...,
         [13, 13, 13],
         [14, 13, 13],
         [14, 11, 10]],
 
        [[ 8,  8,  8],
         [ 8,  8,  8],
         [ 8,  8,  8],
         ...,
         [13, 13, 13],
         [14, 13, 13],
         [14, 11, 10]],
 
        ...,
 
        [[ 0,  5,  4],
         [ 0,  5,  4],
         [ 2,  7,  6],
         ...,
         [ 1,  1,  4],
         [ 2,  1,  3],
         [10,  9,  9]],
 
        [[12, 15, 16],
         [14, 16, 17],
         [14, 16, 17],
         ...,
         [11, 13, 13],
         [11, 12, 12],
         [21, 19, 21]],
 
        [[ 5,  1,  3],
         

In [None]:
# Segment with point prompt
# NOTE: "path/to/image.jpg" is a placeholder path, and this cell has no execution
# count (it has not been run). Running it rebinds `results`.
results = model("path/to/image.jpg", points=[150, 150], labels=[1])

In [15]:
from ultralytics.data.annotator import auto_annotate

# Annotate the first listed image with the imported auto_annotate helper,
# using the custom detector weights and writing the labels to 'out'.
res = auto_annotate(
    data=out[0],
    det_model="/storage3/vadim/HTR-historical/runs/segment/train/weights/best.pt",
    sam_model="sam2_b.pt",
    output_dir='out',
)


image 1/1 /storage3/vadim/HTR-historical/data/processed/4 Segmenter test/text_segmenter/test/images/080951f6-13.png: 1568x1088 59 texts, 29.7ms
Speed: 8.1ms preprocess, 29.7ms inference, 3.0ms postprocess per image at shape (1, 3, 1568, 1088)


In [8]:
from pathlib import Path

from ultralytics import SAM, YOLO

def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
    """
    Detect objects with a YOLO model, segment each detection with a SAM model, and save the segments as text files.

    The detection model is run on `data`; for every image with at least one detection, the detected boxes are
    passed to the SAM model as `bboxes` prompts and the resulting normalized mask outlines are written to a
    text file named after the image. Unlike a write-only annotator, this version also returns the result
    objects so they can be inspected afterwards.

    Args:
        data (str | Path): Source passed to the detection model. The output-directory default is derived from
            this path, so it is expected to be a filesystem path (a single image or a folder of images).
        det_model (str): Path or name of the pre-trained YOLO detection model.
        sam_model (str): Path or name of the pre-trained SAM segmentation model.
        device (str): Device to run the models on (e.g., 'cpu', 'cuda', '0').
        output_dir (str | Path | None): Directory for the label files. If falsy, a directory named
            "<data stem>_auto_annotate_labels" next to `data` is used.

    Returns:
        (list[tuple]): One `(det_result, sam_results)` pair per image that had at least one detection, where
            `det_result` is the detection result for the image and `sam_results` is whatever the SAM model
            call returned for it. Images without detections are not included.

    Examples:
        >>> pairs = auto_annotate(data="ultralytics/assets", det_model="yolov8n.pt", sam_model="mobile_sam.pt")
        >>> det_result, sam_results = pairs[0]

    Notes:
        - The output directory (including parents) is created if it does not exist.
        - One text file is written per image with detections, named "<image stem>.txt".
        - Each line holds a class ID followed by the flattened, normalized segment coordinates.
        - The SAM model is called with `save=True`, so its predictions are additionally saved by the library
          to its own results directory.
    """
    det_model = YOLO(det_model)
    sam_model = SAM(sam_model)

    data = Path(data)
    if not output_dir:
        output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
    output_dir = Path(output_dir)  # normalize once so later joins do not rebuild it
    output_dir.mkdir(exist_ok=True, parents=True)

    det_results = det_model(data, stream=True, device=device)
    out_res = []

    for result in det_results:
        class_ids = result.boxes.cls.int().tolist()  # noqa
        if not class_ids:
            # Nothing detected in this image: no SAM call and no label file.
            continue

        boxes = result.boxes.xyxy  # detected boxes, used as SAM box prompts
        sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=True, device=device)
        out_res.append((result, sam_results))

        # A result may carry no masks at all; treat that as "no segments" instead of failing on `.xyn`.
        masks = sam_results[0].masks
        segments = masks.xyn if masks is not None else []  # noqa

        label_path = output_dir / f"{Path(result.path).stem}.txt"
        with open(label_path, "w") as f:
            # Segments are matched to class IDs by position.
            for class_id, segment in zip(class_ids, segments):
                if len(segment) == 0:
                    continue
                coords = " ".join(map(str, segment.reshape(-1).tolist()))
                f.write(f"{class_id} " + coords + "\n")

    return out_res

In [9]:
# Annotate the first listed image with the auto_annotate function defined in this
# notebook, keeping the returned (detection result, SAM results) pairs in `res`.
res = auto_annotate(
    data=out[0],
    det_model="/storage3/vadim/HTR-historical/models/detector/best.pt",
    sam_model="sam2_l.pt",
    output_dir='out',
)


image 1/1 /storage3/vadim/HTR-historical/data/processed/4 Segmenter test/text_segmenter/test/images/080951f6-13.png: 1024x704 40 texts, 29.5ms
Results saved to [1m/storage3/vadim/HTR-historical/runs/segment/predict3[0m
Speed: 2.8ms preprocess, 29.5ms inference, 0.7ms postprocess per image at shape (1, 3, 1024, 704)


In [6]:
# Each entry of `res` is a (detection_result, sam_results) pair, so this saves the
# detector's result for the first image. `res` must exist in the kernel: run the
# auto_annotate(...) cell first, otherwise this raises NameError.
res[0][0].save()

NameError: name 'res' is not defined