In [1]:
import os
import cv2
import json
import torch
import numpy as np
from tqdm import tqdm
from datetime import datetime
from segment_anything import sam_model_registry, SamPredictor
from pycocotools import mask as mask_utils
import urllib.request

In [2]:
# Report whether PyTorch can see a CUDA device in this environment.
has_cuda = torch.cuda.is_available()
print(has_cuda)

False


In [3]:
# Location of the SAM ViT-H checkpoint on disk and the URL it is fetched from.
checkpoint_path = os.path.join("checkpoints", "sam_vit_h_4b8939.pth")
checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"

# Make directory if it doesn't exist
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Download if the file is missing
if not os.path.exists(checkpoint_path):
    print(f"Downloading SAM checkpoint from {checkpoint_url}...")
    # Download to a temporary name and rename only on success. Writing straight
    # to checkpoint_path would leave a truncated file behind if the transfer is
    # interrupted, and the existence check above would then accept it forever.
    partial_path = checkpoint_path + ".part"
    try:
        urllib.request.urlretrieve(checkpoint_url, partial_path)
        os.replace(partial_path, checkpoint_path)
    finally:
        # After a successful replace the partial file no longer exists; this
        # only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete.")
else:
    print("Checkpoint already exists at:", checkpoint_path)

Checkpoint already exists at: checkpoints\sam_vit_h_4b8939.pth


In [4]:
# Run configuration: SAM variant key, input image folder, and output folder.
model_type = "vit_h"
image_folder = "Dataset_resized"
output_root = "segmentation_outputs"

# Create the output folder up front; a no-op if it is already there.
os.makedirs(output_root, exist_ok=True)

In [5]:
# Pick the compute device: CUDA when PyTorch reports it available, else CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the SAM model from the checkpoint, move it onto the chosen device,
# and wrap it in a predictor for point-prompted inference.
sam = sam_model_registry[model_type](checkpoint=checkpoint_path)
sam.to(device)
predictor = SamPredictor(sam)

In [6]:
# Resume support: file names already annotated in earlier runs are stored as a
# JSON list inside the output folder and loaded here into a set.
processed_file = os.path.join(output_root, "processed_images.json")
processed_images = set()
if os.path.exists(processed_file):
    with open(processed_file, "r") as f:
        processed_images.update(json.load(f))

In [7]:
def binary_mask_to_rle(mask):
    """Encode a binary mask as a JSON-serialisable run-length encoding.

    The mask is cast to uint8 and laid out in Fortran order before being
    handed to ``mask_utils.encode``. The ``counts`` entry of the result is
    decoded from bytes to a UTF-8 string so the dict can be passed to
    ``json.dump``.

    Args:
        mask: array-like object supporting ``.astype`` (a NumPy array in the
            visible call site).

    Returns:
        The dict produced by ``mask_utils.encode`` with ``counts`` as ``str``.
    """
    fortran_mask = np.asfortranarray(mask.astype(np.uint8))
    encoded = mask_utils.encode(fortran_mask)
    # bytes are not JSON-serialisable, so store the counts as text instead.
    encoded["counts"] = encoded["counts"].decode("utf-8")
    return encoded


# Running COCO id counters; both start at 1 every time this cell is executed.
annotation_id = image_id = 1

In [8]:
# Interactive annotation loop.
#
# For every image in image_folder that is not yet listed in processed_images:
#   * left click adds a positive prompt point, right click a negative one;
#   * 's' runs SAM on the collected points (at least one positive is required),
#     'k' skips the image, ESC stops the whole session;
#   * in the result window 'r' retries the same image and any other key accepts.
# On acceptance an overlay PNG and a single-image COCO JSON are written to
# output_root, and the file name is appended to the processed list on disk.
PROMPT_WINDOW = "Click points, press 's' to segment or 'k' to skip"
RESULT_WINDOW = "Result - press 'r' to retry or any key to accept"
ESC_KEY = 27


def _on_mouse(event, x, y, flags, state):
    """Record a prompt point and draw its marker on the canvas.

    Args:
        event: OpenCV mouse event code.
        x, y: pixel coordinates of the event.
        flags: OpenCV event flags (unused).
        state: dict passed via ``cv2.setMouseCallback`` holding the drawing
            ``canvas`` and the ``positive`` / ``negative`` point lists.
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        state["positive"].append([x, y])
        colour = (0, 255, 0)  # green marker for foreground prompts
    elif event == cv2.EVENT_RBUTTONDOWN:
        state["negative"].append([x, y])
        colour = (0, 0, 255)  # red marker for background prompts
    else:
        return
    cv2.circle(state["canvas"], (x, y), 5, colour, -1)
    cv2.imshow(PROMPT_WINDOW, state["canvas"])


stop_requested = False

for fname in tqdm(os.listdir(image_folder)):
    if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    if fname in processed_images:
        continue

    image_path = os.path.join(image_folder, fname)
    clean_bgr = cv2.imread(image_path)
    if clean_bgr is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising; skip it rather than crash in cvtColor.
        print(f"\n⚠️ Could not read image, skipping: {image_path}")
        continue

    image_rgb = cv2.cvtColor(clean_bgr, cv2.COLOR_BGR2RGB)
    h, w = image_rgb.shape[:2]

    # The embedding depends only on the image, so compute it once per image
    # instead of once per retry.
    predictor.set_image(image_rgb)

    while True:
        # Fresh canvas and empty prompt lists for each attempt.
        state = {"canvas": clean_bgr.copy(), "positive": [], "negative": []}

        print(f"\nProcessing image: {fname}")
        cv2.imshow(PROMPT_WINDOW, state["canvas"])
        cv2.setMouseCallback(PROMPT_WINDOW, _on_mouse, state)

        # Wait for 's' to segment, 'k' to skip, or ESC to stop the session.
        while True:
            key = cv2.waitKey(20) & 0xFF
            if key == ord("s") and state["positive"]:
                break
            if key == ord("k"):
                print("⏭️ Skipping this image...")
                break
            if key == ESC_KEY:
                stop_requested = True
                break

        if stop_requested or key == ord("k"):
            break  # Leave the retry loop without saving anything

        prompt_points = np.array(state["positive"] + state["negative"])
        prompt_labels = np.array(
            [1] * len(state["positive"]) + [0] * len(state["negative"])
        )
        masks, _, _ = predictor.predict(
            point_coords=prompt_points,
            point_labels=prompt_labels,
            multimask_output=False,
        )
        mask = masks[0]

        # The overlay is drawn on the annotated canvas, so click markers that
        # fall outside the mask stay visible in the saved image.
        result_overlay = state["canvas"].copy()
        result_overlay[mask] = [0, 255, 0]

        # Show result and ask user to retry or accept
        cv2.imshow(RESULT_WINDOW, result_overlay)
        key = cv2.waitKey(0) & 0xFF
        cv2.destroyAllWindows()

        if key == ord("r"):
            print("🔁 Retrying segmentation for this image...")
            continue  # Re-do the same image

        # Save segmentation result
        base_name = os.path.splitext(fname)[0]
        overlay_path = os.path.join(output_root, f"{base_name}_segmented.png")
        cv2.imwrite(overlay_path, result_overlay)

        coco_data = {
            "info": {
                "description": "Manual SAM Segmentation",
                "date_created": datetime.now().isoformat(),
            },
            "images": [{"id": image_id, "file_name": fname, "width": w, "height": h}],
            "annotations": [
                {
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": 1,
                    "segmentation": binary_mask_to_rle(mask),
                    "area": int(mask.sum()),
                    "bbox": list(cv2.boundingRect(mask.astype(np.uint8))),
                    "iscrowd": 0,
                }
            ],
            "categories": [{"id": 1, "name": "object"}],
        }

        json_path = os.path.join(output_root, f"{base_name}.json")
        with open(json_path, "w") as f:
            json.dump(coco_data, f)

        # Persist progress after every accepted image so an interrupted
        # session can resume; sorted for a stable file on disk.
        processed_images.add(fname)
        with open(processed_file, "w") as f:
            json.dump(sorted(processed_images), f)

        annotation_id += 1
        image_id += 1
        break  # Move to next image

    if stop_requested:
        # ESC ends the session by leaving the loop instead of calling exit().
        cv2.destroyAllWindows()
        break


if stop_requested:
    print("\n🛑 Annotation stopped by user. Accepted segmentations so far are saved.")
else:
    print("\n✅ All segmentations complete. JSON and segmented images saved.")

  0%|          | 0/607 [00:00<?, ?it/s]


Processing image: 135_jpg.rf.bf9542f31ad0a4d2fcdbef9390d055f5.jpg
🔁 Retrying segmentation for this image...

Processing image: 135_jpg.rf.bf9542f31ad0a4d2fcdbef9390d055f5.jpg
🔁 Retrying segmentation for this image...

Processing image: 135_jpg.rf.bf9542f31ad0a4d2fcdbef9390d055f5.jpg


  7%|▋         | 41/607 [18:51<4:20:25, 27.61s/it]


Processing image: 137_jpg.rf.b377cd40dea15934f2d2ca0f4ddd937e.jpg


  7%|▋         | 43/607 [20:26<4:31:29, 28.88s/it]


Processing image: 20250515_150052.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250515_150052.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250515_150052.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250515_150052.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250515_150052.jpg


 14%|█▍        | 86/607 [27:36<2:21:30, 16.30s/it]


Processing image: 20250516_151635.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_151635.jpg


 17%|█▋        | 101/607 [30:55<2:10:59, 15.53s/it]


Processing image: 20250516_151701.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_151701.jpg


 17%|█▋        | 103/607 [34:09<2:41:37, 19.24s/it]

⏭️ Skipping this image...

Processing image: 20250516_151739.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_151739.jpg


 17%|█▋        | 104/607 [40:44<4:20:10, 31.03s/it]

⏭️ Skipping this image...

Processing image: 20250516_151915.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_151915.jpg


 17%|█▋        | 106/607 [44:01<5:05:27, 36.58s/it]


Processing image: 20250516_152304.jpg


 18%|█▊        | 111/607 [45:20<4:23:12, 31.84s/it]


Processing image: 20250516_152337.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_152337.jpg


 18%|█▊        | 112/607 [49:05<5:59:55, 43.63s/it]


Processing image: 20250516_153507.jpg


 19%|█▉        | 117/607 [50:30<4:50:33, 35.58s/it]


Processing image: 20250516_154217.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_154217.jpg


 20%|██        | 122/607 [58:34<7:17:05, 54.07s/it]


Processing image: 20250516_154226.jpg


 20%|██        | 123/607 [1:00:00<7:36:33, 56.60s/it]


Processing image: 20250516_154412.jpg


 20%|██        | 124/607 [1:01:16<7:51:23, 58.56s/it]


Processing image: 20250516_154451.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_154451.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_154451.jpg


 21%|██        | 126/607 [1:05:17<9:41:30, 72.54s/it]

⏭️ Skipping this image...

Processing image: 20250516_154514.jpg


 21%|██        | 127/607 [1:21:48<26:39:57, 200.00s/it]


Processing image: 20250516_155209.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155209.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155209.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155209.jpg


 21%|██        | 128/607 [1:49:11<58:23:34, 438.86s/it]

⏭️ Skipping this image...

Processing image: 20250516_155520.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155520.jpg


 21%|██▏       | 130/607 [2:09:18<65:08:58, 491.70s/it]

⏭️ Skipping this image...

Processing image: 20250516_155546.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155546.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155546.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155546.jpg


 22%|██▏       | 131/607 [2:33:59<89:24:39, 676.22s/it]


Processing image: 20250516_155624.jpg


 22%|██▏       | 132/607 [2:35:18<72:37:50, 550.46s/it]


Processing image: 20250516_155655.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_155655.jpg


 22%|██▏       | 134/607 [2:56:57<77:13:45, 587.79s/it]


Processing image: 20250516_155739.jpg


 22%|██▏       | 135/607 [2:58:02<62:33:07, 477.09s/it]


Processing image: 20250516_155803.jpg


 22%|██▏       | 136/607 [3:00:11<51:52:28, 396.49s/it]


Processing image: 20250516_160635.jpg


 23%|██▎       | 140/607 [3:01:15<23:17:54, 179.60s/it]


Processing image: 20250516_160716.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160716.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160716.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160716.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160716.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160716.jpg


 24%|██▎       | 143/607 [3:25:44<38:17:45, 297.12s/it]


Processing image: 20250516_160730.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160730.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160730.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160730.jpg


 24%|██▎       | 144/607 [3:35:57<44:25:00, 345.36s/it]

⏭️ Skipping this image...

Processing image: 20250516_160801.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160801.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160801.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160801.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_160801.jpg


 24%|██▍       | 145/607 [4:00:26<70:11:12, 546.91s/it]

⏭️ Skipping this image...


 24%|██▍       | 146/607 [4:01:23<57:15:13, 447.10s/it]


Processing image: 20250516_160828.jpg
⏭️ Skipping this image...


 24%|██▍       | 147/607 [4:02:22<45:56:27, 359.54s/it]


Processing image: 20250516_161055.jpg
⏭️ Skipping this image...

Processing image: 20250516_161158.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_161158.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_161158.jpg


 25%|██▍       | 149/607 [4:19:46<53:50:13, 423.17s/it]

⏭️ Skipping this image...

Processing image: 20250516_162108.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_162108.jpg


 25%|██▌       | 152/607 [4:24:32<34:33:57, 273.49s/it]


Processing image: 20250516_162209.jpg


 25%|██▌       | 153/607 [4:25:44<29:56:43, 237.45s/it]


Processing image: 20250516_162527.jpg


 26%|██▌       | 156/607 [4:27:20<18:35:44, 148.44s/it]


Processing image: 20250516_162550.jpg


 26%|██▌       | 157/607 [4:29:08<17:41:34, 141.54s/it]


Processing image: 20250516_162705.jpg


 26%|██▌       | 158/607 [4:30:31<16:12:26, 129.95s/it]


Processing image: 20250516_162708.jpg


 26%|██▌       | 159/607 [4:31:34<14:21:45, 115.41s/it]


Processing image: 20250516_162906.jpg


 26%|██▋       | 160/607 [4:33:20<14:03:16, 113.19s/it]


Processing image: 20250516_163000.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_163000.jpg
🔁 Retrying segmentation for this image...

Processing image: 20250516_163000.jpg


 26%|██▋       | 160/607 [4:37:22<12:54:54, 104.01s/it]


KeyboardInterrupt: 