In [2]:
import torch
print("GPU available:", torch.cuda.is_available())
print("Device:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))

GPU available: False
Device: cpu


In [3]:
!pip install roboflow



In [4]:
from roboflow import Roboflow

In [5]:
rf = Roboflow(api_key="wHEnsJuxVhaHJ0SOxtb7")

In [6]:
Crproject = rf.workspace().project("cracks-3ii36-l6v6t")
Dwproject = rf.workspace().project("drywall-join-detect-fqwwa")

loading Roboflow workspace...
loading Roboflow project...
loading Roboflow workspace...
loading Roboflow project...


In [7]:
print(rf.workspace())

loading Roboflow workspace...
{
  "name": "TITHI",
  "url": "tithi-u2obm",
  "projects": [
    "tithi-u2obm/cracks-3ii36-l6v6t",
    "tithi-u2obm/cracks-3ii36-qegq8",
    "tithi-u2obm/drywall-join-detect-fqwwa"
  ]
}


In [8]:
Crdataset = Crproject.version(1).download("coco")
Dwdataset = Dwproject.version(1).download("coco")

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [None]:
!mkdir data

In [None]:
!mv cracks-1 data
!mv Drywall-Join-Detect-1 data

In [10]:
import json

with open("data/cracks-1/train/_annotations.coco.json") as f:
    coco_crack = json.load(f)

In [11]:
print("Images:", len(coco_crack["images"]))
print("Annotations:", len(coco_crack["annotations"]))
print("Categories:", coco_crack["categories"])


Images: 5164
Annotations: 8133
Categories: [{'id': 0, 'name': 'crack', 'supercategory': 'none'}, {'id': 1, 'name': 'NewCracks - v2 2024-05-18 10-54pm', 'supercategory': 'crack'}]


In [12]:
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import os

In [13]:
!pip install segment-anything


Collecting segment-anything
  Downloading segment_anything-1.0-py3-none-any.whl.metadata (487 bytes)
Downloading segment_anything-1.0-py3-none-any.whl (36 kB)
Installing collected packages: segment-anything
Successfully installed segment-anything-1.0


In [14]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth

--2026-02-07 15:53:03--  https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 3.163.189.51, 3.163.189.14, 3.163.189.96, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|3.163.189.51|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 375042383 (358M) [binary/octet-stream]
Saving to: ‚Äòsam_vit_b_01ec64.pth‚Äô


2026-02-07 15:53:05 (329 MB/s) - ‚Äòsam_vit_b_01ec64.pth‚Äô saved [375042383/375042383]



In [15]:
from segment_anything import sam_model_registry, SamPredictor

checkpoint_path = "sam_vit_b_01ec64.pth"

sam = sam_model_registry["vit_b"](checkpoint=checkpoint_path)
sam.to("cuda")

predictor = SamPredictor(sam)


In [16]:
print(next(sam.parameters()).device)

cuda:0


In [17]:
def sam_segment(image, predictor):
    predictor.set_image(image)

    h, w, _ = image.shape
    input_point = np.array([[w//2, h//2]])
    input_label = np.array([1])

    masks, scores, _ = predictor.predict(
        point_coords=input_point,
        point_labels=input_label,
        multimask_output=True
    )

    return masks[scores.argmax()]

In [18]:
splits = ["train", "valid", "test"]
datasets = ["cracks-1", "Drywall-Join-Detect-1"]

In [19]:
def generate_masks_for_all_splits(base_data_dir="data",base_output_dir="outputs",predictor=None):

    for dataset in datasets:
        for split in splits:

            image_dir = os.path.join(base_data_dir, dataset, split)

            if not os.path.exists(image_dir):
                print(f"‚ö†Ô∏è Skipping {dataset}/{split} (not found)")
                continue

            save_dir = os.path.join(base_output_dir,dataset,f"{split}_crack")
            os.makedirs(save_dir, exist_ok=True)

            print(f"\nüöÄ Processing {dataset} | {split}")

            saved_any = False

            for img_name in tqdm(os.listdir(image_dir)):

                if not img_name.lower().endswith((".jpg", ".png", ".jpeg")):
                    continue

                img_path = os.path.join(image_dir, img_name)

                image = cv2.imread(img_path)
                if image is None:
                    print("Unreadable image:", img_name)
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                mask = sam_segment(image, predictor)
                mask = (mask * 255).astype("uint8")

                out_name = img_name.rsplit(".", 1)[0] + "__segment_crack.png"
                out_path = os.path.join(save_dir, out_name)
                cv2.imwrite(out_path, mask)

                # ‚úÖ show only ONE sample per split
                if not saved_any:
                    plt.figure(figsize=(4,4))
                    plt.imshow(mask, cmap="gray")
                    plt.title(f"{dataset} | {split} sample")
                    plt.axis("off")
                    saved_any = True

            print(f"‚úÖ Done: {dataset}/{split}")


In [None]:
generate_masks_for_all_splits(base_data_dir="data",base_output_dir="outputs",predictor=predictor)


üöÄ Processing cracks-1 | train


  1%|          | 63/5165 [00:27<36:35,  2.32it/s]