In [11]:
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
import shutil

# Convert Masks to YOLO Annotations
### Dataset Format for YOLO
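YOLO expects one `.txt` label file per image, with one object per line and all coordinates normalized to the range 0–1 by the image width and height. For segmentation, each line holds a class ID followed by the polygon vertices (`class_id x1 y1 x2 y2 ...`). For object detection (bounding boxes), use this format: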
```bash
0 0.5 0.5 0.2 0.3  # class_id, x_center, y_center, width, height
1 0.7 0.3 0.1 0.1
```
If your masks are binary images, use OpenCV to extract each object's contour and write it either as a polygon (segmentation) or as its bounding box (detection):

In [6]:
def convert_mask_to_yolo(mask_path, output_path, class_id=0, mode='segmentation'):
    """
    Convert a mask image to a YOLO-format annotation file.

    Every external contour found in the mask becomes one line of the output
    file. All coordinates are normalized by the mask's width and height.

    Args:
        mask_path: Path to the mask image. It is read as grayscale; OpenCV's
            contour extraction treats every non-zero pixel as foreground.
        output_path: Path of the YOLO annotation (.txt) file to write.
        class_id: Class ID written at the start of every line (default 0).
        mode: 'segmentation' (default) writes the polygon vertices of each
            contour as ``class_id x1 y1 x2 y2 ...``. Any other value selects
            detection output: ``class_id x_center y_center width height``.

    Returns:
        None. If the mask cannot be read, a message is printed and no file is
        written. If no usable contour is found, an empty file is written.
    """
    # Read the mask image
    mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        print(f"Could not read mask: {mask_path}")
        return

    # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
    # (image, contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Image dimensions used for normalization
    height, width = mask.shape

    with open(output_path, 'w') as file:
        for contour in contours:
            if mode == 'segmentation':
                # Contours come as (N, 1, 2); reshape keeps a 2-D (N, 2) array
                # even when N == 1, which squeeze() would collapse.
                points = contour.reshape(-1, 2)

                # A polygon needs at least three vertices. A two-point contour
                # would also produce a 5-token line that is indistinguishable
                # from a bounding-box label, so skip anything smaller.
                if len(points) < 3:
                    continue

                # Normalize x by the width and y by the height
                normalized_points = points / [width, height]

                points_str = " ".join(f"{x:.5f} {y:.5f}" for x, y in normalized_points)
                file.write(f"{class_id} {points_str}\n")

            else:  # detection mode
                x, y, w, h = cv2.boundingRect(contour)

                # Convert to YOLO format (center coordinates + width + height)
                center_x = (x + w / 2) / width
                center_y = (y + h / 2) / height
                norm_width = w / width
                norm_height = h / height

                file.write(f"{class_id} {center_x:.5f} {center_y:.5f} "
                           f"{norm_width:.5f} {norm_height:.5f}\n")


def process_dataset(base_path):
    """
    Build a YOLO-style dataset layout (images/ + labels/) from image/mask pairs.

    For each split ('train', 'valid', 'test'), every ``<name>_mask.png`` found
    in ``base_path/<split>`` is paired with ``<name>.jpg`` from the same
    folder. The image is copied to
    ``base_path/processed_data/<split>/images/<name>.jpg`` and the annotation
    generated from the mask is written to
    ``base_path/processed_data/<split>/labels/<name>.txt``.

    Args:
        base_path: Base path to the dataset containing the split folders.

    Returns:
        None. A mask whose image is missing is reported with a printed
        message and skipped; no label is written for it.
    """
    mask_suffix = '_mask.png'
    base_path = Path(base_path)

    # Create a new base folder for processed data
    processed_base_path = base_path / 'processed_data'
    processed_base_path.mkdir(parents=True, exist_ok=True)

    # Process each split (train, valid, test)
    for split in ['train', 'valid', 'test']:
        split_path = processed_base_path / split
        images_dir = split_path / 'images'
        labels_dir = split_path / 'labels'

        # Create directories if they don't exist
        images_dir.mkdir(parents=True, exist_ok=True)
        labels_dir.mkdir(parents=True, exist_ok=True)

        # Process all mask files in the original dataset (sorted so the
        # processing order does not depend on the file system).
        original_split_path = base_path / split
        for mask_file in sorted(original_split_path.glob('*' + mask_suffix)):
            # Strip only the trailing suffix, once, so the image and the label
            # share exactly the same stem even when '_mask' also appears
            # earlier in the file name.
            sample_stem = mask_file.name[:-len(mask_suffix)]
            image_name = sample_stem + '.jpg'
            image_path = mask_file.with_name(image_name)

            # Copy (not move) the image; only a failed copy means the image
            # is missing, so only the copy is guarded.
            try:
                shutil.copy(image_path, images_dir / image_name)
            except FileNotFoundError:
                print(f"Missing image file: {image_path}")
                continue

            # Convert mask to YOLO format next to the copied image's stem
            label_path = labels_dir / (sample_stem + '.txt')
            convert_mask_to_yolo(mask_file, label_path)

In [7]:

# Usage example: convert the dataset rooted at ../Road_mask/ (relative to the
# notebook's working directory). process_dataset writes its results to a
# 'processed_data' folder inside that directory.
if __name__ == "__main__":
    dataset_path = Path('../Road_mask/')
    process_dataset(dataset_path)

# Segmentation (YOLOv8)

Install the `ultralytics` package, which provides YOLOv8:

In [None]:
!pip3 install ultralytics

## Training Code

In [None]:
# Dataset configuration file handed to model.train(data=...) in the training cell.
# NOTE(review): presumably this points at the processed_data splits (the
# training log scans Road_mask/processed_data/train/labels) — confirm.
DATA_YAML_PATH = './data.yaml'


In [None]:
# Start from the pretrained segmentation checkpoint.
model = YOLO('yolov8n-seg.pt')

# Train on the dataset described by DATA_YAML_PATH; run artifacts are written
# under the "./Models" project directory.
model.train(
    data=DATA_YAML_PATH,
    project="./Models",
    epochs=50,
    imgsz=640,
    batch=16,
)

New https://pypi.org/project/ultralytics/8.3.69 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.67 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4080 Laptop GPU, 11985MiB)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=./data.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=./Models, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, em

[34m[1mtrain: [0mScanning /home/user/Desktop/rci/Road_mask/processed_data/train/labels... 105 images, 0 backgrounds, 0 corrupt: 100%|██████████| 105/105 [00:00<00:00, 1381.36it/s]

[34m[1mtrain: [0mNew cache created: /home/user/Desktop/rci/Road_mask/processed_data/train/labels.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /home/user/Desktop/rci/Road_mask/processed_data/valid/labels... 11 images, 0 backgrounds, 0 corrupt: 100%|██████████| 11/11 [00:00<00:00, 1722.44it/s]

[34m[1mval: [0mNew cache created: /home/user/Desktop/rci/Road_mask/processed_data/valid/labels.cache





Plotting labels to Models/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mModels/train3[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/50      3.18G      2.983      6.915       3.89      2.491         67        640: 100%|██████████| 7/7 [00:01<00:00,  5.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  9.33it/s]

                   all         11         54    0.00346      0.111    0.00287   0.000967   0.000577     0.0185   0.000322   0.000123






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       2/50      2.86G      2.622      3.897      3.708      2.144         70        640: 100%|██████████| 7/7 [00:00<00:00,  9.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.63it/s]

                   all         11         54       0.01      0.333     0.0176    0.00643       0.01      0.333    0.00943    0.00207






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       3/50         3G      2.255      2.497       3.08      1.889        100        640: 100%|██████████| 7/7 [00:00<00:00,  9.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.52it/s]

                   all         11         54     0.0129      0.426     0.0424    0.00863     0.0101      0.333      0.032    0.00766






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       4/50      2.86G      2.074      2.195      2.602      1.778        104        640: 100%|██████████| 7/7 [00:00<00:00, 10.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 17.15it/s]

                   all         11         54     0.0153      0.463       0.03    0.00993     0.0134      0.407     0.0422     0.0111






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       5/50      2.99G      1.943      1.927      2.359      1.707         75        640: 100%|██████████| 7/7 [00:00<00:00, 10.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.40it/s]

                   all         11         54     0.0154      0.481     0.0501     0.0155     0.0154      0.481     0.0698     0.0211






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       6/50      2.84G      1.957      1.665      2.323      1.667         82        640: 100%|██████████| 7/7 [00:00<00:00, 10.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.09it/s]

                   all         11         54     0.0221      0.648     0.0781      0.031     0.0208      0.611      0.109     0.0409






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       7/50       2.9G      1.833      1.627      2.103      1.606         55        640: 100%|██████████| 7/7 [00:00<00:00,  9.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.85it/s]

                   all         11         54     0.0286      0.796      0.131     0.0404     0.0246      0.685      0.116     0.0366






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       8/50         3G      1.762      1.465       1.97      1.567         60        640: 100%|██████████| 7/7 [00:00<00:00, 10.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.36it/s]

                   all         11         54      0.131      0.296      0.122     0.0405      0.123      0.278      0.107       0.04






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       9/50      2.98G      1.759       1.43       1.89      1.549         71        640: 100%|██████████| 7/7 [00:00<00:00, 10.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.01it/s]

                   all         11         54      0.367      0.185      0.186     0.0869      0.367      0.185      0.187     0.0829






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      10/50      2.93G      1.656      1.545      1.783      1.501         70        640: 100%|██████████| 7/7 [00:00<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.47it/s]

                   all         11         54      0.176      0.241      0.111     0.0534      0.178      0.241      0.107     0.0481






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      11/50      3.11G      1.709      1.424      1.807      1.514        113        640: 100%|██████████| 7/7 [00:00<00:00, 10.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 16.21it/s]

                   all         11         54      0.308       0.23      0.195     0.0796      0.283      0.212      0.167     0.0725






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      12/50      2.97G      1.667      1.432      1.707      1.471         97        640: 100%|██████████| 7/7 [00:00<00:00, 10.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 15.63it/s]

                   all         11         54      0.285      0.333      0.163     0.0718      0.316      0.352      0.202     0.0719






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      13/50         3G      1.616      1.363      1.734      1.472        146        640:  71%|███████▏  | 5/7 [00:00<00:00,  8.23it/s]


KeyboardInterrupt: 

## Testing

In [12]:
# Weights loaded for the prediction demo.
# NOTE(review): the training log above writes to Models/train3, while this
# path points at Models/train/train2 — confirm this is the intended run.
TRAINED_MODEL_PATH = './Models/train/train2/weights/best.pt'

# Single image used for the prediction demo.
# NOTE(review): this reads from Road_mask_bkp, whereas the conversion cell
# processes ../Road_mask/ — confirm the backup copy is intentional.
TEST_IMAGE_PATH = "../Road_mask_bkp/test/tile_12_jpg.rf.b0d051c082e97538bdd45a416fa6d93b.jpg"

In [13]:
# Load the trained weights and run inference on the single test image.
model = YOLO(TRAINED_MODEL_PATH)
image_path = TEST_IMAGE_PATH
results = model.predict(image_path, imgsz=640, conf=0.5)

# Show each annotated prediction in an OpenCV window. Any key advances to the
# next result; 'q' stops early.
for result in results:
    plotted = result.plot()
    cv2.imshow("Prediction", plotted)

    # waitKey(0) blocks until a key is pressed; mask to the low byte before
    # comparing against the key code.
    pressed_key = cv2.waitKey(0) & 0xFF
    if pressed_key == ord('q'):
        break

    cv2.destroyAllWindows()

# Final cleanup, which also covers the early exit via 'q'.
cv2.destroyAllWindows()


image 1/1 /home/user/Desktop/rci/RCI_local/../Road_mask_bkp/test/tile_12_jpg.rf.b0d051c082e97538bdd45a416fa6d93b.jpg: 640x640 6 roads, 3.2ms
Speed: 1.4ms preprocess, 3.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
