## Dataset preprocessing

Convert the bounding box file into yolo format

In [1]:
# import necessary dependencies
import json
import yaml
import shutil
import random
from pathlib import Path
from collections import Counter
from ultralytics import YOLO

In [2]:
# load the json file
image_data = "data/kavsir_bboxes.json"

with open(image_data, "rb") as f_out:
    file = json.load(f_out)

In [3]:
# A quick look at the json file
print(json.dumps(file, indent=4))

{
    "cju0qkwl35piu0993l0dewei2": {
        "height": 529,
        "width": 622,
        "bbox": [
            {
                "label": "polyp",
                "xmin": 38,
                "ymin": 5,
                "xmax": 430,
                "ymax": 338
            }
        ]
    },
    "cju0qoxqj9q6s0835b43399p4": {
        "height": 1070,
        "width": 1348,
        "bbox": [
            {
                "label": "polyp",
                "xmin": 194,
                "ymin": 284,
                "xmax": 913,
                "ymax": 1049
            }
        ]
    },
    "cju0qx73cjw570799j4n5cjze": {
        "height": 529,
        "width": 619,
        "bbox": [
            {
                "label": "polyp",
                "xmin": 187,
                "ymin": 14,
                "xmax": 543,
                "ymax": 526
            }
        ]
    },
    "cju0roawvklrq0799vmjorwfv": {
        "height": 528,
        "width": 622,
        "bbox": [
            {
           

In [4]:
class_dict = {
    "polyp": 0
}

def convert_to_yolo_format(image_data, dest, img):
    for image_id, data in image_data.items():
        
        if img.stem == image_id:
            image_width = data["width"]
            image_height = data["height"]
            
            # Create a file path for the annotation the path would be defined when split the dataset
            annotation_path = dest
            
            # Prepare list to store YOLO annotations
            annotations = []
            
            # Iterate over all bounding boxes
            for bbox in data["bbox"]:
                label = bbox["label"]
                xmin = bbox["xmin"]
                ymin = bbox["ymin"]
                xmax = bbox["xmax"]
                ymax = bbox["ymax"]
                
                # Calculate YOLO format values
                x_center = ((xmin + xmax) / 2 )/ image_width
                y_center = ((ymin + ymax) / 2) / image_height
                width = (xmax - xmin) / image_width
                height = (ymax - ymin) / image_height
                
                # Get the class ID for the label
                class_id = class_dict.get(label, -1)  # -1 if label is not found
                
                # Append the YOLO annotation to the list
                annotations.append(f"{class_id} {x_center} {y_center} {width} {height}")
            
        
            with open(annotation_path, "w") as f:
                f.write("\n".join(annotations))
            
            return


For training, YOLO expects a directory structure in this format:
```
yolo_data
├── images
│   ├── train
│   └── val
└── labels
    ├── train
    └── val
```

so let use move the create this directory and also split the dataset into training and validation dataset

In [5]:
base_path = Path("yolo_data")

shutil.rmtree(base_path, ignore_errors=True)

image_train = base_path / "images"/"train"
image_train.mkdir(parents=True, exist_ok=True)

image_val = base_path /"images"/ "val"
image_val.mkdir(parents=True, exist_ok=True)

(base_path/ "labels"/ "train").mkdir(parents=True, exist_ok=True)
(base_path/ "labels"/"val").mkdir(parents=True, exist_ok=True)

!tree $base_path

[01;34myolo_data[0m
├── [01;34mimages[0m
│   ├── [01;34mtrain[0m
│   └── [01;34mval[0m
└── [01;34mlabels[0m
    ├── [01;34mtrain[0m
    └── [01;34mval[0m

7 directories, 0 files


Before spliting into training and validation set lets make sure that all the images have the same suffix

In [6]:
image_path = Path("data", "images")

suffix_count = Counter(f.suffix for f in image_path.glob("*"))

suffix_count

Counter({'.jpg': 1000})

In [7]:
train_frac = 0.8
images = list(image_path.glob("*"))

for img in images:
    split = "train" if random.random() < train_frac else "val"
    
    image_id = img.stem
    dest = base_path / "labels" / split / f"{img.stem}.txt"
    convert_to_yolo_format(file, dest, img)
    

    image_dest = base_path / "images" / split / f"{img.stem}.jpg"
    shutil.copy(img, image_dest)

In [8]:
# check the fraction of the dataset in each split
train_count = len(list((base_path/ "images"/"train").glob("*")))
val_count = len(list((base_path/ "images"/"val").glob("*")))
total_count = train_count + val_count

print(f"Training fraction:   {train_count/total_count:0.3f}")
print(f"Validation fraction: {val_count/total_count:0.3f}")

Training fraction:   0.818
Validation fraction: 0.182


### Create a yaml file for the data description

In [9]:
classes = ["polyp"]

metadata = {
    "path": str(base_path.absolute()),
    "train": "images/train",
    "val": "images/val",
    "classes": classes,
    "nc": len(classes)
}

print(metadata)

{'path': '/root/Computer_vision/polyp_yolo_detection/yolo_data', 'train': 'images/train', 'val': 'images/val', 'classes': ['polyp'], 'nc': 1}


In [10]:
yolo_config = "data.yaml"

yaml.safe_dump(metadata, open(yolo_config, 'w'))

In [11]:
!cat data.yaml

classes:
- polyp
nc: 1
path: /root/Computer_vision/polyp_yolo_detection/yolo_data
train: images/train
val: images/val


In [None]:
model = YOLO("yolo11l.pt")

: 

In [None]:
result = model.train(
    data= yolo_config,
    epochs=5,
    patience=5,
    batch=8,
    workers=1
)

New https://pypi.org/project/ultralytics/8.3.63 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.62 🚀 Python-3.12.3 torch-2.5.1+cu124 CPU (Intel Core(TM) i7-8665U 1.90GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11l.pt, data=data.yaml, epochs=5, time=None, patience=5, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=1, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frame

[34m[1mtrain: [0mScanning /root/Computer_vision/polyp_yolo_detection/yolo_data/labels/train... 818 images, 0 backgrounds, 0 corrupt: 100%|██████████| 818/818 [00:02<00:00, 366.93it/s]

[34m[1mtrain: [0mNew cache created: /root/Computer_vision/polyp_yolo_detection/yolo_data/labels/train.cache



[34m[1mval: [0mScanning /root/Computer_vision/polyp_yolo_detection/yolo_data/labels/val... 182 images, 0 backgrounds, 0 corrupt: 100%|██████████| 182/182 [00:00<00:00, 404.65it/s]

[34m[1mval: [0mNew cache created: /root/Computer_vision/polyp_yolo_detection/yolo_data/labels/val.cache





Plotting labels to /root/Computer_vision/runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005), 173 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/root/Computer_vision/runs/detect/train4[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/103 [00:00<?, ?it/s]