In [6]:
# Script to transform Dental AI image segmentation dataset to YOLOv8 object detection dataset format

# 1. Create a "dataset" folder in the same folder as this script
# 2. Copy Dental AI dataset to the created "dataset" folder
# 3. Run this code
# 4. The transformed dataset will be created in the "yolo_dataset" folder

# --------------- not in this script ------------------
# 5. Use the transformed dataset in the "yolo_dataset" folder as a regular YOLOv8 dataset to train your model on it
#    as described in the article (see train.ipynb as a sample)
# 6. After training finishes, the best trained model will be written to the runs/detect/train/weights/best.pt file in the current folder
# 7. Use the trained model file to make detections on your own images

In [1]:
import shutil
from os import path
import os
import json

In [2]:
# Root folders of the conversion:
#   SRC_DIR  - where the original dataset is read from
#   DEST_DIR - where the converted YOLOv8 dataset is written to
SRC_DIR, DEST_DIR = "dataset", "yolo_dataset"

In [3]:
# Build the destination layout: every split gets an "images" and a "labels"
# sub-folder. Existing folders are left untouched (exist_ok=True).
for split_name in ("train", "val", "test"):
    for content_kind in ("images", "labels"):
        os.makedirs(path.join(DEST_DIR, split_name, content_kind), exist_ok=True)

In [10]:
# Load the class titles declared in the source dataset's "meta.json" and map
# each title to its position in the list; that position is used as the
# numeric class id in the generated label files.
# The file is opened in a `with` block so the handle is closed right after
# parsing, and read as UTF-8 so the result does not depend on the OS locale.
with open(path.join(SRC_DIR, "meta.json"), encoding="utf-8") as meta_file:
    meta = json.load(meta_file)
classes = {entry["title"]: index for (index, entry) in enumerate(meta["classes"])}
classes

{'Caries': 0, 'Cavity': 1, 'Crack': 2, 'Tooth': 3}

In [6]:
# Emit "data.yaml" into the destination dataset: the three split locations,
# the number of classes and the list of class names (in class-id order).
with open(path.join(DEST_DIR, "data.yaml"), "w") as yaml_file:
    yaml_file.writelines(
        [
            "train: ../train/images\n",
            "val: ../val/images\n",
            "test: ../test/images\n",
            "\n",
            "nc: {}\n".format(len(classes)),
            "names: ['{}']".format("','".join(classes.keys())),
        ]
    )

In [7]:
# Source split folder name -> destination split folder name
dirs_map = dict(train="train", valid="val", test="test")

In [35]:
# Copy images and transform annotations
def _polygon_to_yolo_box(points, img_width, img_height):
    """Convert polygon vertices to a normalized YOLO bounding box.

    Args:
        points: sequence of (x, y) vertices in pixel coordinates.
        img_width: image width in pixels, used to normalize x values.
        img_height: image height in pixels, used to normalize y values.

    Returns:
        (x_center, y_center, width, height), each divided by the matching
        image dimension, or None when `points` is empty (no box exists).
    """
    if not points:
        return None
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    width = right - left
    height = bottom - top
    x_center = (left + width / 2) / img_width
    y_center = (top + height / 2) / img_height
    return x_center, y_center, width / img_width, height / img_height


for (src_dir, dest_dir) in dirs_map.items():
    # Copy all images from source to destination dataset
    shutil.copytree(
        path.join(SRC_DIR, src_dir, "img"),
        path.join(DEST_DIR, dest_dir, "images"),
        dirs_exist_ok=True,
    )
    ann_dir = path.join(SRC_DIR, src_dir, "ann")
    labels_dir = path.join(DEST_DIR, dest_dir, "labels")
    # Go over each annotation file, transform annotations to YOLOv8 format
    # and write to the destination dataset
    for file in os.listdir(ann_dir):
        # Ignore stray non-annotation entries (e.g. OS metadata files)
        if not file.endswith(".json"):
            continue
        # `with` closes the handle as soon as the JSON has been parsed
        with open(path.join(ann_dir, file), "r", encoding="utf-8") as ann_file:
            ann = json.load(ann_file)
        # get width and height of the image
        img_width = ann["size"]["width"]
        img_height = ann["size"]["height"]
        # "<image>.<ext>.json" -> "<image>.txt": the label file must share the
        # image's base name whatever the image extension is (.jpg, .png, ...)
        image_name = file[: -len(".json")]
        file_name = path.splitext(image_name)[0] + ".txt"
        # Create the annotation file in the destination dataset
        with open(path.join(labels_dir, file_name), "w") as fp:
            # Calculate bounding boxes for each object, defined in this annotation file
            for obj in ann["objects"]:
                # Get a class code for this bounding box
                class_id = classes[obj["classTitle"]]
                box = _polygon_to_yolo_box(obj["points"]["exterior"], img_width, img_height)
                if box is None:
                    # No outline points: there is nothing to bound, so skip the
                    # object instead of writing stale or meaningless values
                    continue
                # Write bounding box to the annotation file to destination dataset
                fp.write("{} {} {} {} {}\n".format(class_id, *box))