Inspired by: https://medium.com/red-buffer/converting-a-custom-dataset-from-coco-format-to-yolo-format-6d98a4fd43fc

In [1]:
import json
import cv2
import os
import matplotlib.pyplot as plt
import shutil

In [2]:
# Root folder of the COCO-format export and the annotation file of each split.
input_path = "../roboflow_datasets/xmm_om_artefacts_512-7-COCO/"
input_json_train = f"{input_path}train/skf_train_annotations.coco.json"
input_json_valid = f"{input_path}valid/skf_valid_annotations.coco.json"

# Root folder that receives the converted YOLO-format dataset.
output_path = "../roboflow_datasets/xmm_om_artefacts_512-7-YOLO/"

In [3]:
# Create the output tree (<output_path>/images and <output_path>/labels).
# makedirs(exist_ok=True) also creates missing parents and does not fail when
# a folder already exists, so no separate existence check is needed.
for subdir in ("images", "labels"):
    os.makedirs(os.path.join(output_path, subdir), exist_ok=True)

In [4]:
# Load the training annotations. The context manager closes the file even if
# parsing fails; JSON is read as UTF-8 regardless of the platform default.
with open(input_json_train, encoding="utf-8") as f:
    train_data = json.load(f)

In [5]:
# Names of every image copied so far; filled by load_images_from_folder.
file_names = []

def load_images_from_folder(folder, json_annotations):
    """Copy the images listed in a COCO annotation dict into the output folder.

    Every distinct ``'file_name'`` in ``json_annotations['images']`` that has
    a jpg/jpeg/png extension (matched case-insensitively) is copied from
    ``folder`` to ``<output_path>/images/`` and appended to the module-level
    ``file_names`` list. Names are processed in sorted order so repeated runs
    behave identically.

    Args:
        folder: Directory that contains the source images.
        json_annotations: COCO-style dict with an ``'images'`` list whose
            entries carry a ``'file_name'`` key.

    Returns:
        The number of image names processed.

    Raises:
        OSError: If a listed image cannot be copied (for example, it is
            missing from ``folder``).
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    unique_names = sorted({entry['file_name'] for entry in json_annotations['images']})

    count = 0
    for filename in unique_names:
        if not filename.lower().endswith(image_suffixes):
            continue

        source = os.path.join(folder, filename)
        destination = os.path.join(output_path, "images", filename)

        try:
            shutil.copy(source, destination)
            print(f"File {source} copied successfully.")
        # If source and destination are same
        except shutil.SameFileError:
            print("Source and destination represents the same file.")

        file_names.append(filename)
        count += 1
    return count

# Copy every training image referenced by the COCO annotations into the YOLO
# images folder; the returned count is displayed as the cell output.
load_images_from_folder(input_path+'train/', train_data)

File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0891801501_L_png.rf.9c49e55916ea60433e206ab7ff7b1d21.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0606070301_L_png.rf.bcb78c4624463d1f6de8f6b9e1f39dc1.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0148990101_L_png.rf.60b39df7e9b8fb7a27e9b425884a280e.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0741970301_L_png.rf.da9f77973db72d7cb1bbf0dc50d7b01e.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0092850201_V_png.rf.4a20c37b86c7ba54afcec360e43000a1.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0863800201_L_png.rf.65cf63b70d714d1263e4ae2dba4278d5.jpg copied successfully.
File ../roboflow_datasets/xmm_om_artefacts_512-7-COCO/train/S0720250901_L_png.rf.955975938680543d078416070ecb3dec.jpg copied successfully.
File ../roboflow_datasets/x

458

In [6]:
def get_img_ann(image_id, data):
    """Collect every annotation in ``data`` that belongs to ``image_id``.

    Args:
        image_id: Value compared against each annotation's ``'image_id'``.
        data: COCO-style dict with an ``'annotations'`` list.

    Returns:
        The matching annotation dicts in their original order, or ``None``
        when no annotation references ``image_id``.
    """
    matches = [
        annotation
        for annotation in data['annotations']
        if annotation['image_id'] == image_id
    ]
    # Callers rely on None (not an empty list) to signal "no annotations".
    return matches if matches else None

In [7]:
def get_img(filename, data):
    """Return the first entry of ``data['images']`` whose name is ``filename``.

    Args:
        filename: Value compared against each image entry's ``'file_name'``.
        data: COCO-style dict with an ``'images'`` list.

    Returns:
        The first matching image dict, or ``None`` when nothing matches.
    """
    candidates = (entry for entry in data['images'] if entry['file_name'] == filename)
    return next(candidates, None)

In [8]:
def coco_to_yolo(
    file_names,
    data
    ):
    """Write one YOLO label file per annotated image in ``file_names``.

    For each file name that has an entry in ``data['images']`` and at least
    one annotation, ``<output_path>/labels/<name without extension>.txt`` is
    written with one line per annotation::

        <category_id> <x_centre> <y_centre> <width> <height>

    The box values are the COCO ``[x_min, y_min, width, height]`` box
    converted to a centre point and normalised by the image width and height,
    formatted with six decimals. The label file is overwritten, so running
    the conversion again does not duplicate lines.

    Args:
        file_names: Image file names to convert.
        data: COCO-style dict with ``'images'`` (``id``, ``file_name``,
            ``width``, ``height``) and ``'annotations'`` (``image_id``,
            ``category_id``, ``bbox``) lists.

    Returns:
        The number of label files written. Names that are missing from
        ``data['images']`` or have no annotations are skipped.
    """
    # Index the COCO records once instead of rescanning both lists per file.
    images_by_name = {}
    for img in data['images']:
        # Keep the first entry when a file name appears more than once.
        images_by_name.setdefault(img['file_name'], img)

    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    count = 0
    for filename in file_names:
        img = images_by_name.get(filename)
        if img is None:
            # The name is not described by this annotation set.
            continue

        img_ann = anns_by_image.get(img['id'])
        if not img_ann:
            # No annotations for this image: no label file is produced.
            continue

        img_w = img['width']
        img_h = img['height']

        lines = []
        for ann in img_ann:
            # category_id is written unchanged; this assumes the COCO ids are
            # already the 0-based class indices YOLO expects - TODO confirm.
            current_category = ann['category_id']
            x, y, w, h = ann['bbox'][:4]

            # Box centre and size, normalised to the image dimensions.
            x_centre = (x + w / 2) / img_w
            y_centre = (y + h / 2) / img_h
            w = w / img_w
            h = h / img_h

            lines.append(
                f"{current_category} {x_centre:.6f} {y_centre:.6f} {w:.6f} {h:.6f}\n"
            )

        label_name = os.path.splitext(filename)[0] + ".txt"
        label_path = os.path.join(output_path, "labels", label_name)
        # "w" rather than "a" so that repeated runs stay idempotent.
        with open(label_path, "w") as file_object:
            file_object.writelines(lines)
        count += 1

    return count

In [9]:
# Write a YOLO label file for each annotated training image listed in file_names.
coco_to_yolo(file_names, train_data)