## Installation
- Clone Repo: [https://github.com/ultralytics/JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)
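- Copy this notebook into the root of the cloned repo (it imports the repo's `utils` module and installs its `requirements.txt`)
- Change the dataset paths (`dataset_base`, `output_dir`) in the configuration cell below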
- Run!

In [1]:
%pip install -r requirements.txt




You should consider upgrading via the 'c:\Users\phili\Workspaces\Python\Envs\json2yolo\Scripts\python.exe -m pip install --upgrade pip' command.


In [20]:
# Root folder of the COCO-style source dataset (written WITHOUT a trailing slash).
dataset_base = 'C:/Users/phili/git.haw-hamburg.de/trashberry/trash/object/data/detection/mattress_test_2'
# Folder that holds the COCO *.json annotation files.
# The explicit '/' is required: dataset_base does not end with a separator.
source_json_dir = dataset_base + '/annotations'
# Image folders per split; None disables a split.
# NOTE: train and val point to the same folder. copy_image_as_type checks the
# train folder first, so every image found there is exported as 'train'.
source_train_dir = dataset_base + '/images'
source_val_dir = dataset_base + '/images'
source_test_dir = None
# Destination folder for the converted dataset.
output_dir = 'C:/Users/phili/git.haw-hamburg.de/trashberry/trash/object/data/detection/mattress_test_2_converted'
# False: write bounding boxes; True: write segmentation polygons instead.
use_segments = False
# Indexed by the zero-based COCO class (category_id - 1); the value is the
# class id written to the label file, or None to drop that class entirely.
label_map = [0,0,None,None,None]

In [21]:
import contextlib
import json

import cv2
import pandas as pd
from PIL import Image
from collections import defaultdict

from utils import *
import shutil
import os

def copy_image(filename, source_dir, output_dir):
    """Copy ``filename`` from ``source_dir`` into ``output_dir``.

    The destination folder is created on demand, including any sub-folder
    that is part of ``filename`` (e.g. ``'sub/img.jpg'``), so relative image
    paths keep their layout below ``output_dir``.

    Args:
        filename: Image path relative to ``source_dir``.
        source_dir: Folder the image is read from (str or Path).
        output_dir: Folder the image is written to (str or Path).

    Raises:
        FileNotFoundError: If the source image does not exist.
    """
    destination = Path(output_dir) / filename
    # Creating the parent of the destination covers output_dir itself as well
    # as sub-folders carried by filename; exist_ok makes repeated calls safe.
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(Path(source_dir) / filename, destination)

def copy_image_as_type(filename, output_dir, train_dir = None, val_dir = None, test_dir = None):
    """Copy an image into the split folder it belongs to and report the split.

    The split folders are probed in the order train, val, test. The first one
    that contains ``filename`` wins: the image is copied to
    ``<output_dir>/images/<split>`` and the split name is returned.

    Args:
        filename: Image path relative to the split folders.
        output_dir: Root of the converted dataset.
        train_dir: Source folder of training images, or None to skip.
        val_dir: Source folder of validation images, or None to skip.
        test_dir: Source folder of test images, or None to skip.

    Returns:
        ``'train'``, ``'val'`` or ``'test'``, or None when the image was not
        found in any of the given folders (nothing is copied in that case).
    """
    images_root = Path(output_dir) / 'images'
    # Order matters: an image present in several folders goes to the first match.
    candidates = (('train', train_dir), ('val', val_dir), ('test', test_dir))
    for split, folder in candidates:
        if folder is None:
            continue
        if not os.path.exists(Path(folder) / filename):
            continue
        copy_image(filename, folder, images_root / split)
        return split
    return None
    
def convert_coco_json(json_dir='../coco/annotations/', output_dir='../datasets/data', train_dir = None, val_dir = None, test_dir = None, use_segments=False, label_map=None, cls91to80=False):
    """Convert COCO annotation files to YOLO label files and copy the images.

    Every ``*.json`` file found in ``json_dir`` is processed in sorted order.
    Each image listed in a file is copied into ``<output_dir>/images/<split>``
    via ``copy_image_as_type``; images that are found in none of the split
    folders are ignored. For every image that has annotations, one text file
    ``<output_dir>/labels/<split>/<image name>.txt`` is written (opened in
    append mode) with one line per unique annotation: the class id followed
    by either the normalized ``x_center y_center width height`` box or, with
    ``use_segments``, the normalized polygon coordinates.

    Crowd annotations (``iscrowd``) are not filtered out.

    Args:
        json_dir: Folder with the COCO ``*.json`` annotation files.
        output_dir: Root of the converted dataset. It is prepared by
            ``make_dirs`` from ``utils`` (NOTE(review): presumably this
            recreates the folder - confirm before pointing it at existing data).
        train_dir: Source folder of training images, or None.
        val_dir: Source folder of validation images, or None.
        test_dir: Source folder of test images, or None.
        use_segments: Write segmentation polygons instead of bounding boxes.
        label_map: Optional sequence indexed by the zero-based class
            (``category_id - 1``, after the optional 91->80 mapping). The
            value is the class id to write; None drops the annotation.
        cls91to80: Map the class through ``coco91_to_coco80_class()``.

    Raises:
        IndexError: If a class index is outside ``label_map``.
    """
    save_dir = Path(make_dirs(output_dir))  # output directory
    labels_root = save_dir / 'labels'
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
        with open(json_file) as f:
            data = json.load(f)

        instance_count = {}  # split name -> number of label lines written
        images = {}  # image id -> image record (extended with 'image_type')

        for img in data['images']:
            img['image_type'] = copy_image_as_type(img['file_name'], save_dir, train_dir, val_dir, test_dir)
            if img['image_type'] is not None:
                # Same root as the label files written below; safe to repeat.
                (labels_root / img['image_type']).mkdir(parents=True, exist_ok=True)
            images[img['id']] = img

        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)

        # Write labels file
        for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
            img = images.get(img_id)
            if img is None:
                # Annotation refers to an image missing from data['images'].
                print(f"Skipped {len(anns)} annotations of unknown image id {img_id!r}")
                continue
            h, w, f_name, f_type = img['height'], img['width'], img['file_name'], img['image_type']

            if f_type is None:  # image was not found in any split folder
                continue

            # One entry per label line. Boxes and segments share this single
            # list so the written lines always match the de-duplicated entries.
            rows = []
            for ann in anns:
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann['bbox'], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # skip boxes with non-positive width or height
                    continue

                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
                if label_map is not None:
                    cls = label_map[cls]
                    if cls is None:  # class is excluded from the output
                        continue

                if use_segments:
                    if len(ann['segmentation']) > 1:
                        s = merge_multi_segment(ann['segmentation'])
                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                    else:
                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                    row = [cls] + s
                else:
                    row = [cls] + box.tolist()

                if row not in rows:  # drop exact duplicates
                    rows.append(row)

            print(f"Processed '{f_name}': {len(rows)} annotations")
            instance_count[f_type] = instance_count.get(f_type, 0) + len(rows)

            # Write
            label_path = (labels_root / f_type / f_name).with_suffix('.txt')
            # file_name may contain sub-folders; make sure they exist.
            label_path.parent.mkdir(parents=True, exist_ok=True)
            with open(label_path, 'a') as file:
                for row in rows:  # cls, box or segments
                    file.write(' '.join('%g' % value for value in row) + '\n')
        print(instance_count)


In [22]:
# Run the conversion with the settings from the configuration cell.
convert_coco_json(
    json_dir=source_json_dir,
    output_dir=output_dir,
    train_dir=source_train_dir,
    val_dir=source_val_dir,
    test_dir=source_test_dir,
    use_segments=use_segments,
    label_map=label_map,
)