In [2]:
import json
import yaml

# Read the annotation files for the train and test splits
with open('train.json', 'r') as f:
    train_json = json.loads(f.read())

with open('test.json', 'r') as f:
    test_json = json.loads(f.read())


def _image_record(img):
    """Return the fields of an image entry that are stored in the YAML."""
    return {
        'file_name': img['file_name'],
        'width': img['width'],
        'height': img['height'],
        'id': img['id'],
    }


def _annotation_record(ann):
    """Return the fields of an annotation entry that are stored in the YAML."""
    return {
        'image_id': ann['image_id'],
        'category_id': ann['category_id'],
        'bbox': ann['bbox'],
    }


# Assemble the dataset description: split locations, class names keyed by
# category id, and trimmed-down copies of the image / annotation records.
# NOTE(review): 'val' points at the same split as 'train' — confirm this is intended.
yolo_data = {
    'path': '/home/work/MyProject/level2-objectdetection-cv-02/dataset',
    'train': 'train',
    'val': 'train',
    'test': 'test',
    'names': dict((cat['id'], cat['name']) for cat in train_json['categories']),
    'train_images': list(map(_image_record, train_json['images'])),
    'train_annotations': list(map(_annotation_record, train_json['annotations'])),
    'test_images': list(map(_image_record, test_json['images'])),
}

# Serialize the whole structure to data.yaml in block (non-flow) style
with open('data.yaml', 'w') as f:
    yaml.dump(yolo_data, stream=f, default_flow_style=False)

In [12]:
# Display the trimmed image records (file_name, width, height, id) built from train.json.
# This evaluates to the full list, so the rendered output can be very long.
yolo_data["train_images"]

[{'file_name': 'train/0000.jpg', 'width': 1024, 'height': 1024, 'id': 0},
 {'file_name': 'train/0001.jpg', 'width': 1024, 'height': 1024, 'id': 1},
 {'file_name': 'train/0002.jpg', 'width': 1024, 'height': 1024, 'id': 2},
 {'file_name': 'train/0003.jpg', 'width': 1024, 'height': 1024, 'id': 3},
 {'file_name': 'train/0004.jpg', 'width': 1024, 'height': 1024, 'id': 4},
 {'file_name': 'train/0005.jpg', 'width': 1024, 'height': 1024, 'id': 5},
 {'file_name': 'train/0006.jpg', 'width': 1024, 'height': 1024, 'id': 6},
 {'file_name': 'train/0007.jpg', 'width': 1024, 'height': 1024, 'id': 7},
 {'file_name': 'train/0008.jpg', 'width': 1024, 'height': 1024, 'id': 8},
 {'file_name': 'train/0009.jpg', 'width': 1024, 'height': 1024, 'id': 9},
 {'file_name': 'train/0010.jpg', 'width': 1024, 'height': 1024, 'id': 10},
 {'file_name': 'train/0011.jpg', 'width': 1024, 'height': 1024, 'id': 11},
 {'file_name': 'train/0012.jpg', 'width': 1024, 'height': 1024, 'id': 12},
 {'file_name': 'train/0013.jpg', 'w

In [5]:
# Display the trimmed annotation records (image_id, category_id, bbox) built from train.json.
# This evaluates to the full list, so the rendered output can be very long.
yolo_data["train_annotations"]

[{'image_id': 0, 'category_id': 0, 'bbox': [197.6, 193.7, 547.8, 469.7]},
 {'image_id': 1, 'category_id': 3, 'bbox': [0.0, 407.4, 57.6, 180.6]},
 {'image_id': 1, 'category_id': 7, 'bbox': [0.0, 455.6, 144.6, 181.6]},
 {'image_id': 1, 'category_id': 4, 'bbox': [722.3, 313.4, 274.3, 251.9]},
 {'image_id': 1, 'category_id': 5, 'bbox': [353.2, 671.0, 233.7, 103.4]},
 {'image_id': 1, 'category_id': 5, 'bbox': [3.7, 448.5, 778.2, 242.0]},
 {'image_id': 1, 'category_id': 0, 'bbox': [425.3, 681.9, 216.4, 179.8]},
 {'image_id': 1, 'category_id': 7, 'bbox': [92.4, 601.7, 139.2, 53.1]},
 {'image_id': 1, 'category_id': 0, 'bbox': [622.4, 686.5, 72.8, 94.2]},
 {'image_id': 2, 'category_id': 3, 'bbox': [267.9, 165.2, 631.6, 513.0]},
 {'image_id': 3, 'category_id': 2, 'bbox': [462.2, 369.4, 233.9, 254.6]},
 {'image_id': 3, 'category_id': 6, 'bbox': [773.3, 3.0, 188.4, 428.4]},
 {'image_id': 4, 'category_id': 1, 'bbox': [567.5, 462.2, 165.2, 89.4]},
 {'image_id': 4, 'category_id': 1, 'bbox': [859.4, 4

## Yolo labeling 규칙
- 이미지마다 하나의 `.txt` 파일로 만들어야 하며,
- 각 줄은 `<class_id> <x_center> <y_center> <width> <height>` 형식(공백으로 구분, 좌표와 크기는 이미지 너비·높이로 나눈 정규화 값)으로 적어주어야 한다.

In [6]:
import os
from collections import defaultdict

# Group the training annotations by the image they belong to, so that one
# label file can be written per image.
organized_data = defaultdict(list)
for annotation in yolo_data['train_annotations']:
    organized_data[annotation['image_id']].append(annotation)

# Create output directories.
# The label files are written to (and later listed from) `output_dir` itself,
# so that directory must exist; otherwise the first open(..., 'w') fails with
# FileNotFoundError on a fresh checkout.
output_dir = 'labels'
os.makedirs(output_dir, exist_ok=True)
# NOTE(review): the original cell only created train/labels, which nothing in
# this cell writes to. It is still created here in case later steps rely on
# it — confirm which of the two locations is the intended label directory.
os.makedirs(os.path.join("train", output_dir), exist_ok=True)

# Function to normalize bbox coordinates
def normalize_bbox(bbox, img_width=1024, img_height=1024):
    """Convert an (x, y, w, h) box into normalized center/size values.

    Args:
        bbox: Sequence of four numbers unpacked as x, y, w, h, where (x, y)
            is the corner the box extends from by w and h.
        img_width: Width used to scale the horizontal values (default 1024).
        img_height: Height used to scale the vertical values (default 1024).

    Returns:
        A list [x_center, y_center, width, height], each divided by the
        corresponding image dimension.
    """
    left, top, box_w, box_h = bbox
    center_x = (left + box_w / 2) / img_width
    center_y = (top + box_h / 2) / img_height
    rel_w = box_w / img_width
    rel_h = box_h / img_height
    return [center_x, center_y, rel_w, rel_h]

# Map each image id to its real (width, height) so boxes are normalized by the
# size recorded for that image instead of a fixed constant.
image_sizes = {
    img['id']: (img['width'], img['height'])
    for img in yolo_data['train_images']
}

# Process each image_id and create the corresponding .txt label file
for image_id, annotations in organized_data.items():
    # Label files are named after the zero-padded image id, e.g. 7 -> 0007.txt
    padded_image_id = str(image_id).zfill(4)
    file_path = os.path.join(output_dir, f'{padded_image_id}.txt')

    # Fall back to 1024x1024 (the previous hard-coded size) when an annotation
    # refers to an image id that has no image record.
    img_width, img_height = image_sizes.get(image_id, (1024, 1024))

    with open(file_path, 'w') as f:
        for annotation in annotations:
            category_id = annotation['category_id']
            bbox = normalize_bbox(annotation['bbox'], img_width, img_height)
            # One line per box: "<category_id> <x_center> <y_center> <width> <height>"
            f.write(f"{category_id} {' '.join(map(str, bbox))}\n")

# Print the list of created files (listed once so the preview below uses the same order)
created_files = os.listdir(output_dir)
print(created_files)

# Print the contents of each file (first 5 lines for brevity)
for filename in created_files[:5]:  # Limit to first 5 files
    print(f"\nContents of {filename}:")
    with open(os.path.join(output_dir, filename), 'r') as f:
        # readlines() keeps each trailing newline, so join with '' and suppress
        # print's own newline to avoid blank lines between entries.
        print(''.join(f.readlines()[:5]), end='')

['0859.txt', '3242.txt', '1577.txt', '0395.txt', '1699.txt', '3223.txt', '4306.txt', '2120.txt', '0914.txt', '2322.txt', '0536.txt', '2723.txt', '0137.txt', '2361.txt', '1310.txt', '2963.txt', '2708.txt', '4535.txt', '1276.txt', '1155.txt', '4589.txt', '2787.txt', '1534.txt', '2040.txt', '2056.txt', '1820.txt', '2370.txt', '2314.txt', '2972.txt', '3702.txt', '1586.txt', '4533.txt', '1142.txt', '2531.txt', '2367.txt', '2823.txt', '1671.txt', '3369.txt', '4371.txt', '2186.txt', '3875.txt', '4331.txt', '0711.txt', '1326.txt', '3913.txt', '3384.txt', '3721.txt', '2867.txt', '2340.txt', '0587.txt', '4719.txt', '2804.txt', '0862.txt', '1263.txt', '3313.txt', '0290.txt', '0594.txt', '4353.txt', '1281.txt', '4736.txt', '1235.txt', '1420.txt', '3886.txt', '4552.txt', '1823.txt', '1473.txt', '1520.txt', '4805.txt', '0663.txt', '3959.txt', '1764.txt', '3335.txt', '3995.txt', '2936.txt', '2995.txt', '0757.txt', '4438.txt', '3207.txt', '4125.txt', '4846.txt', '2967.txt', '3491.txt', '2094.txt', '18