In [5]:
import os
import shutil
import numpy as np
from PIL import Image, ImageDraw
from sklearn.model_selection import train_test_split
import json

def _render_instance_mask(img, annotations):
    """Rasterise the polygon annotations of one image into an instance mask.

    Args:
        img: COCO image record; its ``'width'`` and ``'height'`` keys are read.
        annotations: COCO annotation records belonging to ``img``.

    Returns:
        A ``(mask, skipped)`` tuple. ``mask`` is a mode ``'L'`` PIL image in
        which background pixels are 0 and the pixels of the k-th annotation
        (1-based position in ``annotations``) are drawn with value k.
        ``skipped`` is the number of annotations whose segmentation is not a
        polygon list and therefore could not be drawn.

    Raises:
        ValueError: if a polygon has an odd number of coordinates.
    """
    mask = Image.new('L', (img['width'], img['height']), 0)
    draw = ImageDraw.Draw(mask)
    skipped = 0
    # NOTE(review): mode 'L' stores 8 bits per pixel, so images with more than
    # 255 annotations cannot get distinct ids for every instance — confirm the
    # dataset stays below that limit or switch to a wider mask format.
    for instance_id, ann in enumerate(annotations, start=1):
        segmentation = ann.get('segmentation')
        # Polygon segmentations are lists of flat [x1, y1, x2, y2, ...] lists.
        # Anything else (e.g. an RLE dict) cannot be drawn as a polygon here.
        if not isinstance(segmentation, list):
            skipped += 1
            continue
        for seg in segmentation:
            # reshape validates that the coordinates come in (x, y) pairs.
            points = np.asarray(seg).reshape(-1, 2)
            if len(points) < 3:
                # Empty or degenerate polygon: nothing to fill.
                continue
            draw.polygon(
                [tuple(point) for point in points.tolist()],
                outline=instance_id,
                fill=instance_id,
            )
    return mask, skipped


def convert_coco_to_custom(coco_json_path, coco_images_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """Convert a COCO segmentation dataset into train/val/test image+mask folders.

    For every split the images are copied to ``<output_dir>/<split>/images``
    and a PNG instance mask with the same base name is written to
    ``<output_dir>/<split>/masks``. The split is done with
    ``train_test_split`` using ``random_state=42``.

    Args:
        coco_json_path: Path to the COCO annotation JSON file.
        coco_images_dir: Directory that contains the files named by each
            image record's ``'file_name'``.
        output_dir: Root directory for the converted dataset (created if
            missing).
        train_ratio: Train share; only used relative to ``val_ratio`` when
            splitting the non-test remainder.
        val_ratio: Validation share, relative to ``train_ratio``.
        test_ratio: Fraction of all images put into the test split.

    Raises:
        ValueError: if a polygon has an odd number of coordinates, or if
            ``train_test_split`` rejects the requested split.
    """
    # Create the output directory layout.
    os.makedirs(output_dir, exist_ok=True)
    for split in ('train', 'val', 'test'):
        for subdir in ('images', 'masks'):
            os.makedirs(os.path.join(output_dir, split, subdir), exist_ok=True)

    # Load the COCO annotations.
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Group annotations by image_id for fast lookup.
    image_to_annotations = {}
    for ann in coco_data['annotations']:
        image_to_annotations.setdefault(ann['image_id'], []).append(ann)

    all_images = coco_data['images']

    # Split off the test set first, then divide the remainder into train/val.
    train_val_images, test_images = train_test_split(all_images, test_size=test_ratio, random_state=42)
    train_images, val_images = train_test_split(train_val_images, test_size=val_ratio/(train_ratio+val_ratio), random_state=42)

    def process_images(images, split):
        """Copy the images of one split, write their masks, return the skip count."""
        skipped_total = 0
        for img in images:
            file_name = img['file_name']

            # Copy the image; file_name may contain sub-directories, so make
            # sure the destination folder exists first.
            src_path = os.path.join(coco_images_dir, file_name)
            dst_path = os.path.join(output_dir, split, 'images', file_name)
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            shutil.copy(src_path, dst_path)

            # Build the mask (all background when the image has no annotations).
            mask, skipped = _render_instance_mask(img, image_to_annotations.get(img['id'], []))
            skipped_total += skipped

            # Save the mask next to the image name, always as PNG.
            mask_path = os.path.join(output_dir, split, 'masks', os.path.splitext(file_name)[0] + '.png')
            os.makedirs(os.path.dirname(mask_path), exist_ok=True)
            mask.save(mask_path)
        return skipped_total

    # Process and save every split.
    skipped_annotations = (
        process_images(train_images, 'train')
        + process_images(val_images, 'val')
        + process_images(test_images, 'test')
    )

    print(f"Conversion completed. Data saved in {output_dir}")
    print(f"Train images: {len(train_images)}")
    print(f"Validation images: {len(val_images)}")
    print(f"Test images: {len(test_images)}")
    if skipped_annotations:
        # Only reported when something was actually left out of the masks.
        print(f"Skipped non-polygon annotations: {skipped_annotations}")

# Example usage: convert the COCO export into train/val/test image and mask folders.
coco_json_path = 'building-segmentation-coco/annotations/instances_default.json'
coco_images_dir = 'building-segmentation-coco/images'
output_dir = 'building-segmentation/'

# Split ratios are left at the function's defaults.
convert_coco_to_custom(
    coco_json_path=coco_json_path,
    coco_images_dir=coco_images_dir,
    output_dir=output_dir,
)


Conversion completed. Data saved in building-segmentation/
Train images: 418
Validation images: 90
Test images: 90
