In [None]:
import os
import json
import shutil
import random

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used by the
# following cells (which all live under /content/drive) are reachable.
# `google.colab` is imported here, so this cell expects a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the COCO dataset on the mounted Drive. The source images
# folder and the annotation file are both resolved relative to it.
coco_dir = '/content/drive/MyDrive/Brain Tumor/coco'
images_dir, annotations_file = (
    os.path.join(coco_dir, leaf) for leaf in ('images', 'annotations.json')
)

In [None]:
# Split ratios, expressed as fractions of the whole dataset (0-1), not as
# percentages. NOTE(review): split_dataset, as visible in this notebook, hands
# the test subset whatever remains after train and validation, so TEST_PERCENT
# is informational only.
TRAIN_PERCENT, VAL_PERCENT, TEST_PERCENT = 0.7, 0.1, 0.2

In [None]:
def split_dataset(images_dir, annotations_file, train_percent=None,
                  val_percent=None, output_dir=None, seed=42):
    """Split a COCO dataset into train / valid / test subsets on disk.

    The image records in ``annotations_file`` are shuffled and partitioned.
    For every subset a folder (``train``, ``valid`` or ``test``) is created
    under ``output_dir``; the subset's image files are copied into it from
    ``images_dir`` and an ``annotations.json`` restricted to those images is
    written next to them. One summary line is printed per subset.

    Args:
        images_dir: Folder holding the source images; each image record's
            ``file_name`` is resolved relative to it.
        annotations_file: Path to the COCO JSON file. It must contain the
            ``images``, ``annotations`` and ``categories`` keys; ``info`` and
            ``licenses`` are carried over only when present.
        train_percent: Fraction (0-1) of the images assigned to the train
            subset. ``None`` falls back to the notebook-level
            ``TRAIN_PERCENT`` constant.
        val_percent: Fraction (0-1) of the images assigned to the validation
            subset. ``None`` falls back to the notebook-level ``VAL_PERCENT``
            constant. The test subset always receives the remaining images.
        output_dir: Folder under which the subset folders are created.
            ``None`` means the folder that contains ``annotations_file``.
        seed: Seed for the shuffle. A fixed seed makes the split reproducible,
            so re-running the cell rewrites the same files instead of leaving
            images from a previous, different split in the subset folders.
            Pass ``None`` for a different split on every call.

    Returns:
        None.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
        KeyError: If a required COCO key is missing from the annotation file.
        OSError: If the annotation file or an image cannot be read or written
            (e.g. ``FileNotFoundError`` for a missing image).
    """
    # Fall back to the notebook configuration only when no override is given.
    if train_percent is None:
        train_percent = TRAIN_PERCENT
    if val_percent is None:
        val_percent = VAL_PERCENT
    # Small tolerance so float sums such as 0.9 + 0.1 are never rejected.
    if train_percent < 0 or val_percent < 0 or train_percent + val_percent > 1 + 1e-9:
        raise ValueError(
            "train_percent and val_percent must be non-negative and sum to at most 1, "
            f"got {train_percent} and {val_percent}"
        )
    if output_dir is None:
        output_dir = os.path.dirname(annotations_file)

    with open(annotations_file, 'r') as f:
        coco_data = json.load(f)

    # Shuffle a copy with a private generator: the loaded data stays intact and
    # the global `random` state used elsewhere in the notebook is not disturbed.
    images = list(coco_data['images'])
    random.Random(seed).shuffle(images)

    num_images = len(images)
    num_train = int(num_images * train_percent)
    num_val = int(num_images * val_percent)

    # Index annotations by image once instead of rescanning the full list for
    # every image; the per-image annotation order is unchanged.
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    # `info` and `licenses` are not present in every COCO file, so copy them
    # only when the file has them.
    shared_header = {key: coco_data[key] for key in ('info', 'licenses') if key in coco_data}

    def copy_images_and_write_annotations(image_list, target_dir, subset_name):
        """Copy one subset's images into target_dir and write its annotations.json."""
        os.makedirs(target_dir, exist_ok=True)

        for image_info in image_list:
            image_filename = image_info['file_name']
            src_image_path = os.path.join(images_dir, image_filename)
            dst_image_path = os.path.join(target_dir, image_filename)
            # file_name may include sub-folders; mirror them under target_dir.
            os.makedirs(os.path.dirname(dst_image_path), exist_ok=True)
            shutil.copyfile(src_image_path, dst_image_path)

        subset_annotations = dict(shared_header)
        subset_annotations["images"] = list(image_list)
        subset_annotations["annotations"] = [
            ann
            for image_info in image_list
            for ann in annotations_by_image.get(image_info['id'], [])
        ]
        subset_annotations["categories"] = coco_data["categories"]

        subset_annotations_file = os.path.join(target_dir, 'annotations.json')
        with open(subset_annotations_file, 'w') as f:
            json.dump(subset_annotations, f)

        print(f"{subset_name} set created with {len(image_list)} images.")

    subsets = (
        ("Train", 'train', images[:num_train]),
        ("Validation", 'valid', images[num_train:num_train + num_val]),
        # The test subset takes everything left over after train and validation.
        ("Test", 'test', images[num_train + num_val:]),
    )
    for subset_name, folder_name, image_list in subsets:
        copy_images_and_write_annotations(
            image_list, os.path.join(output_dir, folder_name), subset_name
        )


# Run the split on the dataset paths configured earlier in the notebook;
# a summary line is printed for each subset.
split_dataset(images_dir=images_dir, annotations_file=annotations_file)

Train set created with 536 images.
Validation set created with 76 images.
Test set created with 154 images.
