In [25]:
import json
import os
import shutil
import random

In [26]:
# Input locations: the source images and the COCO-style annotation file sit
# side by side in a folder one level above the notebook.
parent_folder = "Final Dataset"
images_folder = os.path.join("..", parent_folder)
annotations_file = os.path.join(images_folder, "instances_default.json")

# Output locations: one sub-folder per split under the output root,
# keyed by split name.
out_parent_folder = "Scatter Dataset"
out_folders = {
    split_name: os.path.join(out_parent_folder, split_name)
    for split_name in ("train", "val", "test")
}

In [27]:
# Make sure every split folder has its "images" and "annotations" sub-folders.
# exist_ok=True keeps the cell safe to re-run when the folders already exist.
for split_dir in out_folders.values():
    for subfolder in ("images", "annotations"):
        os.makedirs(os.path.join(split_dir, subfolder), exist_ok=True)

In [28]:
# Load annotations
# Read the whole annotation file into `data`; later cells index it by the
# "images", "annotations" and "categories" keys.
# The encoding is given explicitly because JSON interchange files are UTF-8,
# while open() otherwise falls back to the platform locale (e.g. cp1252 on
# Windows), which can fail on or mis-decode non-ASCII file names.
with open(annotations_file, "r", encoding="utf-8") as annotations_fh:
    data = json.load(annotations_fh)

In [29]:
# Collect the ids of all images referenced by at least one annotation.
# A set removes the duplicates produced by images with several annotations.
annotated_images = {annotation["image_id"] for annotation in data["annotations"]}

# Number of distinct annotated images
print(len(annotated_images))

151


In [30]:
# Keep only the image records whose id appears in `annotated_images`,
# preserving their order in data["images"].
images_with_annotations = list(
    filter(lambda image: image["id"] in annotated_images, data["images"])
)

In [31]:
# Shuffle and split dataset
# A fixed seed makes the partition reproducible under Restart & Run All.
# With an unseeded shuffle every run produced a different split, and since
# the output folders are not emptied between runs, images copied by earlier
# runs could end up in more than one split (train/test leakage).
SPLIT_SEED = 42

# Sort first so the result does not depend on the current list order; this
# also makes the cell idempotent when it is re-run without re-running the
# filter cell. Assumes image ids are mutually comparable (COCO ids are
# normally integers) -- TODO confirm for this dataset.
images_with_annotations = sorted(images_with_annotations, key=lambda img: img["id"])
random.Random(SPLIT_SEED).shuffle(images_with_annotations)

# Split sizes: 95% train, 2.5% val (both truncated to whole images); the
# remainder becomes the test split in the next cell.
n = len(images_with_annotations)
train_split = int(0.95 * n)
val_split = int(0.025 * n)

In [32]:
# Cut the shuffled list into three contiguous, non-overlapping slices:
# the first `train_split` images, the next `val_split` images, and the rest.
val_end = train_split + val_split
data_splits = {
    "train": images_with_annotations[:train_split],
    "val": images_with_annotations[train_split:val_end],
    "test": images_with_annotations[val_end:],
}

In [33]:
# Copy images and prepare JSON annotations
# For each split: write a JSON file holding that split's image records, the
# annotations that reference them, and the full category list; then copy the
# image files into the split's "images" folder.
for split, images in data_splits.items():
    split_image_ids = {img["id"] for img in images}
    split_annotations = [ann for ann in data["annotations"] if ann["image_id"] in split_image_ids]

    # Save split JSON
    split_json = {
        "images": images,
        "annotations": split_annotations,
        "categories": data["categories"]
    }
    json_path = os.path.join(out_folders[split], "annotations", "instances_default.json")
    # Explicit UTF-8 so the output does not depend on the platform locale.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(split_json, f, indent=4)

    # Copy images
    images_dir = os.path.join(out_folders[split], "images")
    missing_files = []
    for img in images:
        src_path = os.path.join(images_folder, img["file_name"])
        dst_path = os.path.join(images_dir, img["file_name"])
        if not os.path.exists(src_path):
            # Still best-effort (no exception), but remember the file so the
            # gap between the JSON and the copied images is reported below.
            missing_files.append(img["file_name"])
            continue
        # file_name may contain a sub-directory; create it so the copy does
        # not fail on a missing destination folder.
        os.makedirs(os.path.dirname(dst_path), exist_ok=True)
        shutil.copy(src_path, dst_path)

    print(f"{split}: copied {len(images) - len(missing_files)} of {len(images)} images, "
          f"{len(split_annotations)} annotations")
    if missing_files:
        print(f"WARNING: {len(missing_files)} image(s) listed in {json_path} "
              f"were not found in {images_folder}: {missing_files}")