# Custom Coco Dataset

# Resize Image Data

In [1]:
import json
from PIL import Image
import os

In [2]:
# Set the target size for resizing
# Raw images are 8/6 -> 8000x6000 pixels
target_size = (640, 640)
input_folder = "/Users/johannes/Code/Work/Exports/NanoStore_Full_COCO"
output_folder = "/Users/johannes/Code/labelstudio/NanoStore_Full_COCO_resized"
original_json_file = '/Users/johannes/Code/Work/Exports/NanoStore_Full_COCO/result.json'

In [3]:
def resize_images(input_folder, output_folder, target_size=(640, 640)):
    # Create the output folder if it doesn't exist
    output_images_folder = os.path.join(output_folder, "images")
    if not os.path.exists(output_images_folder):
        os.makedirs(output_images_folder)

    # List all files in the input folder
    files = os.listdir(input_folder)

    for file in files:
        if file.endswith(".png"):
            # Open the image
            image_path = os.path.join(input_folder, file)
            img = Image.open(image_path)

            # Resize the image
            resized_img = img.resize(target_size, Image.ANTIALIAS)

            # Save the resized image to the output folder
            output_path = os.path.join(output_images_folder, file)
            resized_img.save(output_path)

            print(f"Resized {file} to {target_size}")

In [4]:
# Resize images
resize_images(os.path.join(input_folder, 'images'), output_folder, target_size)

  resized_img = img.resize(target_size, Image.ANTIALIAS)


Resized 127d2e73-176_0_99.png to (640, 640)
Resized 7739ad30-172_0_120.png to (640, 640)
Resized 750e6d49-172_1_124.png to (640, 640)
Resized 11d368c4-172_0_43.png to (640, 640)
Resized 9d6d203d-173_0_3.png to (640, 640)
Resized 304c9ad4-173_0_47.png to (640, 640)
Resized 234b0062-176_0_367721.png to (640, 640)
Resized c27c84b1-173_0_74.png to (640, 640)
Resized 67d8c9e2-176_0_458981.png to (640, 640)
Resized aa2ed50d-172_0_139.png to (640, 640)
Resized 8f1d0f74-172_0_2.png to (640, 640)
Resized 8cb19294-172_0_37.png to (640, 640)
Resized 094f15f6-172_0_69.png to (640, 640)
Resized e55c5933-173_0_52.png to (640, 640)
Resized 242c0a4e-172_0_3.png to (640, 640)
Resized 9b88abfa-176_1_62.png to (640, 640)
Resized b2e5c6b1-172_0_31.png to (640, 640)
Resized 8fdae462-173_0_44.png to (640, 640)
Resized 2dab6b23-176_1_73.png to (640, 640)
Resized 75e28be8-172_0_60.png to (640, 640)
Resized 6f6c3b6f-176_0_51.png to (640, 640)
Resized 486f751d-172_0_104.png to (640, 640)
Resized dae5793c-172_0_

In [5]:
def resize_annotations(original_json_file, output_folder, target_size=(640, 640), original_shape=(8000, 6000)):
    
    # create annotations folder
    output_annotations_folder = os.path.join(output_folder, 'annotations')
    if not os.path.exists(output_annotations_folder):
        os.makedirs(output_annotations_folder)
    
    # Get shapes
    original_width, original_height = original_shape
    target_width, target_height = target_size

    # Compute scaling factors
    width_scale = target_width / original_width
    height_scale = target_height / original_height

    # Load JSON annotations
    with open(original_json_file, 'r') as f:
        annotations = json.load(f)

    # Update image dimensions
    for image_info in annotations['images']:
        image_info['width'] = target_width
        image_info['height'] = target_height

    # Resize images and update bounding box coordinates
    for annotation in annotations['annotations']:
        image_id = annotation['image_id']
        image_info = next(img for img in annotations['images'] if img['id'] == image_id)

        # Update bounding box coordinates by dividing by 10
        bbox = annotation['bbox']

        rescaled_bbox = [
            bbox[0] * width_scale,
            bbox[1] * height_scale,
            bbox[2] * width_scale,
            bbox[3] * height_scale
        ]

        # old
        # annotation['bbox'] = [coord / 10 for coord in bbox]
        annotation['bbox'] = rescaled_bbox

        # Update area by dividing by 100
        annotation['area'] /= width_scale * height_scale

    # Save updated JSON annotations
    full_json_file_path = os.path.join(output_annotations_folder, f'updated_annotation_{target_size[0]}_{target_size[1]}.json')
    with open(full_json_file_path, 'w') as f:
        json.dump(annotations, f, indent=2)

    return full_json_file_path

In [6]:
# Resize images and update annotations
output_json_path = resize_annotations(original_json_file, output_folder, target_size) # , input_folder, output_folder

# Coco Splitting

In [7]:
from pathlib import Path
from cocohelper import COCOHelper
from cocohelper.splitters.proportional import ProportionalDataSplitter

root_dir = Path(output_json_path)
image_dir = ""

In [8]:
root_dir

PosixPath('/Users/johannes/Code/labelstudio/NanoStore_Full_COCO_resized/annotations/updated_annotation_640_640.json')

In [9]:
print(f"Loading dataset: {output_json_path}")
ch = COCOHelper.load_json(output_json_path, img_dir=image_dir)

Loading dataset: /Users/johannes/Code/labelstudio/NanoStore_Full_COCO_resized/annotations/updated_annotation_640_640.json


In [10]:
splitter = ProportionalDataSplitter(70, 20, 10)
ch_train, ch_val, ch_test = splitter.apply(ch)
dest_dir = Path(os.path.join(output_folder, 'annotations'))

for ch, ch_name in zip([ch_train, ch_val, ch_test], ["train", "val", "test"]):
    print(f"Saving dataset: '{ch_name}'")
    fname = dest_dir / f"{ch_name}_640.json"
    ch.write_annotations_file(fname)

Saving dataset: 'train'
Saving dataset: 'val'
Saving dataset: 'test'
