# Harmonizing image data

## Setup Paths and Variables
- `annotation_files_path` is set to the directory where the annotation files are stored.
- `scale_values` is a list of scale factors.
- `target_path` is the directory where the scaled annotation files are saved.


## Collect Annotation Files
- `annotation_files` is a list of paths to all annotation files in the `annotation_files_path` directory.

## Loop Over Annotation Files
- For each annotation file in `annotation_files`:
  - The file path is printed.
  - The file is opened and its content is loaded as JSON into `coco_data`.

## Access COCO Data
- `annotations` and `images` are extracted from `coco_data`.
- `image_names` is a list of file names of the images.

## Process Annotations for Each Scale
- For each scale in `scale_values`:
  - `annotations` are rescaled by the function `write_new_annotations` (defined in this notebook), which is called with the original annotations and the current `scale`.
  - A new file name is generated based on the current date and time, and the scale value.
  - The `annotations` in `coco_data` are updated.
  - The updated `coco_data` is saved to the new file.
  - A message is printed indicating the new file has been saved.

In [None]:
# libraries
import os
import json
import datetime
import numpy as np

In [None]:
# User settings

# Folder holding the COCO annotation files that should be rescaled.
annotation_files_path = "./coco_annotations"

# Scale factors to apply; each one is target GSD / source GSD.
scale_values = [1, 2, 3, 4]

# Folder that receives the rescaled annotation files.
target_path = "./scaled_coco_annotations"

In [None]:
def _scale_segmentation(segmentation, scale: float) -> list:
    """Divide every coordinate of a polygon segmentation by ``scale``.

    Each polygon is scaled on its own, so an annotation whose polygons have
    different numbers of points (a ragged list) is handled correctly.

    Args:
        segmentation: a list of polygons (each a flat list of coordinates),
            or a single flat list of coordinates
        scale (float): the factor the coordinates are divided by

    Returns:
        list: the scaled segmentation with the same nesting as the input

    Raises:
        TypeError: if ``segmentation`` is a dict, which cannot be rescaled
            by dividing coordinates
    """
    if isinstance(segmentation, dict):
        raise TypeError(
            "dict segmentations cannot be rescaled by dividing coordinates"
        )

    # A flat list of numbers is scaled as one polygon.
    if segmentation and not isinstance(segmentation[0], (list, tuple)):
        return (np.asarray(segmentation) / scale).tolist()

    # Converting the whole nested list at once fails for polygons of
    # differing length, so convert polygon by polygon.
    return [(np.asarray(polygon) / scale).tolist() for polygon in segmentation]


def write_new_annotations(annotations: list, scale: float):
    """Return scaled copies of COCO annotations.

    Bounding boxes and segmentation coordinates are divided by ``scale`` and
    the area by ``scale ** 2``. The input annotations are not modified.
    Nothing is written to disk here.

    Args:
        annotations (list): original annotations
        scale (float): the factor to scale between the original image and the lower resolution image

    Returns:
        list: new and scaled annotations; their ids are renumbered
            consecutively, starting at ``len(annotations) + 1``
    """
    new_annotations = []
    index = len(annotations) + 1
    for ann in annotations:
        # Bounding box coordinates (x, y, width, height)
        bbox = (np.asarray(ann["bbox"]) / scale).tolist()

        new_ann = {}
        new_ann["id"] = index
        new_ann["image_id"] = ann["image_id"]
        new_ann["category_id"] = ann["category_id"]
        new_ann["segmentation"] = _scale_segmentation(ann["segmentation"], scale)
        # Areas scale with the square of the linear factor.
        new_ann["area"] = float(ann["area"] / scale**2)
        new_ann["bbox"] = bbox
        new_ann["iscrowd"] = ann["iscrowd"]
        # "attributes" is copied only when the annotation carries it, so
        # annotations without that key no longer raise KeyError.
        if "attributes" in ann:
            new_ann["attributes"] = ann["attributes"]

        index += 1
        new_annotations.append(new_ann)

    return new_annotations

def get_filename(path: str):
    """Return the last component of a file path.

    The path is normalized first, so redundant separators and a trailing
    separator do not affect the result.

    Args:
        path (str): path to a file

    Returns:
        str: Name of the file
    """
    # os.path.split yields (head, tail); the tail is the file name.
    _, tail = os.path.split(os.path.normpath(path))
    return tail


In [None]:
# Collect all annotation files: only names with a real ".json" extension.
annotation_files = [
    os.path.join(annotation_files_path, file)
    for file in os.listdir(annotation_files_path)
    if file.endswith(".json")
]

# Create the output directory up front so that saving does not fail when it
# does not exist yet.
os.makedirs(target_path, exist_ok=True)

# loop over annotation files
for coco_annotations_file in annotation_files:
    print(coco_annotations_file)
    with open(coco_annotations_file, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # access coco data
    # `annotations` keeps pointing at the original list, so every scale below
    # is computed from the unscaled data even though coco_data is updated.
    annotations = coco_data['annotations']
    # `images` and `image_names` are not used by the scaling step below.
    # NOTE(review): the image entries are written out unchanged - confirm
    # whether they also need to be adjusted for the new scale.
    images = coco_data['images']
    image_names = [image['file_name'] for image in images]

    # File name without its directory and without the final extension only,
    # so dots inside the name are kept (e.g. "a.v1.json" -> "a.v1").
    coco_annotations_file_name = os.path.splitext(get_filename(coco_annotations_file))[0]

    for scale in scale_values:
        new_annotations = write_new_annotations(annotations, scale)

        coco_data["annotations"] = new_annotations

        # create new annotation files
        # ':' and ' ' are replaced so the time stamp is safe in a file name.
        current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        new_file = f'{current_time}_{coco_annotations_file_name}_scaled_{scale}.json'.replace(':', '-').replace(' ', '_')
        new_file = os.path.join(target_path, new_file)

        with open(new_file, 'w', encoding='utf-8') as f:
            json.dump(coco_data, f)

        print(f"Saved data to {new_file}")