# Version 1.0
Inputs:
(json file, coco) original annotations; these annotations are used as the base
(json file, coco) augmented (to be added) annotations

(folder) original images
(folder) augmented (to be added) images

(folder) storage location for the combined data
(variable) new annotations file name
(variable) "move" or "copy" the images

Output: 
Combines images and annotations in a selected folder

In [1]:
# The inputs to this script should be two datasets in COCO format. The first dataset is used as the basis for numbering the second one; to combine more than two datasets, run this script several times.

import os
import json
import datetime
import shutil
import glob
from PIL import Image

# Input and output locations.
# Each COCO json file must be in the same directory as the images it describes.
path_to_original_images = "../../../old/BAA/Data/val_max/"
path_to_original_data_anno = "../../../old/BAA/Data/val_max/coco_val_max.json"
path_to_augmented_images = "../../../old/BAA/Data/val_aug/"
paths_to_data_augmented_anno = "../../../old/BAA/Data/val_aug/Augmented_A_coco_val_max.json"
# Folder that receives the combined annotation file and all images.
path_to_output_folder = "../../../old/BAA/Data/val_combined/"

# Name of the combined annotation file; ".json" is appended by writeJson if missing.
new_filename_coco = "coco_Combined"
# How the images are transferred into the output folder.
move_or_copy = "copy" # or "move"


# define functions
def createSubsetStructure(originalData, description):
    """Build an empty COCO-style dataset skeleton.

    The categories are taken over (by reference) from ``originalData``;
    the ``images`` and ``annotations`` lists start out empty.

    Args:
        originalData: Mapping with a ``"categories"`` entry.
        description: Free text stored in the ``info`` section.

    Returns:
        A dict with the keys ``info``, ``categories``, ``images`` and
        ``annotations``.

    NOTE(review): ``info`` is emitted as a list of single-key dicts and uses
    the key ``"contributer"``. The COCO format describes ``info`` as one
    object with a ``contributor`` field -- confirm whether downstream
    consumers depend on the current shape before changing it.
    """
    today = datetime.date.today()
    created_at = str(datetime.datetime.now())
    info_entries = [
        {"year": today.year},
        {"version": "1.0"},
        {"description": description},
        {"contributer": "Michael Infanger"},
        {"url": ""},
        {"date_created": created_at},
    ]
    return {
        "info": info_entries,
        "categories": originalData["categories"],
        "images": [],
        "annotations": [],
    }


def writeJson(dictObject, filename):
    """Serialize ``dictObject`` as indented JSON and write it to ``filename``.

    If the text after the last "." in ``filename`` is not ``json``, the
    suffix ``.json`` is appended first.

    Args:
        dictObject: JSON-serializable object.
        filename: Target path, with or without the ``.json`` extension.
    """
    extension = filename.rpartition(".")[2]
    target = filename if extension == "json" else f"{filename}.json"
    # Serialize before opening so a serialization error never truncates the file.
    payload = json.dumps(dictObject, indent=4)
    with open(target, "w") as handle:
        handle.write(payload)


def copyImagesPath(image_path_list, path_output):
    """Copy every file in ``image_path_list`` to ``path_output``.

    Args:
        image_path_list: Iterable of source file paths.
        path_output: Destination passed unchanged to ``shutil.copy``.
    """
    for source_path in image_path_list:
        shutil.copy(source_path, path_output)


def moveImagesPath(image_path_list, path_output):
    """Move every file in ``image_path_list`` to ``path_output``.

    Args:
        image_path_list: Iterable of source file paths.
        path_output: Destination passed unchanged to ``shutil.move``.
    """
    for source_path in image_path_list:
        shutil.move(source_path, path_output)


def moveOrCopyFiles(image_path_list, path_output, mode="copy"):
    """Transfer the given files to ``path_output`` by moving or copying.

    Args:
        image_path_list: Iterable of source file paths.
        path_output: Destination handed to the move/copy helper.
        mode: Either ``"move"`` or ``"copy"``.

    Raises:
        ValueError: If ``mode`` is neither ``"move"`` nor ``"copy"``.
    """
    # Guard clause: reject unknown modes before touching any file.
    if mode not in ("move", "copy"):
        raise ValueError("Only copy or move are accepted inputs")

    transfer = moveImagesPath if mode == "move" else copyImagesPath
    transfer(image_path_list, path_output)





# Verify that every required input exists before doing any work.
# The checks run in the listed order and the first missing path aborts the run.
required_inputs = (
    (path_to_original_images, "Folder for original images doesent exist!"),
    (path_to_original_data_anno, "File for original annotations doesent exist!"),
    (path_to_augmented_images, "Folder for augmented images doesent exist!"),
    (paths_to_data_augmented_anno, "File for augmented annotations doesent exist!"),
)
for required_path, error_message in required_inputs:
    if not os.path.exists(required_path):
        raise ValueError(error_message)

# Create the output folder if it does not exist yet. os.makedirs also creates
# missing parent folders, where os.mkdir would raise FileNotFoundError.
if not os.path.exists(path_to_output_folder):
    os.makedirs(path_to_output_folder)
    print("Output folder was created at {}".format(path_to_output_folder))


# Read both COCO annotation files into memory.
with open(path_to_original_data_anno, "r") as original_file:
    coco_original = json.load(original_file)

with open(paths_to_data_augmented_anno, "r") as augmented_file:
    coco_augmented = json.load(augmented_file)


# The description names both source files (the part after the last "/").
original_anno_name = path_to_original_data_anno.rpartition("/")[2]
augmented_anno_name = paths_to_data_augmented_anno.rpartition("/")[2]
description_of_new_coco_file = f"Combined Data from {original_anno_name} and {augmented_anno_name}"


Output folder was created at ../../../old/BAA/Data/val_combined/


In [2]:
# Reject an invalid transfer mode up front, before any file is renamed or
# written; otherwise the error would only surface at the very end.
if move_or_copy not in ("move", "copy"):
    raise ValueError("Only copy or move are accepted inputs")

# Create a new COCO structure. The categories are taken from the original
# data set only.
# NOTE(review): this assumes both data sets use the same categories -- confirm.
coco_combined = createSubsetStructure(coco_original, description_of_new_coco_file)

# Start from the original data. The lists are copied so that appending the
# augmented entries does not also grow the lists of coco_original.
coco_combined["images"] = list(coco_original["images"])
coco_combined["annotations"] = list(coco_original["annotations"])

# New ids continue after the highest id in use. max() is used because the
# last list element is not guaranteed to carry the highest id, and default=0
# keeps an empty original data set from raising.
highest_img_id = max((img["id"] for img in coco_combined["images"]), default=0)
highest_ann_id = max((ann["id"] for ann in coco_combined["annotations"]), default=0)

# File names already taken in the output folder, and the full source path of
# every image that has to be transferred.
filenames = {img["file_name"] for img in coco_combined["images"]}
filepaths_and_names = [
    os.path.join(path_to_original_images, img["file_name"])
    for img in coco_combined["images"]
]

# Group the augmented annotations by their ORIGINAL image id before any id is
# rewritten. Matching against already-rewritten ids would let an annotation be
# picked up a second time when a new image id equals a later image's old id.
augmented_annotations_by_image = {}
for ann in coco_augmented["annotations"]:
    augmented_annotations_by_image.setdefault(ann["image_id"], []).append(ann)

for img in coco_augmented["images"]:
    highest_img_id += 1
    file_name = img["file_name"]

    # Resolve a file name clash by prefixing "D" and renaming the augmented
    # file on disk. Keep prefixing until the name is free both in the combined
    # set and in the augmented folder, so os.rename never overwrites a file.
    if file_name in filenames:
        new_filename = "D" + file_name
        while (new_filename in filenames
               or os.path.exists(os.path.join(path_to_augmented_images, new_filename))):
            new_filename = "D" + new_filename
        os.rename(
            os.path.join(path_to_augmented_images, file_name),
            os.path.join(path_to_augmented_images, new_filename),
        )
        file_name = new_filename
        filenames.add(file_name)

    filepaths_and_names.append(os.path.join(path_to_augmented_images, file_name))

    # Append re-numbered copies; the loaded augmented data stays untouched.
    coco_combined["images"].append(dict(img, id=highest_img_id, file_name=file_name))

    for ann in augmented_annotations_by_image.get(img["id"], []):
        highest_ann_id += 1
        coco_combined["annotations"].append(
            dict(ann, id=highest_ann_id, image_id=highest_img_id)
        )


# write coco file
writeJson(coco_combined, os.path.join(path_to_output_folder, new_filename_coco))

# move or copy the images
moveOrCopyFiles(filepaths_and_names, path_to_output_folder, move_or_copy)