In [1]:
import glob
import json
import numpy as np
import os
import shutil
from tqdm import tqdm
from pycocotools.coco import COCO  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoDemo.ipynb

In [2]:
# --- Configuration: input locations and the output dataset layout ---
# Few-shot COCO annotation file and the folder holding the real images it refers to.
source_annotation_path = "/media/data/coco17/coco/seed1/10shot_novel.json"
source_image_folder = "/media/data/coco17/coco/train2017"
# Folder with one sub-directory per synthetic sample (read by the merge cell below).
syn_annotation_folder = "/media/data/dad/cnet/experiments/coco10novel/syn_n2000_o1_s1_p640_promptenhanced"
# Output folder that will receive the mixed (real + synthetic) dataset.
target_folder = "/media/data/dad/cnet/experiments/coco10novel/mix_n2000_o1_s1_p640_promptenhanced"

target_image_folder = os.path.join(target_folder, "images")
target_annotation_path = os.path.join(target_folder, "annotation.json")

# makedirs(exist_ok=True) creates target_folder and its "images" sub-folder in
# one call, also creates missing parents, and is safe to re-run (no
# check-then-create race as with os.path.exists + os.mkdir).
os.makedirs(target_image_folder, exist_ok=True)

In [3]:
# Keep the raw annotation dict (it is extended and written back out later) ...
with open(source_annotation_path, "r") as annotation_file:
    source_annotation = json.load(annotation_file)

# ... and build a pycocotools index over the same file for id/category lookups.
coco = COCO(source_annotation_path)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [4]:
# Schema overview: top-level keys, then for each section the first record,
# that record's keys, and finally the number of records.
sections = ("images", "annotations", "categories")

print(source_annotation.keys())
for section in sections:
    print(source_annotation[section][0])
for section in sections:
    print(source_annotation[section][0].keys())
for section in sections:
    print(len(source_annotation[section]))

dict_keys(['images', 'annotations', 'categories'])
{'license': 2, 'file_name': '000000568790.jpg', 'coco_url': 'http://images.cocodataset.org/train2017/000000568790.jpg', 'height': 426, 'width': 640, 'date_captured': '2013-11-21 00:25:07', 'flickr_url': 'http://farm2.staticflickr.com/1082/5110147496_81f18ceee0_z.jpg', 'id': 568790}
{'segmentation': [[290.06, 195.29, 297.72, 190.5, 304.42, 188.59, 315.91, 184.76, 323.57, 184.76, 329.31, 180.93, 336.97, 175.19, 341.76, 172.31, 345.59, 172.31, 349.42, 187.63, 355.16, 200.08, 369.52, 213.48, 378.13, 216.35, 384.84, 218.27, 389.62, 218.27, 399.2, 216.35, 406.85, 200.08, 411.64, 175.19, 411.64, 164.66, 409.73, 153.17, 403.02, 144.55, 392.49, 135.94, 388.67, 134.02, 384.84, 134.02, 381.96, 128.28, 375.26, 118.71, 371.43, 116.79, 362.82, 116.79, 355.16, 120.62, 352.29, 120.62, 341.76, 115.83, 330.27, 115.83, 312.08, 110.09, 300.59, 108.18, 301.55, 115.83, 314.95, 119.66, 329.31, 123.49, 347.5, 133.07, 357.07, 136.89, 361.86, 145.51, 364.73, 15

In [5]:
# Sanity-check the COCO index: number of images/annotations and the largest
# ids in use (new ids for added samples are allocated above these).
image_ids = coco.getImgIds()
annotation_ids = coco.getAnnIds()

print(len(image_ids))
print(len(annotation_ids))
print(max(image_ids))
print(max(annotation_ids))
# Look up category ids by name.
print(coco.getCatIds(catNms=['person','dog','skateboard']))

117
200
570015
2223021
[1, 18]


In [6]:
# Plan:
# 1. copy the source images into the target image folder
# 2. add the syn (synthetic) annotations to the annotation dict and copy the syn images

In [7]:
# 1. copy source image data
# Every image referenced by the source annotation is copied under its
# original file name into the target image folder.
source_records = coco.loadImgs(coco.getImgIds())
img_names = [record['file_name'] for record in source_records]
for img_name in img_names:
    src_path = os.path.join(source_image_folder, img_name)
    dst_path = os.path.join(target_image_folder, img_name)
    shutil.copy(src_path, dst_path)

In [8]:
# 2. add syn annotations to the annotation dict, copy syn images
#
# Each entry of syn_annotation_folder is one synthetic sample containing
#   syn000.jpg         - the generated image
#   layout_cats.npy    - one category name per object
#   layout_bboxes.npy  - one bounding box per object
#   prompt.npy         - array whose first element is stored as the image's 'prompt'
# Every sample gets a fresh image id / file name and one annotation per object.

# New ids continue after the largest ids present in the source annotation file.
curr_img_id = max(coco.getImgIds()) + 1
curr_ann_id = max(coco.getAnnIds()) + 1

# Sorted so ids are assigned in the same order on every run (glob order is
# filesystem dependent).
syn_data_paths = sorted(glob.glob(os.path.join(syn_annotation_folder, "*")))

# Explicit name -> id lookup. coco.getCatIds(catNms=...) only *filters* the
# category list: the returned ids are de-duplicated and not in the order of
# the requested names, so indexing its result by object position mislabels
# objects (or raises IndexError) as soon as a layout has several objects.
cat_name_to_id = {cat["name"]: cat["id"] for cat in coco.loadCats(coco.getCatIds())}

# Rebuild from the original records only (ids below the first new id), so
# re-running this cell does not append the synthetic entries a second time.
new_images = [img for img in source_annotation["images"] if img["id"] < curr_img_id]
new_anns = [ann for ann in source_annotation["annotations"] if ann["id"] < curr_ann_id]

for syn_data_path in tqdm(syn_data_paths):
    # Load and validate the layout first, so a broken sample does not leave
    # an orphan image in the target folder.
    cats = np.load(os.path.join(syn_data_path, "layout_cats.npy"))
    bboxes = np.load(os.path.join(syn_data_path, "layout_bboxes.npy"))
    # str() yields a plain Python string for the JSON dump.
    prompt = str(np.load(os.path.join(syn_data_path, "prompt.npy"))[0])

    unknown = [str(name) for name in cats if str(name) not in cat_name_to_id]
    if unknown:
        raise KeyError("unknown category name(s) %s in %s" % (unknown, syn_data_path))
    catids = [cat_name_to_id[str(name)] for name in cats]

    # copy syn image under a COCO-style zero-padded name derived from its new id
    syn_image_path = os.path.join(syn_data_path, "syn000.jpg")  # source path
    syn_image_name = "%012d.jpg" % curr_img_id  # target name
    shutil.copy(syn_image_path, os.path.join(target_image_folder, syn_image_name))

    # add syn image record
    # TODO: image shape hard coded
    new_images.append({
        'file_name': syn_image_name,
        'height': 640,
        'width': 640,
        'id': curr_img_id,
        'prompt': prompt,
    })

    # add one annotation per object in the layout
    num_objects = cats.shape[0]
    for i in range(num_objects):
        bbox = bboxes[i]
        new_anns.append({
            'image_id': curr_img_id,
            'bbox': bbox.tolist(),
            # product of the last two bbox entries - presumably width * height
            # of a COCO [x, y, w, h] box; TODO confirm the stored bbox format
            'area': float(bbox[-1] * bbox[-2]),
            'category_id': catids[i],
            'id': curr_ann_id,
        })
        curr_ann_id += 1

    curr_img_id += 1

source_annotation["images"] = new_images
source_annotation["annotations"] = new_anns


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:01<00:00, 1259.03it/s]


In [9]:
# Size of each section of the annotation dict after merging.
for section_name in ("images", "annotations", "categories"):
    print(len(source_annotation[section_name]))

2117
2200
20


In [10]:
# Write the annotation dict to annotation.json in the target folder.
with open(target_annotation_path, "w+") as out_file:
    json.dump(source_annotation, out_file)