# mix_annotations.ipynb
Combines several COCO-format annotation `.json` files into a single file.

### Load JSON files from a list of paths

In [1]:
import json 

def read_annotations(ann_paths):
    """Load several JSON annotation files into memory.

    Args:
        ann_paths: Iterable of paths to ``.json`` files; they are read in
            the order given.

    Returns:
        list: One parsed JSON object per path, in the same order as
        ``ann_paths``. Empty when ``ann_paths`` is empty.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    # One entry per dataset, in input order.
    my_dicts = []
    for ann_path in ann_paths:
        # Read explicitly as UTF-8 rather than relying on the platform's
        # default encoding; this matches how the other cells of this
        # notebook open JSON files.
        with open(ann_path, 'rt', encoding='UTF-8') as f:
            data = json.load(f)
        my_dicts.append(data)
        # Progress message, printed once the file has been fully parsed.
        print("Completed: ", ann_path )
    return my_dicts

### Mix annotations from the loaded dicts

In [2]:
import copy
def mix_annotations(my_dicts):
    """Merge several annotation dicts into one with non-colliding ids.

    The first dict is deep-copied and used as the base, so every key it
    holds besides ``"images"`` and ``"annotations"`` is carried over
    unchanged. For each following dict, every image and annotation is
    shallow-copied, its ids are shifted past the ids already present in the
    merged result, and it is appended. Only the ``"images"`` and
    ``"annotations"`` lists of the following dicts are read; their other
    keys are ignored.

    Args:
        my_dicts: Sequence of dicts, each with an ``"images"`` list (items
            carrying ``"id"``) and an ``"annotations"`` list (items carrying
            ``"id"`` and ``"image_id"``). Ids are assumed to be
            non-negative integers.

    Returns:
        dict: A new dict containing all images and annotations. The input
        dicts are not modified.

    Raises:
        IndexError: If ``my_dicts`` is empty.
    """
    combined_dict = copy.deepcopy(my_dicts[0])
    for d in my_dicts[1:]:
        # Offsets are derived from the largest id merged so far, not from
        # the size of a single dataset: this keeps ids unique when three or
        # more datasets are merged and when ids are sparse or do not start
        # at zero.
        im_offset = max((im["id"] for im in combined_dict["images"]), default=-1) + 1
        ann_offset = max((a["id"] for a in combined_dict["annotations"]), default=-1) + 1

        for image in d["images"]:
            temp = image.copy()
            temp["id"] = im_offset + temp["id"]
            combined_dict["images"].append(temp)
        for ann in d["annotations"]:
            temp = ann.copy()
            temp["id"] = ann_offset + temp["id"]
            # Shift the reference by the image offset so the annotation
            # still points at the same (renumbered) image.
            temp["image_id"] = im_offset + temp["image_id"]
            combined_dict["annotations"].append(temp)
    return combined_dict


## Mixing annotations example

In [3]:
# Annotation files to merge, in order. mix_annotations uses the first entry
# as the base of the combined dataset.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook elsewhere.
ann_paths = [
    "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/mixed_ucb.json",
    "/home/josmar/proyectos/codes/datasets/ucb_gait_frames/annotations/ucb_gait_poly.json",
    ]
# Parse every file into memory (one dict per path).
my_dicts= read_annotations(ann_paths)

Completed:  /home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/mixed_ucb.json
Completed:  /home/josmar/proyectos/codes/datasets/ucb_gait_frames/annotations/ucb_gait_poly.json


In [4]:
# Print, for every loaded dataset, how many images and annotations it holds.
# The labels say "IDs" but the printed values are element counts.
for index, d in enumerate(my_dicts):
    print("\nDataset ",index)
    print("Image IDs:\t" , len(d["images"]))
    print("Annotation IDs:\t" , len(d["annotations"]))


Dataset  0
Image IDs:	 20000
Annotation IDs:	 39999

Dataset  1
Image IDs:	 12300
Annotation IDs:	 12300


In [5]:
# Merge all loaded datasets into a single dict (ids of the later datasets are
# shifted by mix_annotations).
combined_dict = mix_annotations(my_dicts)

In [6]:
# Report how many images and annotations ended up in the merged dataset.
print("\nCombined dataset")
for label, key in (("Image IDs:\t", "images"), ("Annotation IDs:\t", "annotations")):
    print(label, len(combined_dict[key]))


Combined dataset
Image IDs:	 32300
Annotation IDs:	 52299


### Saving the generated dataset

In [7]:
# Destination of the merged dataset.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
out_path = "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/combined.json"
# Serialise the merged dict with json.dump's default settings (no indentation).
with open(out_path, 'w') as fp:
    json.dump(combined_dict, fp)

## Creating train, validation and test datasets from the generated JSON
Based on https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py

In [4]:
def save_coco(file, info, licenses, images, annotations, categories):
    """Write the five given sections to ``file`` as one JSON object.

    The object has the keys ``info``, ``licenses``, ``images``,
    ``annotations`` and ``categories``. It is written as UTF-8 text with a
    two-space indent and keys sorted alphabetically. An existing file is
    overwritten.

    Args:
        file: Path of the file to write.
        info: Value stored under ``"info"``.
        licenses: Value stored under ``"licenses"``.
        images: Value stored under ``"images"``.
        annotations: Value stored under ``"annotations"``.
        categories: Value stored under ``"categories"``.
    """
    with open(file, 'wt', encoding='UTF-8') as out_stream:
        payload = dict(
            info=info,
            licenses=licenses,
            images=images,
            annotations=annotations,
            categories=categories,
        )
        json.dump(payload, out_stream, indent=2, sort_keys=True)

In [5]:
def filter_annotations(annotations, images):
    """Keep only the annotations that belong to one of ``images``.

    Args:
        annotations: Iterable of dicts carrying an ``"image_id"`` entry.
        images: Iterable of dicts carrying an ``"id"`` entry.

    Returns:
        list: The annotations (same objects, original order) whose
        ``int(image_id)`` equals the ``int(id)`` of some image.

    Raises:
        KeyError: If an item lacks the expected key.
        ValueError / TypeError: If an id cannot be converted with ``int()``.
    """
    # A set makes each membership test O(1); with a list, the filter costs
    # len(annotations) * len(images) comparisons, which is prohibitive for
    # datasets with tens of thousands of entries.
    image_ids = {int(image['id']) for image in images}
    return [ann for ann in annotations if int(ann['image_id']) in image_ids]

In [6]:
import random
import json
import funcy
from sklearn.model_selection import train_test_split

# --- Configuration -----------------------------------------------------------
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
out_path = "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/combined.json"
has_annotations = True   # when True, images without any annotation are dropped
train_split = 0.8        # share of all images that goes to the training set
val_split = 0.1          # share of all images that goes to the validation set
split_seed = 42          # fixed seed so the split is reproducible across runs
train_file = "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/train_combined.json"
val_file = "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/val_combined.json"
test_file = "/home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/test_combined.json"

# --- Load --------------------------------------------------------------------
# The file is only needed while parsing, so it is closed right after the load.
with open(out_path, 'rt', encoding='UTF-8') as coco_file:
    coco = json.load(coco_file)

info = coco['info']
licenses = coco['licenses']
images = coco['images']
annotations = coco['annotations']
categories = coco['categories']

# Image count before any filtering.
number_of_images = len(images)

# Set of image ids referenced by at least one annotation (set => O(1) lookups).
images_with_annotations = {int(a['image_id']) for a in annotations}

if has_annotations:
    # Ids are normalised with int() on both sides, as filter_annotations does.
    images = [i for i in images if int(i['id']) in images_with_annotations]

# --- Split -------------------------------------------------------------------
# x = training images, y = remaining images (validation + test).
x, y = train_test_split(images, train_size=train_split, shuffle=True,
                        random_state=split_seed)

# val_split is expressed relative to the whole dataset; rescale it to the
# share of the remainder. The result is kept in its own variable so the
# configured val_split is not overwritten and the cell can be re-run safely.
val_share_of_rest = round(val_split/(1-train_split) , 2)
# y = validation images, z = test images.
y, z = train_test_split(y, train_size=val_share_of_rest, shuffle=True,
                        random_state=split_seed)

# --- Save --------------------------------------------------------------------
save_coco(train_file, info, licenses, x, filter_annotations(annotations, x), categories)
save_coco(val_file, info, licenses, y, filter_annotations(annotations, y), categories)
save_coco(test_file, info, licenses, z, filter_annotations(annotations, z), categories)

print("Saved\n"
      "         {} entries in {}\n"
      "         {} entries in {}\n"
      "         {} entries in {}".format(len(x), train_file, len(y), val_file, len(z), test_file))

Saved
         25840 entries in /home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/train_combined.json
         3230 entries in /home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/val_combined.json
         3230 entries in /home/josmar/proyectos/codes/01_annotation_tools/test_data/annotations/test_combined.json


In [30]:
# Scratch cell: re-assign the split ratios and display val_split.
# NOTE(review): the recorded output of this cell shows 0.5, which the code as
# written cannot print (val_split is assigned 0.1 right before the print).
# The output is presumably stale from an earlier revision; re-run to confirm.
train_split = 0.8
val_split = 0.1

print(val_split)

0.5
