In [None]:
# https://github.com/microsoft/CameraTraps/blob/main/data_management/lila/create_lila_test_set.py

In [None]:
# auto Yaml generator

# import yaml

# data = {
#     'path' : './data/islands',  # dataset root dir
#     'train' : 'images/train2017',  # train images (relative to 'path') 128 images
#     'val' : 'images/train2017',  # val images (relative to 'path') 128 images
#     'A' : 'a',
#     'B' : dict(
#         C = 'c',
#         D = 'd',
#         E = 'e',
#     )
# }

# with open('data.yml', 'w') as outfile:
#     yaml.dump(data, outfile, default_flow_style=False)

In [45]:
import json
import random
import numpy as np
import shutil
import os
import glob

In [3]:
# Load the dataset metadata into `d`.
# The encoding is given explicitly: JSON files are normally UTF-8, while open()
# would otherwise fall back to the platform's locale encoding.
with open('./data/islands/metadata.json', encoding='utf-8') as f:
    d = json.load(f)

In [14]:
def _sample_up_to(rng, population, k, label):
    """Sample k items without replacement, or every item when fewer than k exist."""
    if k > len(population):
        print('Warning: requested {} {} annotations but only {} are available'.format(
            k, label, len(population)))
        k = len(population)
    return rng.sample(population, k)


def gen_dataset(d, n_empty=1000, n_nempty=1000, seed=None):
    """
    Pick a random subset of image records from a COCO-style dataset dict.

    Samples up to n_empty annotations of the 'empty' category and up to
    n_nempty annotations whose category is neither 'empty' nor 'human', then
    returns the image records those annotations refer to.

    :param d (dict)       : dataset with 'categories' (dicts with 'id' and 'name'),
                            'annotations' (dicts with 'image_id' and 'category_id')
                            and 'images' (dicts with 'id')
    :param n_empty (int)  : number of 'empty' annotations to sample
    :param n_nempty (int) : number of non-empty, non-human annotations to sample
    :param seed           : optional seed for a private random generator; when None
                            the module-level `random` state is used
    :return: images (list): matching entries of d['images'], in their original order.
                            Can be shorter than n_empty + n_nempty when fewer
                            annotations are available or when several sampled
                            annotations point at the same image.
    :raises ValueError    : if a sample is drawn with a negative count
    :raises AssertionError: if a sampled image id does not appear exactly once
                            in d['images']
    """
    rng = random if seed is None else random.Random(seed)

    category_name_to_id = {c['name']: c['id'] for c in d['categories']}

    # -1 stands in for "category not present"; assumes no real category id is -1.
    human_category_id = category_name_to_id.get('human', -1)  # filter out humans

    if 'empty' not in category_name_to_id:
        print('Warning: no empty images available for {}'.format('dataset'))
        empty_category_id = -1
        empty_annotations_to_download = []
    else:
        empty_category_id = category_name_to_id['empty']
        empty_annotations = [ann for ann in d['annotations']
                             if ann['category_id'] == empty_category_id]
        # Asking for more than exists used to raise ValueError; take what is there.
        empty_annotations_to_download = _sample_up_to(rng, empty_annotations, n_empty, 'empty')

    excluded_category_ids = (empty_category_id, human_category_id)
    non_empty_annotations = [ann for ann in d['annotations']
                             if ann['category_id'] not in excluded_category_ids]
    non_empty_annotations_to_download = _sample_up_to(
        rng, non_empty_annotations, n_nempty, 'non-empty')

    annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download

    # A set: several sampled annotations may belong to the same image.
    image_ids_to_download = {ann['image_id'] for ann in annotations_to_download}

    images_to_download = [im for im in d['images'] if im['id'] in image_ids_to_download]
    assert len(images_to_download) == len(image_ids_to_download), \
        'every sampled image id must appear exactly once in d["images"]'

    return images_to_download

In [46]:
# Sample 100 empty and 100 non-empty annotations, then cut the resulting image
# list at the 60% and 80% marks to get the train / validate / test pieces.
images_to_download = gen_dataset(d, 100, 100)
n_images = len(images_to_download)
split_points = [int(.6 * n_images), int(.8 * n_images)]
train, validate, test = np.split(images_to_download, split_points)

In [47]:
# Number of records in the training split (the first piece returned by np.split).
len(train)

120

In [43]:
# Collect the annotation records that belong to the sampled images.
ids = [i.get('id') for i in images_to_download]
# Falsy ids are skipped, as before; a set keeps the scan over the annotations linear.
selected_ids = {image_id for image_id in ids if image_id}
selected_annotations = [ann for ann in d['annotations'] if ann['image_id'] in selected_ids]
# Show only a few records rather than the full list.
selected_annotations[:5]

TypeError: list indices must be integers or slices, not str

In [44]:
# Peek at the first few training records instead of printing the whole array.
train[:3]

array([{'id': '986b26ec-2908-4e36-af55-98b77b121937', 'file_name': 'loc-h500ee07133376/000/019.jpg', 'seq_id': '98029aeb-27d4-4213-ab8d-a4c2244bc5aa', 'seq_num_frames': 18, 'frame_num': 13, 'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_0020.JPG', 'location': 'h500ee07133376', 'temperature': '21 c', 'width': 1920, 'height': 1080},
       {'id': '1d87e5fc-f5c6-419f-af83-1bed6bc648f6', 'file_name': 'loc-h500ee07133376/000/058.jpg', 'seq_id': '111faf1f-e62d-4350-87db-a8c648699410', 'seq_num_frames': 4, 'frame_num': 1, 'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_0059.JPG', 'location': 'h500ee07133376', 'temperature': '23 c', 'width': 1920, 'height': 1080},
       {'id': 'a68f92c2-9cf4-43fd-83ff-99d2ca07f16e', 'file_name': 'loc-h500ee07133376/000/074.jpg', 'seq_id': '2aadcfe6-34bb-4a94-9ab1-94c1aff08467', 'seq_num_frames': 2, 'frame_num': 0, 'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_0075.JPG', 'location': 'h500e

In [50]:
train_path = './data/ultralytics/images/train/'
val_path = './data/ultralytics/images/val/'
test_path = './data/ultralytics/images/test/'

# Start every run from empty split directories.
# Globbing the bare directory path matches the directory itself (os.remove then
# fails on it), so match its contents with '*' and create the directory first.
for p in (train_path, val_path, test_path):
    os.makedirs(p, exist_ok=True)
    for f in glob.glob(os.path.join(p, '*')):
        if os.path.isfile(f):
            os.remove(f)

basepath = './data/islands/images/images/'
# NOTE(review): 'file_name' contains sub-folders (e.g. 'loc-.../000/019.jpg') but
# only the basename is kept in the split directory, so two source images with
# the same basename overwrite each other. The destination names are unchanged
# here; a warning is printed so collisions are at least visible.
for split_images, split_dir in ((train, train_path), (validate, val_path), (test, test_path)):
    for im in split_images:
        dst = os.path.join(split_dir, os.path.basename(im['file_name']))
        if os.path.exists(dst):
            print('Warning: {} overwrites an existing file in {}'.format(im['file_name'], split_dir))
        shutil.copy2(basepath + im['file_name'], dst)

In [None]:
import matplotlib.pyplot as plt


In [None]:
# https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py
def loadAnns(ids=(), anns=None):
    """
    Load anns with the specified ids.

    Stand-alone version of the pycocotools method: there is no `self` here, so
    the annotations to look in are passed explicitly through `anns`.

    :param ids (array or scalar) : annotation ids to look up; a single id
                                   (e.g. an int or a string) is treated as a
                                   one-element list
    :param anns (dict or list)   : either a mapping from annotation id to
                                   annotation, or an iterable of annotation
                                   dicts (assumed to carry an 'id' key, as in
                                   the COCO format), which is indexed first
    :return: anns (object array) : loaded ann objects, in the order of `ids`
    :raises ValueError           : if ids are requested but `anns` is not given
    :raises KeyError             : if an id is not present in `anns`
    """
    # A string id must not be iterated character by character.
    if isinstance(ids, (str, bytes)) or not hasattr(ids, '__iter__'):
        ids = [ids]
    ids = list(ids)
    if not ids:
        # Nothing requested: return an empty list even without `anns`.
        return []
    if anns is None:
        raise ValueError('loadAnns needs the annotations to search: pass anns=...')
    if not isinstance(anns, dict):
        anns = {ann['id']: ann for ann in anns}
    return [anns[ann_id] for ann_id in ids]
