In [None]:
# https://github.com/microsoft/CameraTraps/blob/main/data_management/lila/create_lila_test_set.py
# https://github.com/cindyweng/coco-to-yolo-by-category/blob/5fcd1ae51af89c1c678d903a4aff5d32cba25b0b/coco-to-yolo-by-category.py#L41

In [None]:
# auto Yaml generator

# import yaml

# data = {
#     'path' : './data/islands',  # dataset root dir
#     'train' : 'images/train2017',  # train images (relative to 'path') 128 images
#     'val' : 'images/train2017',  # val images (relative to 'path') 128 images
#     A = 'a',
#     B = dict(
#         C = 'c',
#         D = 'd',
#         E = 'e',
#     )
# }

# with open('data.yml', 'w') as outfile:
#     yaml.dump(data, outfile, default_flow_style=False)

In [32]:
import json
import random
import numpy as np
import shutil
import os
import glob
from pathlib import Path
import matplotlib.pyplot as plt
from operator import itemgetter 
from itertools import groupby

random.seed(42)

In [2]:
with open('./data/islands/metadata.json') as f:
    d = json.load(f)

In [3]:
def gen_dataset(d, n_empty= 1000, n_nempty=1000):
    n_empty_images_per_dataset = n_empty
    n_non_empty_images_per_dataset = n_nempty

    category_id_to_name = {c['id']:c['name'] for c in d['categories']}
    category_name_to_id = {c['name']:c['id'] for c in d['categories']}


    human_category_id = category_name_to_id['human'] if 'human' in category_name_to_id.keys() else -1 # filter out humans


    if 'empty' not in category_name_to_id:
        print('Warning: no empty images available for {}'.format('dataset'))
        empty_category_id = -1
        empty_annotations = []
        empty_annotations_to_download = []
    else:
        empty_category_id = category_name_to_id['empty']        
        empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
        empty_annotations_to_download = random.sample(empty_annotations, n_empty_images_per_dataset)        
        
    non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] not in (empty_category_id, human_category_id)]

    non_empty_annotations_to_download = random.sample(non_empty_annotations, n_non_empty_images_per_dataset)
    annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download
    image_ids_to_download = set([ann['image_id'] for ann in annotations_to_download])
    assert len(image_ids_to_download) == len(set(image_ids_to_download))

    images_to_download = []
    for im in d['images']:
        if im['id'] in image_ids_to_download:
            images_to_download.append(im)
    assert len(images_to_download) == len(image_ids_to_download)
    
    return images_to_download

In [4]:
images_to_download = gen_dataset(d, 100, 100)
train, validate, test = np.split(images_to_download, [int(.6*len(images_to_download)), int(.8*len(images_to_download))])

In [31]:
basepath = './data/islands/images/images/'
train_path = './data/ultralytics/images/train/'
val_path = './data/ultralytics/images/val/'
test_path = './data/ultralytics/images/test/'

for p in (train_path, val_path, test_path):
    files = glob.glob(p)
    for f in glob.glob(p + '\*.jpg', recursive=True):
        if f.endswith('.jpg'): # obsolete double check for personal sanity"
            os.remove(f)

for im in train: 
    shutil.copy2(basepath+im['file_name'], train_path)
for im in validate: 
    shutil.copy2(basepath+im['file_name'], val_path)
for im in test: 
    shutil.copy2(basepath+im['file_name'], test_path)

In [55]:
# metadata_full = d 
# for i in range(len(metadata_full['images'])):
#     metadata_full['images'][i]['image_id'] = metadata_full['images'][i].pop('id')

# my_id = itemgetter('image_id')
# merged_dicts = []

# for k, v in groupby(sorted((metadata_full['annotations'] + metadata_full['images']), key=my_id), key=my_id):
#     merged_dicts.append({key:val for d in v for key, val in d.items()})

In [66]:
anns = [d['annotations'] for id in ids if id]

In [85]:
train[0]['id']

'39a5096b-1476-447e-b8eb-afee0d90c838'

In [88]:
a = lookup.get('39a5096b-1476-447e-b8eb-afee0d90c838')

In [90]:
a

In [47]:
ids = [i.get('id') for i in train]
lookup = {
    m["image_id"]: {"bbox": m["bbox"], "category_id": m["category_id"]}
    for m in d["annotations"]
    if m["image_id"] in ids
}

In [48]:
lookup.get(train[0].get('id'))

{'bbox': [0, 0, 1919, 1079], 'category_id': 0}

In [43]:
train[0]

{'id': '6910ebc2-95db-46d4-a384-962859b27ac6',
 'file_name': 'loc-h500ee07133376/003/197.jpg',
 'seq_id': '5daed48f-54d8-481e-9f33-47e279fada36',
 'seq_num_frames': 13,
 'frame_num': 4,
 'original_relative_path': '2011_09_Set/Station%201/2011/2011-11-02/IMG_3195.JPG',
 'location': 'h500ee07133376',
 'temperature': '28 c',
 'width': 1920,
 'height': 1080}

In [44]:
d['annotations'][0]

{'id': '16e360cc-4a53-11eb-b9b3-000d3a74c7de',
 'image_id': 'dd8b68e9-360b-429e-a43b-892c2e036455',
 'category_id': 0,
 'sequence_level_annotation': False,
 'bbox': [0, 0, 1919, 1079]}

In [53]:

def truncate(n, decimals=0):
    multiplier = 10 ** decimals
    return int(n * multiplier) / multiplier


def createLabelsSingle(imageList, basedir, metadata_full):
    # For single objects only

    ids = [i.get('id') for i in imageList]
    # generate lookup for bbox and category id based on image id
    lookup = {
        m["image_id"]: {"bbox": m["bbox"], "category_id": m["category_id"]}
        for m in d["annotations"]
        if m["image_id"] in ids
    }

    for im in imageList:

        ann = lookup.get(im['id'])

        dw = 1. / im['width']
        dh = 1. / im['height']
        
        
        filename = im['file_name'].replace(".jpg", ".txt")
        print(filename, ann)

        with open(basedir + "../labels/" + filename, "a") as myfile:
            xmin = ann["bbox"][0]
            ymin = ann["bbox"][1]
            xmax = ann["bbox"][2] + ann["bbox"][0]
            ymax = ann["bbox"][3] + ann["bbox"][1]
            
            x = (xmin + xmax)/2
            y = (ymin + ymax)/2
            
            w = xmax - xmin
            h = ymax-ymin
            
            x = x * dw
            w = w * dw
            y = y * dh
            h = h * dh
            
            mystring = str(ann['category_id'] + str(truncate(x, 7)) + " " + str(truncate(y, 7)) + " " + str(truncate(w, 7)) + " " + str(truncate(h, 7)))
            myfile.write(myst
            ring)
            myfile.write("\n")

        myfile.close()

In [54]:
createLabelsSingle(train, train_path, d)

loc-h500ee07133376/003/197.txt {'bbox': [0, 0, 1919, 1079], 'category_id': 0}


FileNotFoundError: [Errno 2] No such file or directory: './data/ultralytics/images/train/../labels/loc-h500ee07133376/003/197.txt'