In [1]:
# https://docs.opencv.org/3.4/d1/dc5/tutorial_background_subtraction.html
import glob
import json
import os
import random
import shutil
from collections import Counter
from itertools import groupby
from operator import itemgetter

import cv2
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from sklearn.model_selection import train_test_split
random.seed(42)

In [28]:
# Dataset locations, given relative to the notebook's working directory.
basepath = '../data/islands/images/images/' # refactor to match path naming
metadata_path = '../data/islands/metadata.json'
seq_path = '../data/testseq/'

# Load the metadata JSON into `d`; later cells index it by 'categories',
# 'images' and 'annotations'.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode the file on some systems.
with open(metadata_path, encoding='utf-8') as f:
    d = json.load(f)

In [29]:
# Display the category table from the loaded metadata (a list of {'id', 'name'} records).
d['categories']

[{'id': 0, 'name': 'empty'},
 {'id': 1, 'name': 'human'},
 {'id': 2, 'name': 'fox'},
 {'id': 3, 'name': 'skunk'},
 {'id': 4, 'name': 'rodent'},
 {'id': 5, 'name': 'bird'},
 {'id': 6, 'name': 'other'}]

In [30]:
# Preview only the first few image records: the full list holds tens of
# thousands of entries and would flood the notebook output.
d['images'][:3]

[{'id': 'dd8b68e9-360b-429e-a43b-892c2e036455',
  'file_name': 'loc-h500ee07133376/000/000.jpg',
  'seq_id': '836f6487-50fd-42f5-8dcc-336fc538b7a8',
  'seq_num_frames': 6,
  'frame_num': 0,
  'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_0001.JPG',
  'location': 'h500ee07133376',
  'temperature': '21 c',
  'width': 1920,
  'height': 1080},
 {'id': '46619c4d-2d36-4cca-9cd6-933c669a44ab',
  'file_name': 'loc-h500ee07133376/000/001.jpg',
  'seq_id': '836f6487-50fd-42f5-8dcc-336fc538b7a8',
  'seq_num_frames': 6,
  'frame_num': 1,
  'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_0002.JPG',
  'location': 'h500ee07133376',
  'temperature': '21 c',
  'width': 1920,
  'height': 1080},
 {'id': '62782485-48ed-4da7-b795-7af43393aaf1',
  'file_name': 'loc-h500ee07133376/000/002.jpg',
  'seq_id': '836f6487-50fd-42f5-8dcc-336fc538b7a8',
  'seq_num_frames': 6,
  'frame_num': 2,
  'original_relative_path': '2011_09_Set/Station%201/2011/2011-09-13/IMG_000

In [31]:
# Preview only the first few annotation records rather than rendering the
# entire list, which is far too long to be readable as cell output.
d['annotations'][:5]

[{'id': '16e360cc-4a53-11eb-b9b3-000d3a74c7de',
  'image_id': 'dd8b68e9-360b-429e-a43b-892c2e036455',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360cd-4a53-11eb-97ff-000d3a74c7de',
  'image_id': '46619c4d-2d36-4cca-9cd6-933c669a44ab',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360ce-4a53-11eb-832e-000d3a74c7de',
  'image_id': '62782485-48ed-4da7-b795-7af43393aaf1',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360cf-4a53-11eb-9447-000d3a74c7de',
  'image_id': 'c4465075-477d-45b2-be07-fad35b2fec2c',
  'category_id': 1,
  'sequence_level_annotation': False,
  'bbox': [0, 657, 1919, 391]},
 {'id': '16e360d0-4a53-11eb-90ec-000d3a74c7de',
  'image_id': '7e2b120a-78ef-4c06-bdae-4f4070acf05e',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360d1-4a53-11eb-bc2c-000d3a74c7d

In [32]:
# Drop every image annotated as human (category_id 1) and every image whose
# sequence has at most two frames, along with the annotations attached to them.
# (Original author's note: 5981 human labeled.)
human_imageid = [
    anno_rec.get('image_id')
    for anno_rec in d['annotations']
    if anno_rec.get('category_id') == 1
]
noseq_imageid = [
    img_rec.get('id')
    for img_rec in d['images']
    if img_rec.get('seq_num_frames') <= 2
]

# Union of both exclusion lists, as a set for fast membership checks below.
id_filter = set(noseq_imageid) | set(human_imageid)

d['annotations'] = [
    anno_rec
    for anno_rec in d['annotations']
    if not (anno_rec.get('image_id') in id_filter)
]
d['images'] = list(
    filter(lambda img_rec: img_rec.get('id') not in id_filter, d['images'])
)

In [33]:
# Relabel annotations with category_id 6 ('other' in the category table) as
# category_id 1 (the id listed for 'human'). Annotations are edited in place;
# d['categories'] itself is not modified by this cell.
for anno in filter(lambda rec: rec.get('category_id') == 6, d['annotations']):
    anno['category_id'] = 1

Some exploratory data analysis (EDA): sequence-length and category distributions.

In [34]:
# Count images by the 'seq_num_frames' value they carry. The tally is per
# image, not per sequence. Counter is imported in the notebook's import cell.
c = Counter(img_rec.get('seq_num_frames') for img_rec in d['images'])
c.most_common()

[(3, 50949),
 (10, 41433),
 (6, 21322),
 (20, 17669),
 (9, 10153),
 (30, 8574),
 (12, 6508),
 (15, 4767),
 (4, 4652),
 (40, 4377),
 (18, 3506),
 (60, 3115),
 (5, 2951),
 (50, 2698),
 (21, 2369),
 (24, 2246),
 (7, 1875),
 (27, 1642),
 (8, 1606),
 (70, 1330),
 (90, 1260),
 (36, 1245),
 (33, 1089),
 (39, 955),
 (42, 838),
 (120, 838),
 (11, 809),
 (741, 741),
 (80, 720),
 (14, 693),
 (669, 669),
 (63, 581),
 (16, 560),
 (45, 542),
 (17, 519),
 (13, 484),
 (480, 480),
 (57, 456),
 (23, 451),
 (438, 438),
 (435, 435),
 (48, 399),
 (51, 357),
 (81, 356),
 (19, 339),
 (84, 336),
 (110, 330),
 (160, 320),
 (75, 300),
 (100, 300),
 (96, 288),
 (54, 275),
 (270, 270),
 (130, 259),
 (255, 255),
 (41, 245),
 (22, 240),
 (240, 240),
 (78, 234),
 (114, 228),
 (69, 207),
 (102, 204),
 (31, 182),
 (170, 170),
 (159, 159),
 (25, 150),
 (149, 149),
 (72, 144),
 (141, 141),
 (138, 138),
 (66, 132),
 (29, 127),
 (126, 126),
 (105, 105),
 (99, 105),
 (32, 96),
 (47, 94),
 (34, 92),
 (44, 88),
 (87, 87),
 (

In [35]:
# Count annotations per category_id in the current (filtered and remapped)
# annotation list. Note that this rebinds `c`, replacing any earlier counter.
c = Counter(anno_rec.get('category_id') for anno_rec in d['annotations'])
c.most_common()

[(0, 102230), (4, 77759), (2, 44337), (5, 9284), (3, 957), (1, 144)]

# Start dataset build

In [36]:
# Build `meta_anno`: one flat record per image_id combining the image's
# metadata with its annotation fields.

# Rename the image key 'id' to 'image_id' so images and annotations share a
# join key. The membership guard makes the cell safe to re-run: records that
# were already renamed are skipped instead of raising KeyError.
for image_rec in d['images']:
    if 'id' in image_rec:
        image_rec['image_id'] = image_rec.pop('id')

my_id = itemgetter('image_id')
meta_anno = []

# groupby() only groups adjacent items, so the combined list is sorted by the
# same key first. Records in a group are merged in order, so a later record
# overwrites keys set by an earlier one; if an image has several annotations,
# only the last annotation's 'id' / 'category_id' / 'bbox' survive.
for image_key, records in groupby(
    sorted(d['annotations'] + d['images'], key=my_id), key=my_id
):
    merged = {}
    for record in records:
        merged.update(record)
    meta_anno.append(merged)

NameError: name 'groupby' is not defined

In [27]:
# Spot-check the head of the annotation list instead of rendering all of it.
d['annotations'][:5]

[{'id': '16e360cc-4a53-11eb-b9b3-000d3a74c7de',
  'image_id': 'dd8b68e9-360b-429e-a43b-892c2e036455',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360cd-4a53-11eb-97ff-000d3a74c7de',
  'image_id': '46619c4d-2d36-4cca-9cd6-933c669a44ab',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360ce-4a53-11eb-832e-000d3a74c7de',
  'image_id': '62782485-48ed-4da7-b795-7af43393aaf1',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360d0-4a53-11eb-90ec-000d3a74c7de',
  'image_id': '7e2b120a-78ef-4c06-bdae-4f4070acf05e',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360d1-4a53-11eb-bc2c-000d3a74c7de',
  'image_id': '70738107-86c9-4539-aad3-3ca1aee9d0cf',
  'category_id': 0,
  'sequence_level_annotation': False,
  'bbox': [0, 0, 1919, 1079]},
 {'id': '16e360d2-4a53-11eb-aeb2-000d3a74c7de

In [None]:
# Stratified 80/20 split of the merged per-image records.
# `labels` and `full_length` were referenced but never defined, so they are
# derived from `meta_anno` here.
# NOTE(review): assumes every merged record carries a 'category_id' (i.e. each
# image has an annotation); a KeyError here means that does not hold - confirm.
labels = [rec['category_id'] for rec in meta_anno]
full_length = len(meta_anno)

# NOTE(review): the split is per image, so frames of one 'seq_id' can land in
# both train and test - confirm whether a sequence-level split is wanted.
train_anno, test_anno = train_test_split(
    meta_anno,
    test_size=(full_length // 5),
    random_state=42,
    stratify=labels,
)

# Show the split sizes rather than the full record lists.
len(train_anno), len(test_anno)

In [8]:
# Rewrite category_id 22 to 8 on every annotation, in place.
# NOTE(review): the category table shown earlier only lists ids 0-6, so this
# looks like a leftover from another dataset - confirm before keeping.
for anno in d['annotations']:
    if anno.get('category_id') != 22:
        continue
    anno['category_id'] = 8

6