In [1]:
import os, sys
import argparse
import numpy as np
from collections import defaultdict
import json
import time
import multiprocessing
import copy
import os.path as osp
# from utils import IdGenerator, id2rgb
import pdb
import torch
try:
    import PIL.Image     as Image
except:
    print("Failed to import the image processing packages.")
    sys.exit(-1)
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import pylab

In [2]:
# Dataset split to process. The value is spliced into the annotation and image
# paths built below, and later cells branch on it (== 'train' / == 'val').
stage='train'  # 'train' or 'val'

In [3]:
# Input locations: the LVIS v0.5 instance annotations for the selected split,
# the matching image folder, and the JSON list of sorted category ids.
inst_gt_json_file = "".join(["../datasets/lvis/annotations/lvis_v0.5_", stage, ".json"])
data_path = "".join(['../datasets/lvis/images/', stage, '2017'])
sorted_cls_id_file = './lvis_sorted_id_all.json'

# Full ground-truth annotation dict for the split.
with open(inst_gt_json_file, 'r') as gt_handle:
    inst_gt = json.load(gt_handle)

# Category ids in their pre-computed sorted order.
with open(sorted_cls_id_file, 'r') as ids_handle:
    sorted_cls_id = json.load(ids_handle)

## set length of the base set & the size of each step

In [5]:
# Number of categories in the base set and number of categories added by each
# incremental step.
base_size = 270
step_size = 160

# Floor division keeps `n_step` an int: it is passed to range() here and in
# later cells, which rejects floats on Python 3. Categories left over after the
# last full step are not assigned to any step.
n_step = (len(inst_gt['categories']) - base_size) // step_size

annotations_dir = '../datasets/lvis/annotations'

# Base set: the first `base_size` ids of the sorted category list.
sorted_class_ids_base = sorted_cls_id[:base_size]
with open(os.path.join(annotations_dir, 'lvis_sorted_id_base.json'), 'w') as out_file:
    json.dump(sorted_class_ids_base, out_file)

# Step lists are cumulative: step i holds the base ids plus every id introduced
# up to and including step i. Output files are numbered from 1.
sorted_class_ids_step = []
for i in range(n_step):
    step_ids = sorted_cls_id[:base_size + step_size * (i + 1)]
    sorted_class_ids_step.append(step_ids)
    step_file = os.path.join(annotations_dir, 'lvis_sorted_id_step_' + str(i + 1) + '.json')
    with open(step_file, 'w') as out_file:
        json.dump(step_ids, out_file)


In [8]:
import torch
import torchvision

# Threshold read by has_valid_annotation(): when annotations carry a
# "keypoints" field, an image is kept only if its annotations contain at least
# this many visible keypoints in total.
min_keypoints_per_image = 10


def _count_visible_keypoints(anno):
    return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)


def _has_only_empty_bbox(anno):
    return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)


def has_valid_annotation(anno):
    """Tell whether an image's annotation list ``anno`` is usable.

    Returns False when the list is empty or when all of its boxes are
    degenerate (see ``_has_only_empty_bbox``). Otherwise returns True for
    annotations without a "keypoints" field; for keypoint annotations it
    returns True only if at least ``min_keypoints_per_image`` keypoints are
    visible. Only the first annotation is inspected for the "keypoints" field.
    """
    # Nothing annotated, or nothing but near-zero-area boxes.
    if len(anno) == 0 or _has_only_empty_bbox(anno):
        return False
    # Non-keypoint tasks need no further check.
    if "keypoints" not in anno[0]:
        return True
    # Keypoint tasks have a slightly different criterion: enough visible points.
    return _count_visible_keypoints(anno) >= min_keypoints_per_image

class COCODataset(torchvision.datasets.coco.CocoDetection):
    """CocoDetection variant that is aware of a sorted list of category ids.

    Args:
        root: image folder, forwarded to ``CocoDetection``.
        ann_file: annotation JSON path, forwarded to ``CocoDetection``.
        sorted_id: category ids, in the order that defines the contiguous
            1-based "sorted id" of each category.
        remove_images_without_annotations: when True, keep only the images
            that have at least one valid annotation (``has_valid_annotation``)
            among the categories in ``sorted_id``.
    """

    def __init__(self, root, ann_file, sorted_id, remove_images_without_annotations=False):
        super(COCODataset, self).__init__(root, ann_file)
        self.ids = sorted(self.ids)

        if remove_images_without_annotations:
            # Drop images with no valid annotation for the requested categories.
            coco = self.coco
            self.ids = [
                img_id
                for img_id in self.ids
                if has_valid_annotation(
                    coco.loadAnns(
                        coco.getAnnIds(imgIds=img_id, catIds=sorted_id, iscrowd=None)
                    )
                )
            ]

        # category id -> category name, for every category in the annotation file
        self.categories = dict(
            (cat['id'], cat['name']) for cat in self.coco.cats.values()
        )

        # category id <-> 1-based position in `sorted_id`
        self.category_id_to_sorted_id = {
            cat_id: position for position, cat_id in enumerate(sorted_id, 1)
        }
        self.sorted_id_to_category_id = {
            position: cat_id
            for cat_id, position in self.category_id_to_sorted_id.items()
        }

        # dataset index <-> image id
        self.id_to_img_map = dict(enumerate(self.ids))
        self.img_map_to_id = {
            img_id: index for index, img_id in self.id_to_img_map.items()
        }

    def __getitem__(self, idx):
        """Return ``(image, annotations, idx)`` for dataset index ``idx``."""
        image, annotations = super(COCODataset, self).__getitem__(idx)
        return image, annotations, idx

    def get_img_info(self, index):
        """Return the COCO image record for dataset index ``index``."""
        return self.coco.imgs[self.id_to_img_map[index]]
    
    


loading annotations into memory...
Done (t=21.46s)
creating index...
index created!


## subset construction

### 1. base set

In [9]:
# Only the COCO index attached to the dataset (`.coco`) is used below.
dataset_base = COCODataset(data_path, inst_gt_json_file, sorted_class_ids_base, True)

# Start from a shallow copy of the full ground truth. The category records are
# copied one level deeper so that tagging them with 'step_state' below does not
# leak into `inst_gt`, which later cells read again.
# NOTE(review): 'images' is carried over unfiltered -- confirm this is intended.
inst_gt_subset = inst_gt.copy()
inst_gt_subset['categories'] = [dict(cat) for cat in inst_gt['categories']]

# Keep only annotations of base categories, grouped per category in the order
# given by `sorted_class_ids_base`.
annotations_subset = []
for class_i in sorted_class_ids_base:
    ann_list = dataset_base.coco.getAnnIds(catIds=class_i)
    annotations_subset.extend(dataset_base.coco.loadAnns(ids=ann_list))
inst_gt_subset['annotations'] = annotations_subset

if stage == 'val':
    base_ids = set(sorted_class_ids_base)

    # The step lists are cumulative, so a category appears in every step from
    # the one that introduces it onward. Record the FIRST step containing each
    # id; scanning all steps without stopping would tag every non-base category
    # with the last step.
    first_step_of = {}
    for step_i, step_ids in enumerate(sorted_class_ids_step):
        for cat_id in step_ids:
            first_step_of.setdefault(cat_id, step_i)

    for cat_i in inst_gt_subset['categories']:
        if cat_i['id'] in base_ids:
            cat_i['step_state'] = 'b0'
        elif cat_i['id'] in first_step_of:
            cat_i['step_state'] = 't' + str(first_step_of[cat_i['id']])
        # Categories belonging to neither the base set nor any step stay untagged.

base_out_file = os.path.join('../datasets/lvis/annotations', 'lvis_v0.5_' + stage + '_base.json')
with open(base_out_file, 'w') as out_file:
    json.dump(inst_gt_subset, out_file)


### 2. incremental set

In [None]:
if stage == 'train':
    # The COCO index depends only on the annotation file, not on the step, so
    # it is loaded once instead of being rebuilt (and re-filtered) per step.
    coco_index = COCO(inst_gt_json_file)

    # Iterating the step lists directly avoids relying on `n_step` being an int.
    for i, step_class_ids in enumerate(sorted_class_ids_step):
        annotations_step_i = []
        images_step_i = set()  # ids of images kept for this step

        for class_c in step_class_ids:
            # Keep the images whose annotations of class_c pass the validity check.
            img_ids_cls_c = []
            for img_id_cls_c in coco_index.getImgIds(catIds=class_c):
                ann_ids = coco_index.getAnnIds(imgIds=img_id_cls_c, catIds=class_c, iscrowd=None)
                if has_valid_annotation(coco_index.loadAnns(ann_ids)):
                    img_ids_cls_c.append(img_id_cls_c)

            # getAnnIds() treats an empty imgIds list as "no image filter" and
            # would return every annotation of class_c, including the ones
            # just rejected, so skip classes with no valid image.
            if not img_ids_cls_c:
                continue

            images_step_i.update(img_ids_cls_c)
            annIds = coco_index.getAnnIds(imgIds=img_ids_cls_c, catIds=class_c)
            annotations_step_i.extend(coco_index.loadAnns(ids=annIds))

        # Set membership keeps the filters below linear in the number of
        # categories / images.
        step_class_id_set = set(step_class_ids)
        inst_gt_step_i = {
            'annotations': annotations_step_i,
            'categories': [item for item in inst_gt['categories'] if item['id'] in step_class_id_set],
            'images': [item for item in inst_gt['images'] if item['id'] in images_step_i],
        }

        # Output files are numbered from 1.
        step_out_file = os.path.join('../datasets/lvis/annotations', 'lvis_v0.5_train_step' + str(i + 1) + '.json')
        with open(step_out_file, 'w') as out_file:
            json.dump(inst_gt_step_i, out_file)


In [None]:
if stage == 'val':
    # The COCO index depends only on the annotation file, not on the step, so
    # it is loaded once instead of being rebuilt per step.
    coco_index = COCO(inst_gt_json_file)

    base_ids = set(sorted_class_ids_base)

    # The step lists are cumulative; remember the FIRST step that contains each
    # category id so that 'step_state' names the step introducing the category.
    first_step_of = {}
    for step_i, step_ids in enumerate(sorted_class_ids_step):
        for cat_id in step_ids:
            first_step_of.setdefault(cat_id, step_i)

    # Iterating the step lists directly avoids relying on `n_step` being an int.
    for i, step_class_ids in enumerate(sorted_class_ids_step):
        annotations_step_i = []
        images_step_i = set()  # ids of images containing any category of this step

        for class_c in step_class_ids:
            img_ids_cls_c = coco_index.getImgIds(catIds=class_c)
            images_step_i.update(img_ids_cls_c)
            annIds = coco_index.getAnnIds(imgIds=img_ids_cls_c, catIds=class_c)
            annotations_step_i.extend(coco_index.loadAnns(ids=annIds))

        # Tag each category of this step. Records are copied first so that
        # neither `inst_gt` nor another step's output is modified.
        step_class_id_set = set(step_class_ids)
        categories_step_i = []
        for item in inst_gt['categories']:
            if item['id'] not in step_class_id_set:
                continue
            cat_i = dict(item)
            if cat_i['id'] in base_ids:
                cat_i['step_state'] = 'b0'
            else:
                cat_i['step_state'] = 't' + str(first_step_of[cat_i['id']])
            categories_step_i.append(cat_i)

        inst_gt_step_i = {
            'annotations': annotations_step_i,
            'categories': categories_step_i,
            'images': [item for item in inst_gt['images'] if item['id'] in images_step_i],
        }

        # Written inside the loop so that every step gets its own file
        # (numbered from 1), not only the last one.
        step_out_file = os.path.join('../datasets/lvis/annotations', 'lvis_v0.5_val_step' + str(i + 1) + '.json')
        with open(step_out_file, 'w') as out_file:
            json.dump(inst_gt_step_i, out_file)
