In [6]:
import os

In [7]:
# Folder that contains the annotation batches listed in source_dir_list.
source_root = '../../annotation_fine/data'
# Batches to merge; each one is read from source_root/<name> and they are
# processed in exactly this order.
source_dir_list = [
    'data_Task_0129_v2',
    'data_Task_0205_v2',
    'data_Task_0219_v2',
    'data_val_v2',
]
# Folder that receives the linked masks/images and the merged JSON files.
target_root = '../data/data_v2'

### Process masks

In [None]:
# Symlink every mask directory of each source batch into `target_root/masks` (images are linked in the "Process images" section)
def create_dir(path):
    """Create directory `path` (and any missing parents) if it is not there yet.

    Calling this on an existing directory is a no-op. ``exist_ok=True`` is used
    instead of a separate ``os.path.exists`` check so the call cannot fail when
    the directory is created by someone else between the check and the
    ``makedirs`` call.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. missing permissions).
    """
    os.makedirs(path, exist_ok=True)

# For every batch, link each sub-directory of its `final_masks` folder into the
# shared `masks` folder under target_root.
for source_dir in source_dir_list:
    target_mask_root = os.path.join(target_root, 'masks')
    create_dir(target_mask_root)
    source_mask_path = os.path.join(source_root, source_dir, 'final_masks')

    # Only sub-directories are linked; plain files in `final_masks` are ignored.
    mask_dirs = []
    for entry in os.listdir(source_mask_path):
        if os.path.isdir(os.path.join(source_mask_path, entry)):
            mask_dirs.append(entry)

    for mask_dir in mask_dirs:
        link_target = os.path.join(source_mask_path, mask_dir)
        link_name = os.path.join(target_mask_root, mask_dir)
        link_parent = os.path.dirname(link_name)

        if os.path.lexists(link_name):
            # lexists (not exists) so that an already present link is skipped
            # even when it is dangling.
            print(f"Destination path {link_name} already exists. Skipping...")
        elif not os.access(link_parent, os.W_OK):
            print(f"Write permission not available for {link_parent}. Skipping...")
        else:
            # Absolute paths keep the link valid regardless of the directory it
            # is later resolved from.
            link_target = os.path.abspath(link_target)
            link_name = os.path.abspath(link_name)
            try:
                os.symlink(link_target, link_name)
            except Exception as e:
                print(f"Failed to create soft link from {link_target} to {link_name}: {e}")


### Process images

In [22]:
# Link each sub-directory of the frames folder into `target_root/images`.
source_img_path = '../data/vidor/frames'
target_img_root = os.path.join(target_root, 'images')
create_dir(target_img_root)

# Only sub-directories are considered; plain files are ignored.
img_dirs = [
    name
    for name in os.listdir(source_img_path)
    if os.path.isdir(os.path.join(source_img_path, name))
]
for frame_dir in img_dirs:
    link_target = os.path.join(source_img_path, frame_dir)
    link_name = os.path.join(target_img_root, frame_dir)
    link_parent = os.path.dirname(link_name)

    # Skip anything that is already present (including dangling links).
    if os.path.lexists(link_name):
        print(f"Destination path {link_name} already exists. Skipping...")
        continue

    # Skip when the destination folder is not writable.
    if not os.access(link_parent, os.W_OK):
        print(f"Write permission not available for {link_parent}. Skipping...")
        continue

    # Link through absolute paths so the link does not depend on the cwd.
    link_target, link_name = os.path.abspath(link_target), os.path.abspath(link_name)
    try:
        os.symlink(link_target, link_name)
    except Exception as e:
        print(f"Failed to create soft link from {link_target} to {link_name}: {e}")


### Process json

In [3]:
import json
def load_json(file):
    """Read the JSON document stored at `file` and return the decoded object.

    The file is opened explicitly as UTF-8 so that the result does not depend
    on the platform's default text encoding.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)

def save_json(obj, file):
    """Serialize `obj` as JSON and write it to `file`, replacing its content.

    Args:
        obj: JSON-serializable object to store.
        file: Path of the file to write.

    Raises:
        TypeError: If `obj` contains values that cannot be encoded as JSON.
            In that case `file` is left untouched.
        OSError: If the file cannot be opened or written.
    """
    # Encode before opening the file: opening with "w" truncates immediately,
    # so a serialization error after that point would destroy an existing file
    # (this notebook overwrites pvsg.json in place).
    payload = json.dumps(obj)
    with open(file, "w", encoding="utf-8") as f:
        f.write(payload)

In [33]:
# Concatenate the `pvsg.json` records of every batch into one list.
full_json = []
for source_dir in source_dir_list:
    batch_records = load_json(os.path.join(source_root, source_dir, 'pvsg.json'))
    for record in batch_records:
        # Stable sort on a boolean key: objects flagged `is_thing` come first,
        # and the original relative order is kept inside each group.
        record['objects'] = sorted(record['objects'], key=lambda obj: not obj['is_thing'])
        full_json.append(record)
print('Finish merge, number of files: ', len(full_json))
save_json(full_json, os.path.join(target_root, 'pvsg.json'))

Finish merge, number of files:  287


### Regularize all objects

In [8]:
import os
# Reload the merged annotations from disk so this section can start from the
# saved `pvsg.json`.
merged_json_path = os.path.join(target_root, 'pvsg.json')
full_json = load_json(merged_json_path)

In [9]:
# Categories treated as "stuff"; every other category is treated as a "thing"
# by the consistency check and the correction cell below.
stuff_classes = [
    "floor",
    "ground",
    "grass",
    "rock",
    "tree",
    "ceiling",
    "wall",
    "snow",
    "sky",
    "water",
    "sand",
]

In [10]:
# print wrong objects
# An object is inconsistent when its `is_thing` flag disagrees with the
# stuff_classes list: a stuff category flagged as a thing, or any other
# category not flagged as a thing.
for record in full_json:
    for obj in record['objects']:
        listed_as_stuff = obj['category'] in stuff_classes
        if listed_as_stuff == bool(obj['is_thing']):
            print(record['video_id'], obj)

1002_4060588783 {'object_id': 2, 'category': 'table', 'is_thing': False, 'status': []}
1018_6782372702 {'object_id': 1, 'category': 'shoe', 'is_thing': False, 'status': []}
1019_3217749642 {'object_id': 1, 'category': 'curtain', 'is_thing': False, 'status': []}
1101_5408448203 {'object_id': 1, 'category': 'carpet', 'is_thing': False, 'status': []}
1010_5424217420 {'object_id': 2, 'category': 'child', 'is_thing': False, 'status': []}
1006_4533035526 {'object_id': 4, 'category': 'ballon', 'is_thing': False, 'status': []}
0089_7180987582 {'object_id': 7, 'category': 'car', 'is_thing': False, 'status': []}
0006_2889117240 {'object_id': 2, 'category': 'box', 'is_thing': False, 'status': []}
1008_3243801466 {'object_id': 1, 'category': 'table', 'is_thing': False, 'status': []}
1008_4294147314 {'object_id': 1, 'category': 'baby', 'is_thing': False, 'status': []}
1008_5635020430 {'object_id': 3, 'category': 'camera', 'is_thing': False, 'status': []}
1009_12760412393 {'object_id': 1, 'category'

In [11]:
# modify wrong objects
# Overwrite every `is_thing` flag so it is derived purely from stuff_classes.
# NOTE(review): the merge cell sorted objects things-first using the old
# flags; that order is not re-established here after flags change. Confirm
# whether downstream code relies on it.
for record in full_json:
    for obj in record['objects']:
        obj['is_thing'] = obj['category'] not in stuff_classes

In [20]:
# list all classes
# Collect the category of every annotated object across all videos.
category_list = [
    obj['category']
    for record in full_json
    for obj in record['objects']
]

all_category = set(category_list)
all_stuff = set(stuff_classes)
# Everything that is not a listed stuff class counts as an object class.
all_object = all_category.difference(all_stuff)
print('Object: ', len(all_object))
print('Stuff: ', len(all_stuff))
print('All: ', len(all_category))

Object:  94
Stuff:  11
All:  105


In [28]:
# Alphabetically ordered class vocabularies (sorted() accepts the sets directly).
all_object_list = sorted(all_object)
all_stuff_list = sorted(all_stuff)
print('Object', all_object_list)
print('Stuff', all_stuff_list)

Object ['adult', 'baby', 'bag', 'ball', 'ballon', 'basket', 'bat', 'bed', 'bench', 'bike', 'bird', 'blanket', 'board', 'book', 'bottle', 'bowl', 'box', 'bread', 'bucket', 'cabinet', 'cake', 'camera', 'can', 'candle', 'car', 'card', 'carpet', 'cart', 'cat', 'cellphone', 'chair', 'child', 'cloth', 'computer', 'cookie', 'countertop', 'cup', 'curtain', 'dog', 'door', 'drawer', 'dustbin', 'fan', 'fence', 'flower', 'fork', 'fridge', 'fruit', 'gift', 'glass', 'glasses', 'guitar', 'hat', 'helmet', 'horse', 'iron', 'knife', 'light', 'lighter', 'mat', 'meat', 'microphone', 'mop', 'net', 'others', 'oven', 'paper', 'piano', 'pillow', 'pizza', 'plant', 'plate', 'racket', 'rag', 'ring', 'scissor', 'shelf', 'shoe', 'sink', 'slide', 'sofa', 'sponge', 'spoon', 'stairs', 'stand', 'stove', 'table', 'teapot', 'towel', 'toy', 'tray', 'tv', 'vegetable', 'window']
Stuff ['ceiling', 'floor', 'grass', 'ground', 'rock', 'sand', 'sky', 'snow', 'tree', 'wall', 'water']


In [21]:
# Write the corrected annotations back, overwriting the merged `pvsg.json`.
corrected_json_path = os.path.join(target_root, 'pvsg.json')
save_json(full_json, corrected_json_path)

In [22]:
# Video ids in the same order as the records in full_json.
video_list = []
for record in full_json:
    video_list.append(record['video_id'])

In [24]:
# Number of video ids collected from full_json.
len(video_list)

287

In [26]:
# Positional split: the last NUM_VAL_VIDEOS ids become the validation set and
# everything before them the training set. Must be > 0 for the slices below.
# NOTE(review): video_list follows the source_dir_list order, so the tail
# presumably comes from 'data_val_v2' - verify that batch has exactly this
# many videos.
NUM_VAL_VIDEOS = 20
train_list = video_list[:-NUM_VAL_VIDEOS]
test_list = video_list[-NUM_VAL_VIDEOS:]

In [30]:
# Bundle the splits, the class vocabularies and the per-video annotations into
# a single file, `pvsg_data.json`, under target_root.
pvsg_json = dict(
    train=train_list,
    val=test_list,
    object=all_object_list,
    stuff=all_stuff_list,
    data=full_json,
)
save_json(pvsg_json, os.path.join(target_root, 'pvsg_data.json'))