In [41]:
import pycocotools.mask as mask_utils
import numpy as np
from itertools import groupby
import os
from pathlib import Path
from PIL import Image
from collections import defaultdict
from tqdm import tqdm
import json
import pickle

import sys
sys.path.append("/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image")

def load_json(filepath):
    with open(filepath, "r") as f:
        return json.load(f)
def load_pickle(filepath):
    with open(filepath, "rb") as f:
        return pickle.load(f)

class PVSGAnnotation:
    def __init__(self, anno_file):
        with open(anno_file, "r") as f:
            anno = json.load(f)
            
        self.anno = anno
        videos = {}
        for video_anno in anno:
            videos[video_anno['video_id']] = video_anno
        self.videos = videos


    def __getitem__(self, vid):
        assert vid in self.videos
        return self.videos[vid]

In [168]:
label_path = "/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/work_dirs/test_default_ckpt4_tracking_and_save_qf2/0028_4021064662/quantitive/masks.txt"
gt_masks_root = Path("/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/data/pvsg_demo/train/masks")
vid = "0028_4021064662"
qf_root = Path("/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/work_dirs/test_default_ckpt4_tracking_and_save_qf2")
gt = PVSGAnnotation("/mnt/lustre/jkyang/wxpeng/CVPR23/pvsg_data/pvsg_demo.json")

In [169]:
# ---------------------------------------------- load pred masks --------------------------------------------------
def load_pred_mask_tubes(label_path):
    # label_path: mask txt output from unitrack
    print("Loading pred mask tubes...", flush=True)
    labels = []
    results = []
    with open(label_path, 'r') as f:
        for line in f:
            labels.append(line.strip().split())

    for label in tqdm(labels):
        frame_id, track_id, _, h, w, m = label
        rle = {'size':(int(h),int(w)), 'counts':m}
        mask = mask_utils.decode(rle)
        results.append(dict(fid=frame_id, tid=track_id, mask=mask))
    
    def key_func(k):
        return k['tid']
    
    # sort data by 'tid' key.
    results = sorted(results, key=key_func)
    # group by tid
    masks_grp_by_tid = {}
    for key, value in groupby(results, key_func):
        masks_grp_by_tid[key] = list(value)
    return masks_grp_by_tid

# ------------------------------------------------- load gt masks ----------------------------------------------------
def read_pan_mask_next_frame(mask_tubes, pan_mask, cur_len, h, w):
    cur_ids = list(mask_tubes.keys())
    new_ids = list(np.unique(pan_mask))
    all_ids = list(set(cur_ids + new_ids))
    # for no mask frame -- all zeros
    dummy_mask = np.zeros((h,w))

    for instance_id in all_ids:
        if instance_id == 0:
            continue
        if instance_id not in new_ids: # this frame has no this object
            mask_tubes[instance_id].append(dummy_mask)
            continue
        if instance_id not in cur_ids: # this object first show up
            mask_tubes[instance_id].extend([dummy_mask for i in range(cur_len)])
            
        mask_tubes[instance_id].append((pan_mask == instance_id).astype(int))
    return mask_tubes

def load_gt_mask_tubes(gt_mask_path):
    print("Loading gt mask tubes...", flush=True)
    gt_pan_mask_paths = [str(x) for x in sorted(gt_mask_path.rglob("*.png"))]
    
    mask_tubes = defaultdict(list)
    cur_len = 0
    for mask_path in tqdm(gt_pan_mask_paths):
        pan_mask = np.array(Image.open(mask_path))
        h, w = pan_mask.shape
        mask_tubes = read_pan_mask_next_frame(mask_tubes, pan_mask, cur_len, h, w)
        cur_len += 1
    return mask_tubes


def check_has_none(mask_tube):
    return any(ele is None for ele in mask_tube)

# ---------------------------------------- match gt mask tube and pred mask tube -------------------------------------------------
def binaryMaskIOU(mask1, mask2):   
    mask1_area = np.count_nonzero(mask1 == 1)
    mask2_area = np.count_nonzero(mask2 == 1)
    intersection = np.count_nonzero(np.logical_and(mask1==1, mask2==1))
    iou = intersection/(mask1_area+mask2_area-intersection)
    return iou

def match_gt_pred_mask_tubes(pred_mask_tubes, gt_mask_tubes):
    print("Matching pred mask tubes with gt mask tubes...", flush=True)
    # init iou score for all pred mask tube
    assigned_labels = {tid: -1 for tid in pred_mask_tubes.keys()}
    iou_scores = {tid: -1 for tid in pred_mask_tubes.keys()}

    iou_thres = 0.85
    for tid, pred_mask_tube in tqdm(pred_mask_tubes.items()): # iterare all pred mask tubes
        gt_id = -1 # pred mask tube只会对应一个gt tube，但是可能对应的是part of it
        iou_score = -np.inf
        # compute viou with all gt mask tubes
        for gt_instance_id, gt_mask_tube in gt_mask_tubes.items():
            viou = 0.0
            count = 0.0
            for pred_mask_dict in pred_mask_tube: # iterate every frame in a pred mask tube
                fid = int(pred_mask_dict['fid']) - 1 # our gt starts from 0, but pred starts from 1, so need - 1
                pred_mask = pred_mask_dict['mask']
                gt_mask = gt_mask_tube[fid]
                iou = binaryMaskIOU(pred_mask, gt_mask)
                viou += iou
                count += 1
            viou = viou / count
            if viou >= iou_thres and viou > iou_score:
                gt_id = gt_instance_id
                iou_score = viou
        if gt_id != -1: # has some > 0.85
            iou_scores[tid] = iou_score # remember this highest iou score for this pred mask tube
            assigned_labels[tid] = gt_id # remember which part of gt is assigned to a pred(tid) - 这条gt_tube的dummy_id的部分被assign给当前pred mask tube

    return assigned_labels, iou_scores
        
       
# ----------------------- assign labels to qf tube pair (match qf and gt relation) ----------------------------------------------
# get filtered qf_tube out
def filter_qf_tubes(assigned_labels, qf_tube_obj_list):
    # take out those have assigned labels in gt
    qf_tubes_filtered = []
    for (tid, gt_id), qf_tube_obj in zip(assigned_labels.items(), qf_tube_obj_list):
        if gt_id == -1:
            continue
        qf_tubes_filtered.append(dict(tid=int(tid),
                                      gt_id=gt_id,
                                      qf_tube=qf_tube_obj.qf_tube))
    return qf_tubes_filtered

def pair_qf_tubes(qf_tubes_filtered):
    # pair to get true/false tube to indicate which part should have a label (both have qf tubes)
    # avoid pairing with itself (same gt_id)
    pairs = []
    for s in qf_tubes_filtered:
        for o in qf_tubes_filtered:
            if (s['tid'] == o['tid']) or (s['gt_id'] == o['gt_id']):
                continue
            s_indicator = np.array([True if ele is not None else False for ele in s['qf_tube']])
            o_indicator = np.array([True if ele is not None else False for ele in o['qf_tube']])
            pair_indicator = s_indicator * o_indicator
            # also get tubes here
            s_qf_tube = [x['query_feat'] if x is not None else None for x in s['qf_tube']]
            o_qf_tube = [x['query_feat'] if x is not None else None for x in o['qf_tube']]
            so_qf_tubes = [s_qf_tube, o_qf_tube]
            pairs.append(dict(so_tid=[s['tid'], o['tid']],
                              so_gt_id=[s['gt_id'], o['gt_id']],
                              so_qf_tubes=so_qf_tubes,
                              indicator=pair_indicator))
    return pairs
    

def assign_relation_label(gt_relations, pairs):
    # assigne label (label list) to every qf_tube pair in pairs
    print("Assigning labels to every qf tube pair...", flush=True)
    if len(pairs) == 0:
        return [], []
    
    labels = []
    pairs_filtered = []
    num_frames = len(pairs[0]['indicator'])
    for pair in tqdm(pairs):
        label_this_pair = [[] for i in range(num_frames)]
        s_gt_id, o_gt_id = pair['so_gt_id']
        indicator = pair['indicator']
        has_rel = False
        for gt_relation in gt_relations:
            if s_gt_id == gt_relation[0] and o_gt_id == gt_relation[1]:
                predicate = gt_relation[2]
                intervals = gt_relation[3]
                for interval in intervals: # one might have sevel time intervals for a relation
                    # remember our interval is not close on the right side but "range()" does! need + 1
                    start, end = interval[0], interval[1]
                    for i in range(start, end + 1):
                        if indicator[i]: # if also has a pair tube here
                            has_rel = True
                            label_this_pair[i].append(predicate)
        if has_rel:
            labels.append(dict(so_tid=pair['so_tid'],
                               so_gt_id=pair['so_gt_id'],
                               label=label_this_pair))
            pairs_filtered.append(pair['so_qf_tubes'])
    return pairs_filtered, labels
    
def get_labels_single_video(assigned_labels, qf_tube_obj_list, gt_relations):
    qf_tubes_filtered = filter_qf_tubes(assigned_labels, qf_tube_obj_list)
    pairs = pair_qf_tubes(qf_tubes_filtered)
    pairs_filtered, labels = assign_relation_label(gt_relations, pairs)
    return pairs_filtered, labels

# ---------------------------------------------- main ---------------------------------------------------------------------
save_root = Path("/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/work_dirs/test_default_ckpt4_tracking_and_save_qf2")
gt_masks_root = Path("/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/data/pvsg_demo/train/masks") 
gt = PVSGAnnotation("/mnt/lustre/jkyang/wxpeng/CVPR23/pvsg_data/pvsg_demo.json")
def postprocess_resutls_assign_labels_to_qf_tubes_single_video(vid):
    pred_mask_path = save_root / vid / "quantitive/masks.txt"
    pred_mask_tubes = load_pred_mask_tubes(pred_mask_path)
    
    gt_mask_path = gt_masks_root / vid
    gt_mask_tubes = get_gt_mask_tubes_one_video(gt_mask_path)
    
    assigned_labels, iou_scores = match_gt_pred_mask_tubes(pred_mask_tubes, gt_mask_tubes)
    
    qf_tube_obj_list = load_pickle(save_root / vid / "query_feats.pickle")
    gt_relations = gt[vid]['relations']
    pairs_filtered, labels = get_labels_single_video(assigned_labels, qf_tube_obj_list, gt_relations)
    return pairs_filtered, labels

In [170]:
pred_mask_path = save_root / vid / "quantitive/masks.txt"
pred_mask_tubes = load_pred_mask_tubes(pred_mask_path)

Loading pred mask tubes...


100%|███████████████████████████████████████| 381/381 [00:00<00:00, 2343.93it/s]


In [171]:
gt_mask_path = gt_masks_root / vid
gt_mask_tubes = load_gt_mask_tubes(gt_mask_path)

Loading gt mask tubes...


100%|███████████████████████████████████████████| 95/95 [00:01<00:00, 83.78it/s]


In [172]:
assigned_labels, iou_scores = match_gt_pred_mask_tubes(pred_mask_tubes, gt_mask_tubes)

Matching pred mask tubes with gt mask tubes...


100%|█████████████████████████████████████████████| 8/8 [00:01<00:00,  4.26it/s]


In [179]:
qf_tube_obj_list = load_pickle(qf_root / vid / "query_feats.pickle")
gt_relations = gt[vid]['relations']
qf_tubes_filtered = filter_qf_tubes(assigned_labels, qf_tube_obj_list)
pairs = pair_qf_tubes(qf_tubes_filtered)
pairs_filtered, labels = assign_relation_label(gt_relations, pairs)

Assigning labels to every qf tube pair...


100%|█████████████████████████████████████████| 10/10 [00:00<00:00, 5888.40it/s]


In [73]:
pairs_filtered, labels = postprocess_resutls_assign_labels_to_qf_tubes_single_video(vid)

Loading pred mask tubes...


100%|███████████████████████████████████████| 381/381 [00:00<00:00, 2438.36it/s]

Loading gt mask tubes...



100%|███████████████████████████████████████████| 95/95 [00:01<00:00, 92.68it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 8/8 [00:01<00:00,  4.21it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 10/10 [00:00<00:00, 10849.21it/s]


### Datasets

In [198]:
import os
import copy
from pathlib import Path
from click import utils

from mmdet.datasets.builder import DATASETS


class QueryFeaturePairDataset:
    def __init__(self,
                 data_root="/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/data/pvsg_demo",
                 split="train",
                 mode="random", # if a whole video in a batch
                 results_root="/mnt/lustre/jkyang/wxpeng/CVPR23/PVSG_Image/work_dirs/test_default_ckpt4_tracking_and_save_qf2",
                 anno_file_path="/mnt/lustre/jkyang/wxpeng/CVPR23/pvsg_data/pvsg_demo.json"):
        self.data_root = Path(data_root)
        self.split = split
        self.mode = mode
        self.gt_mask_root = self.data_root / split / "masks"
        self.results_root = Path(results_root) # results from train or test (different process)
        self.gt = PVSGAnnotation(anno_file_path)
        self.vids = os.listdir(self.gt_mask_root)

        self.data = []
        self.labels = []
        if split == "train":
            for vid in self.vids:
                print("Processing video {}".format(vid), flush=True)
                if self.gt[vid]['relations'] is None:
                    continue
                if vid == "1007_6631583821" or vid == "0046_11919433184": # this vid relation is not ready yet
                    continue
                pairs_filtered_this_video, labels_this_video = self.assign_labels_to_qf_tubes_single_video(vid)
                if len(labels_this_video) == 0:
                    continue
                if mode == "random": # one data is a pair in one frame
                    for tube_pair, tube_label in zip(pairs_filtered_this_video, labels_this_video):
                        for frame_s, frame_o, frame_label in zip(tube_pair[0], tube_pair[1], tube_label['label']):
                            if len(frame_label) == 0:
                                continue
                            self.data.append([frame_s, frame_o])
                            self.labels.append(frame_label)
                else: # one data is a pair tube in one whole video
                    for tube_pair, tube_label in zip(pairs_filtered_this_video, labels_this_video):
                        self.data.append(tube_pair)
                        self.labels.append(tube_label['label'])
        else:
            pass # TODO
                




    
    def assign_labels_to_qf_tubes_single_video(self, vid):
        pred_mask_path = self.results_root / vid / "quantitive/masks.txt"
        pred_mask_tubes = load_pred_mask_tubes(pred_mask_path)
        gt_mask_path = self.gt_mask_root / vid
        gt_mask_tubes = load_gt_mask_tubes(gt_mask_path)
        assigned_labels, iou_scores = match_gt_pred_mask_tubes(pred_mask_tubes, gt_mask_tubes)
        qf_tube_obj_list = load_pickle(self.results_root / vid / "query_feats.pickle")
        gt_relations = self.gt[vid]['relations']
        pairs_filtered, labels = get_labels_single_video(assigned_labels, qf_tube_obj_list, gt_relations)
        return pairs_filtered, labels

        
    def prepare_train_img(self, idx):
        return {'data': self.data[idx],
                'label': self.labels[idx]}

    def __getitem__(self, idx):
        if self.split == "train":
            return self.prepare_train_img(idx)
    def __len__(self):
        return len(self.data)




In [199]:
data = QueryFeaturePairDataset()

Processing video 1100_9117425466
Loading pred mask tubes...


100%|███████████████████████████████████████| 447/447 [00:00<00:00, 3359.62it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 116/116 [00:00<00:00, 141.52it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 12/12 [00:01<00:00,  7.44it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 18/18 [00:00<00:00, 20627.72it/s]

Processing video 0057_7001078933
Loading pred mask tubes...



100%|███████████████████████████████████████| 401/401 [00:00<00:00, 3635.25it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 106/106 [00:00<00:00, 111.10it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 17/17 [00:02<00:00,  7.23it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 70/70 [00:00<00:00, 34782.76it/s]

Processing video 0028_4021064662
Loading pred mask tubes...



100%|███████████████████████████████████████| 381/381 [00:00<00:00, 2822.48it/s]

Loading gt mask tubes...



100%|██████████████████████████████████████████| 95/95 [00:00<00:00, 111.31it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 8/8 [00:01<00:00,  4.19it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 10/10 [00:00<00:00, 11683.30it/s]

Processing video 1007_6631583821
Processing video 1006_4580824633
Loading pred mask tubes...



100%|███████████████████████████████████████| 707/707 [00:00<00:00, 2921.55it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 203/203 [00:02<00:00, 99.70it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 14/14 [00:02<00:00,  5.48it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 26/26 [00:00<00:00, 15684.15it/s]

Processing video 0010_8610561401
Loading pred mask tubes...



100%|███████████████████████████████████████| 725/725 [00:00<00:00, 3482.19it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 180/180 [00:01<00:00, 129.99it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 6/6 [00:02<00:00,  2.78it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 12/12 [00:00<00:00, 12381.71it/s]

Processing video 1012_4024008346
Loading pred mask tubes...



100%|███████████████████████████████████████| 444/444 [00:00<00:00, 3694.23it/s]

Loading gt mask tubes...



100%|██████████████████████████████████████████| 98/98 [00:00<00:00, 121.54it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 9/9 [00:01<00:00,  5.62it/s]


Assigning labels to every qf tube pair...


100%|████████████████████████████████████████| 28/28 [00:00<00:00, 46200.04it/s]

Processing video 1000_6828150903
Loading pred mask tubes...



100%|█████████████████████████████████████| 1199/1199 [00:00<00:00, 3733.58it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 340/340 [00:03<00:00, 108.33it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 13/13 [00:04<00:00,  2.91it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 36/36 [00:00<00:00, 21857.98it/s]

Processing video 1124_9861436503
Loading pred mask tubes...



100%|█████████████████████████████████████| 1878/1878 [00:00<00:00, 3561.17it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 323/323 [00:03<00:00, 89.69it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 14/14 [00:10<00:00,  1.28it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 30/30 [00:00<00:00, 20777.60it/s]

Processing video 1015_4698622422
Loading pred mask tubes...



100%|███████████████████████████████████████| 829/829 [00:00<00:00, 2540.56it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 209/209 [00:02<00:00, 95.31it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 47/47 [00:04<00:00,  9.51it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 68/68 [00:00<00:00, 22930.75it/s]

Processing video 0046_11919433184
Processing video 1001_7007447516
Loading pred mask tubes...



100%|█████████████████████████████████████| 2504/2504 [00:00<00:00, 2833.46it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 418/418 [00:06<00:00, 62.68it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 48/48 [00:25<00:00,  1.90it/s]


Assigning labels to every qf tube pair...


100%|████████████████████████████████████████| 86/86 [00:00<00:00, 10329.91it/s]

Processing video 1002_5280626374
Loading pred mask tubes...



100%|███████████████████████████████████████| 909/909 [00:00<00:00, 2310.84it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 176/176 [00:02<00:00, 61.65it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 16/16 [00:05<00:00,  2.94it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 20/20 [00:00<00:00, 27369.03it/s]

Processing video 1021_4278168115
Loading pred mask tubes...



100%|█████████████████████████████████████| 2292/2292 [00:00<00:00, 2672.84it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 399/399 [00:06<00:00, 62.25it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 99/99 [00:22<00:00,  4.47it/s]

Assigning labels to every qf tube pair...



100%|██████████████████████████████████████| 112/112 [00:00<00:00, 24091.60it/s]

Processing video 1025_6244382586
Loading pred mask tubes...



100%|███████████████████████████████████████| 500/500 [00:00<00:00, 3106.14it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 103/103 [00:01<00:00, 93.22it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 9/9 [00:02<00:00,  3.48it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 12/12 [00:00<00:00, 19776.68it/s]


In [218]:
len(data)

3758

In [220]:
data[0].keys()

dict_keys(['data', 'label'])

In [225]:
data[0]

{'data': [array([-1.35119677e+00, -7.66641349e-02,  3.63612384e-01, -4.74074274e-01,
          8.78877878e-01,  4.82602507e-01,  1.42951167e+00, -1.09497976e+00,
         -1.81490564e+00, -1.39861774e+00, -3.37105960e-01,  5.79884291e-01,
          3.68436337e-01,  6.71831250e-01, -3.01698148e-01, -2.25422883e+00,
         -1.16654110e+00, -7.18733072e-01,  7.62744844e-01, -9.02247488e-01,
         -6.74047291e-01, -1.09617257e+00, -1.47436988e+00,  2.53762037e-01,
          1.74886167e+00, -1.00636601e+00,  2.19042927e-01, -5.54755591e-02,
         -8.58831048e-01, -8.99135232e-01, -5.69001377e-01,  7.47522771e-01,
         -1.53397143e-01, -1.42662144e+00,  4.71043706e-01, -2.11648762e-01,
         -2.24184275e-01,  6.79252148e-01,  8.20285201e-01, -9.35805321e-01,
          5.99625781e-02, -1.15795076e+00, -1.16194993e-01,  7.23607242e-01,
         -2.58702904e-01, -1.43395531e+00, -4.90796149e-01,  3.04228097e-01,
          7.29733050e-01, -1.65820158e+00,  9.39160213e-02, -2.52273

In [223]:
len(data[0]['data'])

2

In [224]:
len(data[0]['data'][0])

256

In [211]:
data1 = QueryFeaturePairDataset(mode="whole")

Processing video 1100_9117425466
Loading pred mask tubes...


100%|███████████████████████████████████████| 447/447 [00:00<00:00, 3281.06it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 116/116 [00:00<00:00, 153.84it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 12/12 [00:01<00:00,  7.31it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 18/18 [00:00<00:00, 17327.86it/s]

Processing video 0057_7001078933
Loading pred mask tubes...



100%|███████████████████████████████████████| 401/401 [00:00<00:00, 3587.51it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 106/106 [00:00<00:00, 119.56it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 17/17 [00:02<00:00,  7.36it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 70/70 [00:00<00:00, 32399.17it/s]

Processing video 0028_4021064662
Loading pred mask tubes...



100%|███████████████████████████████████████| 381/381 [00:00<00:00, 2745.68it/s]

Loading gt mask tubes...



100%|██████████████████████████████████████████| 95/95 [00:00<00:00, 123.60it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 8/8 [00:01<00:00,  4.22it/s]

Assigning labels to every qf tube pair...



100%|█████████████████████████████████████████| 10/10 [00:00<00:00, 9706.79it/s]

Processing video 1007_6631583821
Processing video 1006_4580824633
Loading pred mask tubes...



100%|███████████████████████████████████████| 707/707 [00:00<00:00, 2785.50it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 203/203 [00:02<00:00, 93.90it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 14/14 [00:02<00:00,  5.06it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 26/26 [00:00<00:00, 12552.01it/s]

Processing video 0010_8610561401
Loading pred mask tubes...



100%|███████████████████████████████████████| 725/725 [00:00<00:00, 3343.11it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 180/180 [00:01<00:00, 124.44it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 6/6 [00:02<00:00,  2.59it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 12/12 [00:00<00:00, 13548.22it/s]

Processing video 1012_4024008346
Loading pred mask tubes...



100%|███████████████████████████████████████| 444/444 [00:00<00:00, 3619.44it/s]

Loading gt mask tubes...



100%|██████████████████████████████████████████| 98/98 [00:00<00:00, 140.46it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 9/9 [00:01<00:00,  5.49it/s]


Assigning labels to every qf tube pair...


100%|████████████████████████████████████████| 28/28 [00:00<00:00, 23255.55it/s]

Processing video 1000_6828150903
Loading pred mask tubes...



100%|█████████████████████████████████████| 1199/1199 [00:00<00:00, 3566.65it/s]

Loading gt mask tubes...



100%|████████████████████████████████████████| 340/340 [00:03<00:00, 106.51it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 13/13 [00:04<00:00,  2.69it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 36/36 [00:00<00:00, 19405.60it/s]

Processing video 1124_9861436503
Loading pred mask tubes...



100%|█████████████████████████████████████| 1878/1878 [00:00<00:00, 3618.73it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 323/323 [00:03<00:00, 92.59it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 14/14 [00:11<00:00,  1.22it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 30/30 [00:00<00:00, 16043.49it/s]

Processing video 1015_4698622422
Loading pred mask tubes...



100%|███████████████████████████████████████| 829/829 [00:00<00:00, 2613.77it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 209/209 [00:02<00:00, 92.27it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 47/47 [00:04<00:00,  9.60it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 68/68 [00:00<00:00, 16713.31it/s]

Processing video 0046_11919433184
Processing video 1001_7007447516
Loading pred mask tubes...



100%|█████████████████████████████████████| 2504/2504 [00:00<00:00, 2623.69it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 418/418 [00:06<00:00, 63.37it/s]


Matching pred mask tubes with gt mask tubes...


100%|███████████████████████████████████████████| 48/48 [00:24<00:00,  1.96it/s]


Assigning labels to every qf tube pair...


100%|████████████████████████████████████████| 86/86 [00:00<00:00, 11189.32it/s]

Processing video 1002_5280626374
Loading pred mask tubes...



100%|███████████████████████████████████████| 909/909 [00:00<00:00, 2266.48it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 176/176 [00:02<00:00, 63.47it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 16/16 [00:05<00:00,  3.00it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 20/20 [00:00<00:00, 25412.32it/s]

Processing video 1021_4278168115
Loading pred mask tubes...



100%|█████████████████████████████████████| 2292/2292 [00:00<00:00, 2632.16it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 399/399 [00:06<00:00, 60.19it/s]

Matching pred mask tubes with gt mask tubes...



100%|███████████████████████████████████████████| 99/99 [00:21<00:00,  4.67it/s]


Assigning labels to every qf tube pair...


100%|██████████████████████████████████████| 112/112 [00:00<00:00, 23893.09it/s]

Processing video 1025_6244382586
Loading pred mask tubes...



100%|████████████████████████████████████████| 500/500 [00:00<00:00, 906.51it/s]

Loading gt mask tubes...



100%|█████████████████████████████████████████| 103/103 [00:01<00:00, 99.06it/s]

Matching pred mask tubes with gt mask tubes...



100%|█████████████████████████████████████████████| 9/9 [00:02<00:00,  3.55it/s]

Assigning labels to every qf tube pair...



100%|████████████████████████████████████████| 12/12 [00:00<00:00, 19691.57it/s]


In [219]:
len(data1)

43

In [228]:
data1[0]['label']

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 ['running to'],
 ['running to'],
 ['running to'],
 ['running to'],
 ['running to'],
 ['running to'],
 ['running to'],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

### One-hot encoding relation labels

In [248]:
import pandas as pd

In [231]:
gt_anno = load_json("/mnt/lustre/jkyang/wxpeng/CVPR23/pvsg_data/pvsg_demo.json")
predicates = []
for anno in gt_anno:
    relations = anno['relations']
    if relations is None:
        continue
    for relation in relations:
        predicates.append(relation[2])
PREDICATES = sorted(list(set(predicates))) # sort by first letter

In [247]:
PREDICATES 

['behind',
 'caressing',
 'catching',
 'chasing',
 'falling on',
 'grabbing',
 'hitting',
 'holding',
 'in front of',
 'jumping to',
 'kicking',
 'kissing',
 'looking at',
 'next to',
 'on',
 'passing over',
 'picking',
 'playing with',
 'pulling',
 'pushing',
 'putting down',
 'riding',
 'riding on',
 'runinng to',
 'running on',
 'running to',
 'sitting on',
 'stading on',
 'standing on',
 'standng on',
 'thowing',
 'throwing',
 'touching',
 'walking',
 'walking on']

In [249]:
predicates_df = pd.DataFrame(PREDICATES)

In [244]:
def encode_predicate_label(predicate_list):
    for 

35

In [None]:
l = 