In [1]:
import json
import os

import torch
import torchvision
from pycocotools import mask as coco_mask
from tqdm import tqdm

import ConfigArgs as args

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
class ConvertCocoPolysToMask(object):
    """Callable that converts the id and relation fields of a target to tensors.

    Despite the class name, no polygon-to-mask conversion is performed here;
    ``return_masks`` is only stored on the instance.
    """

    def __init__(self, return_masks=False):
        self.return_masks = return_masks

    def __call__(self, image, target):
        """Tensorise ``target['image_id']`` and ``target['rel_annotations']``.

        Args:
            image: Passed through untouched; it is not inspected.
            target: Dict holding at least ``'image_id'`` and
                ``'rel_annotations'``. It is updated in place.

        Returns:
            The ``(image, target)`` pair, where ``target`` is the same dict
            object that was passed in.

        Raises:
            KeyError: If ``'image_id'`` or ``'rel_annotations'`` is missing.
        """
        # Read both inputs before writing so a missing key leaves `target`
        # unmodified.
        image_id = target["image_id"]
        rel_annotations = target['rel_annotations']

        # Only these two fields are rewritten. Earlier revisions also read the
        # image size, category ids and triplet text fields and then discarded
        # them, which made the call fail on targets lacking those unused keys.
        target["image_id"] = torch.tensor([image_id])
        target['rel_annotations'] = torch.tensor(rel_annotations)

        return image, target
    
class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO-style dataset extended with relationship (triplet) text labels.

    In addition to the torchvision COCO index, the constructor loads:

    * ``rel.json`` from the directory that contains ``ann_file``; it supplies
      the per-split relation lists and the ``'rel_categories'`` names;
    * a categories JSON file whose ``'categories'`` entry is used to look up
      object names.

    Indexing returns a text-only target dict (no image); see ``__getitem__``.

    Args:
        img_folder: Image root directory, forwarded to torchvision.
        ann_file: Path to the COCO-format annotation JSON. The split is picked
            by substring match on this path: ``'train'`` first, then
            ``'val'``, otherwise ``'test'``. NOTE(review): the match runs on
            the full path, so a directory name containing ``train``/``val``
            also selects that split.
        transforms: Stored on ``self._transforms``; ``__getitem__`` does not
            apply it at present.
        return_masks: Forwarded to ``ConvertCocoPolysToMask``; the resulting
            ``self.prepare`` is not called by ``__getitem__`` at present.
        categories_file: Optional path to the categories JSON. Defaults to
            ``DEFAULT_CATEGORIES_FILE``.
    """

    # Used when `categories_file` is not given (kept for backward compatibility).
    DEFAULT_CATEGORIES_FILE = '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/categories.json'

    def __init__(self, img_folder, ann_file, transforms, return_masks, categories_file=None):
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self._transforms = transforms
        self.prepare = ConvertCocoPolysToMask(return_masks)

        # rel.json sits next to the annotation file. os.path handles a bare
        # filename correctly (-> 'rel.json'); manual '/'-splitting produced
        # '/rel.json' at the filesystem root in that case.
        rel_file = os.path.join(os.path.dirname(ann_file), 'rel.json')
        with open(rel_file, 'r') as f:
            all_rels = json.load(f)

        if categories_file is None:
            categories_file = self.DEFAULT_CATEGORIES_FILE
        with open(categories_file, 'r') as f:
            categories = json.load(f)

        if 'train' in ann_file:
            self.rel_annotations = all_rels['train']
        elif 'val' in ann_file:
            self.rel_annotations = all_rels['val']
        else:
            self.rel_annotations = all_rels['test']

        self.rel_categories = all_rels['rel_categories']
        self.categories = categories['categories']

    def __getitem__(self, idx):
        """Return the relationship text target for the sample at ``idx``.

        Returns:
            ``{'image_id': <id>, 'annotations': {'rel_labels': [...],
            'triplet_target': [...]}}`` where each triplet string is
            ``"<subject name> <relation name> <object name>"``.

        Raises:
            KeyError: If the selected split has no relation entry for this
                image id.
        """
        image_id = self.ids[idx]

        # Same annotation list the parent __getitem__ yields, fetched straight
        # from the COCO index so the image file is not opened and decoded only
        # to be thrown away.
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=image_id))

        rel_target = self.rel_annotations[str(image_id)]
        triplets_txt = []
        rel_labels = []
        for item in rel_target:
            # item[0] / item[1] index into this image's annotation list,
            # item[2] indexes rel_categories.
            rel_txt = self.rel_categories[item[2]]
            # NOTE(review): `category_id - 1` presumes 1-based ids that match
            # the order of the categories list -- confirm against the data.
            sub = self.categories[annotations[item[0]]['category_id'] - 1]['name']
            obj = self.categories[annotations[item[1]]['category_id'] - 1]['name']
            rel_labels.append(rel_txt)
            triplets_txt.append(sub + ' ' + rel_txt + ' ' + obj)

        target = {
            'image_id': image_id,
            'annotations': {
                'rel_labels': rel_labels,
                'triplet_target': triplets_txt
            }
        }

        return target

In [17]:
# Build the training split of the Visual Genome relationship dataset.
img_folder = '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/VG_100K'
ann_file = '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/anno_reltr/train.json'
dataset = CocoDetection(
    img_folder=img_folder,
    ann_file=ann_file,
    transforms=None,
    return_masks=False,
)
# Number of samples in the split (shown as the cell output).
len(dataset)

loading annotations into memory...
Done (t=1.58s)
creating index...
index created!


57723

In [21]:
# Collect the relationship target of every sample into one in-memory list.
tgt = []
for idx in range(len(dataset)):
    tgt.append(dataset[idx])


In [18]:
# Spot-check a single sample by position.
target = dataset[3012]

In [26]:
# Persist all collected targets as one JSON array.
with open(
    '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/retrieval.json',
    'w',
) as f:
    json.dump(tgt, f)


In [24]:
# Sanity check: display how many targets were collected in `tgt`.
len(tgt)

57723

In [17]:
# Load the raw relation file, the category table and the train annotations.
with open(
    '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/anno_reltr/rel.json',
    'r',
) as rel_fh:
    all_rels = json.load(rel_fh)

with open(
    '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/categories.json',
    'r',
) as cat_fh:
    categories = json.load(cat_fh)

with open(
    '/home/duypd/ThisPC-DuyPC/SG-Retrieval/Datasets/VisualGenome/anno_reltr/train.json',
    'r',
) as train_fh:
    train = json.load(train_fh)

# Train-split relations and relation names come from the same file.
rel_annotations, rel_categories = all_rels['train'], all_rels['rel_categories']
# Keep only the category list from the loaded category file.
categories = categories['categories']

In [27]:
def create_ground_truth_for_triplet(data, query_triplet):
    """Collect the image ids whose triplet list contains ``query_triplet``.

    Args:
        data: Iterable of dicts, each with an ``'image_id'`` entry and an
            ``'annotations'`` dict holding ``'triplet_target'``.
        query_triplet: Value tested for membership in each ``'triplet_target'``.

    Returns:
        List of matching ``image_id`` values, in the order they occur in
        ``data``.
    """
    matches = []
    for entry in data:
        img_id, triplets = entry['image_id'], entry['annotations']['triplet_target']
        # Skip entries that do not contain the queried triplet.
        if query_triplet not in triplets:
            continue
        matches.append(img_id)
    return matches

In [38]:
# Example query: look up every image whose triplet list contains this string.
query_triplet = "train on street"

# Build the ground-truth list of image ids for this triplet
ground_truth_images = create_ground_truth_for_triplet(tgt, query_triplet)

# Report the number of matching images, then the ids themselves
# (the printed messages are in Vietnamese).
print(f"Số lượng hình ảnh chứa triplet '{query_triplet}': {len(ground_truth_images)}")
print(f"Image IDs: {ground_truth_images}")

Số lượng hình ảnh chứa triplet 'train on street': 10
Image IDs: [2347308, 2348659, 2352843, 2357116, 2357284, 2357866, 2361213, 2370486, 2383858, 2399020]
