In [2]:
import os
import json

import os.path as osp
os.chdir(osp.dirname(os.getcwd()))



In [3]:

import pandas as pd 

from collections import Counter

import numpy as np

from loguru import logger
'''
description:  获取SR3D 作者划分好的 训练集 和测试集
param {*} split
return {*}
'''
def get_split_list(dataset_name="sr3d" ,split='train'):
    with open('data/meta_data/%s_%s_scans.txt' % (dataset_name,split)) as f:
        scan_ids = set(eval(f.read()))
    logger.info(f" len of {dataset_name,split} : {len(scan_ids)}")
    return scan_ids





'''
description:  从SR3D作者那获取 详细的标注数据
param {*} split_name
return {*}
'''
def get_refer_it_3D(split_name='sr3d'):
    scanrefer_root="datasets/refer_it_3d"
    data = pd.read_csv(osp.join(scanrefer_root,split_name+".csv"))
    
    logger.info(f"len of {split_name} : {data.shape[0]}")
    all_attrs = data.columns
    logger.info(f" column of {split_name} : {all_attrs}")
    logger.info(f"scene number : {len(set(data['scan_id']))}")

    stat = Counter(data['scan_id'])

    scane_stat = np.array([v for k ,v in stat.items()])
    avg_sample =scane_stat.mean()
    min_sample =scane_stat.min()
    max_sample =scane_stat.max()


    logger.info(f"min sample: {min_sample} \n max sample : {max_sample} \n avg sample: {avg_sample}")

    
    print(data.iloc[0,:])
      



def generate_NR3D_labeled_scene_txt(labeled_ratio):
    nr3d_ids = get_split_list(dataset_name='nr3d')

    num_scans = len(nr3d_ids)
    num_labeled_scans = int(num_scans*labeled_ratio)


    choices = np.random.choice(num_scans, num_labeled_scans, replace=False)#* 从num_scans 挑选num_labeled_scans 个场景 出来 

    labeled_scan_names = list(np.array(list(nr3d_ids))[choices])
    
    with open(os.path.join('data/meta_data/nr3d_train_{}.txt'.format(labeled_ratio)), 'w') as f:
        f.write('\n'.join(labeled_scan_names))
    
    logger.info('\tSelected {} labeled scans, remained {} unlabeled scans'.format(len(labeled_scan_names),num_scans- len(labeled_scan_names)))


    
    
sr3d_ids = get_split_list()
sr3d_ids_test = get_split_list(split = 'test')


nr3d_ids = get_split_list(dataset_name='nr3d')
nr3d_ids_test = get_split_list(dataset_name='nr3d',split = 'test')



# get_refer_it_3D('sr3d+')
# get_refer_it_3D('sr3d')
# get_refer_it_3D('sr3d_test')
# get_refer_it_3D('sr3d_train')
# get_refer_it_3D('sr3d_train')
get_refer_it_3D('nr3d')


2022-10-14 19:01:31.134 | INFO     | __main__:get_split_list:16 -  len of ('sr3d', 'train') : 1018
2022-10-14 19:01:31.136 | INFO     | __main__:get_split_list:16 -  len of ('sr3d', 'test') : 255
2022-10-14 19:01:31.137 | INFO     | __main__:get_split_list:16 -  len of ('nr3d', 'train') : 511
2022-10-14 19:01:31.138 | INFO     | __main__:get_split_list:16 -  len of ('nr3d', 'test') : 130
2022-10-14 19:01:31.276 | INFO     | __main__:get_refer_it_3D:32 - len of nr3d : 41503
2022-10-14 19:01:31.277 | INFO     | __main__:get_refer_it_3D:34 -  column of nr3d : Index(['assignmentid', 'stimulus_id', 'utterance', 'correct_guess',
       'speaker_id', 'listener_id', 'scan_id', 'instance_type', 'target_id',
       'tokens', 'dataset', 'mentions_target_class', 'uses_object_lang',
       'uses_spatial_lang', 'uses_color_lang', 'uses_shape_lang'],
      dtype='object')
2022-10-14 19:01:31.311 | INFO     | __main__:get_refer_it_3D:35 - scene number : 641
2022-10-14 19:01:31.319 | INFO     | __main_

assignmentid                                                         32618
stimulus_id                             scene0525_00-plant-5-9-10-11-12-62
utterance                The plant at the far right hand side of the bo...
correct_guess                                                         True
speaker_id                                                              47
listener_id                                                            310
scan_id                                                       scene0525_00
instance_type                                                        plant
target_id                                                                9
tokens                   ['the', 'plant', 'at', 'the', 'far', 'right', ...
dataset                                                               nr3d
mentions_target_class                                                 True
uses_object_lang                                                      True
uses_spatial_lang        

In [69]:



for x in np.linspace(0.1,0.9,9):
    generate_NR3D_labeled_scene_txt(round(x,1))


In [29]:

def get_scanrefer(split=None):
    if split is not None :
        path = "datasets/scanrefer/ScanRefer_filtered_%s.json"%split
    else :
        path = "datasets/scanrefer/ScanRefer_filtered.json"

    with open (path,'r')as f :
        data =json.load(f)
    
    length = len(data)
    logger.info(f" len of {split} split : {length}")
    all_scene = set([x['scene_id']  for x in data])
    logger.info(f" scene number  of {split} split : {len(all_scene)}")

    all_object_id = set([x['object_id']  for x in data])
    logger.info(f" object number  of {split} split : {len(all_object_id)}")

    all_anno_id = set([x['ann_id']  for x in data])
    logger.info(f" anno number  of {split} split : {len(all_anno_id)}")

    print(data[0])
    
    return all_scene



def generate_scanrefer_labeled_scene_txt(labeled_ratio):
    all_scenes = get_scanrefer(split='train')

    num_scans = len(all_scenes)
    num_labeled_scans = int(num_scans*labeled_ratio)


    choices = np.random.choice(num_scans, num_labeled_scans, replace=False)#* 从num_scans 挑选num_labeled_scans 个场景 出来 

    labeled_scan_names = list(np.array(list(all_scenes))[choices])
    
    with open(os.path.join('datasets/scanrefer/ScanRefer_filtered_train_{}.txt'.format(labeled_ratio)), 'w') as f:
        f.write('\n'.join(labeled_scan_names))
    
    logger.info('\tSelected {} labeled scans, remained {} unlabeled scans'.format(len(labeled_scan_names),num_scans- len(labeled_scan_names)))


    
    
        


# get_scanrefer(split='val')
# get_scanrefer()



In [30]:

for x in np.linspace(0.1,0.9,9):
    generate_scanrefer_labeled_scene_txt(round(x,1))

2022-10-07 16:06:36.774 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:36.791 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:36.802 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:36.806 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:36.835 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 56 labeled scans, remained 506 unlabeled scans
2022-10-07 16:06:36.993 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:37.009 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:37.020 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:37.025 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}
{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on

2022-10-07 16:06:37.053 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 112 labeled scans, remained 450 unlabeled scans
2022-10-07 16:06:37.272 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:37.288 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:37.298 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:37.303 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:37.331 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 168 labeled scans, remained 394 unlabeled scans


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}


2022-10-07 16:06:37.564 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:37.579 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:37.590 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:37.595 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:37.623 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 224 labeled scans, remained 338 unlabeled scans


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}


2022-10-07 16:06:37.865 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:37.881 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:37.892 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:37.897 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:37.925 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 281 labeled scans, remained 281 unlabeled scans
2022-10-07 16:06:38.084 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:38.100 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:38.111 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:38.116 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}
{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on

2022-10-07 16:06:38.144 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 337 labeled scans, remained 225 unlabeled scans
2022-10-07 16:06:38.355 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:38.371 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:38.382 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:38.387 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:38.415 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 393 labeled scans, remained 169 unlabeled scans


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}


2022-10-07 16:06:38.640 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:38.656 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:38.667 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:38.671 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:38.700 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 449 labeled scans, remained 113 unlabeled scans


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}


2022-10-07 16:06:38.938 | INFO     | __main__:get_scanrefer:11 -  len of train split : 36665
2022-10-07 16:06:38.954 | INFO     | __main__:get_scanrefer:13 -  scene number  of train split : 562
2022-10-07 16:06:38.965 | INFO     | __main__:get_scanrefer:16 -  object number  of train split : 87
2022-10-07 16:06:38.969 | INFO     | __main__:get_scanrefer:19 -  anno number  of train split : 5
2022-10-07 16:06:38.998 | INFO     | __main__:generate_scanrefer_labeled_scene_txt:41 - 	Selected 505 labeled scans, remained 57 unlabeled scans


{'scene_id': 'scene0000_00', 'object_id': '39', 'object_name': 'cabinet', 'ann_id': '1', 'description': 'a white cabinet in the corner of the room. in the direction from the door and from the inside . it will be on the left, there is a small brown table on the left side of the cabinet and a smaller table on the right side of the cabinet', 'token': ['a', 'white', 'cabinet', 'in', 'the', 'corner', 'of', 'the', 'room', '.', 'in', 'the', 'direction', 'from', 'the', 'door', 'and', 'from', 'the', 'inside', '.', 'it', 'will', 'be', 'on', 'the', 'left', ',', 'there', 'is', 'a', 'small', 'brown', 'table', 'on', 'the', 'left', 'side', 'of', 'the', 'cabinet', 'and', 'a', 'smaller', 'table', 'on', 'the', 'right', 'side', 'of', 'the', 'cabinet']}
