In [1]:
import json
import os
import random

# Dataset selection and filesystem layout for this notebook.
# The working directory is switched to the dataset folder before the project
# imports further down are executed.
# NOTE(review): presumably the `templates` package is resolved relative to this
# directory — confirm before moving the chdir call.
dataset = "scannetpp"
os.chdir(f"/mnt/bn/nlhei-nas/liubangya/proj/vlm/datasets/{dataset}")
# Root folder handed to QADataset and used by scene_valid_fn to locate scenes.
DS_ROOT = f"/mnt/bn/nlhei-nas/liubangya/proj/vlm/datasets/{dataset}/structured-data"
# Fraction of the generated QAs that the export cell holds out as the test split.
TEST_SPLIT = 0.2
# Caption file; in the visible cells it is only referenced by the commented-out
# Captioner construction.
CAP_FILE = f"/mnt/bn/nlhei-nas/liubangya/proj/vlm/datasets/{dataset}/captions_all.yaml"
# JSON file providing the "H", "W", "patchsize_H" and "patchsize_W" entries.
grid_cfg_file = f"/mnt/bn/nlhei-nas/liubangya/proj/vlm/QA/grid_cfg_{dataset}.json"

# function pool here
from templates.filter import *
from templates.func import *
from templates.QA import QADataset

def scene_valid_fn(sc_name):
    """Tell whether a scene folder carries its ``finished.flag`` marker.

    Args:
        sc_name: Scene folder name; it is joined onto ``DS_ROOT``.

    Returns:
        bool: ``True`` when ``<DS_ROOT>/<sc_name>/finished.flag`` exists on
        disk, ``False`` otherwise.
    """
    flag_path = os.path.join(DS_ROOT, sc_name, "finished.flag")
    return os.path.exists(flag_path)

# Fix the seed of Python's `random` module before any QA generation runs.
# NOTE(review): whether the project code draws from `random` (rather than e.g.
# numpy) is not visible here — confirm that this seed is sufficient.
random.seed(0)
# Captioning is switched off: the Captioner construction is kept for reference
# and None is passed wherever a captioner is expected.
# myCap = Captioner(CAP_FILE)
myCap = None
# Build the dataset from the structured-data root (prints the number of scenes
# found, per the cell output).
ds = QADataset(DS_ROOT, myCap)


  0%|          | 0/301 [00:00<?, ?it/s]

Found 301 scenes in /mnt/bn/nlhei-nas/liubangya/proj/vlm/datasets/scannetpp/structured-data


In [8]:
%load_ext autoreload
%autoreload 2
from templates.task.tracking_tasks import TrackingTasks


# Read the image size and patch size from the grid configuration JSON.
with open(grid_cfg_file, "r") as f:
    grid_cfg = json.load(f)
H, W = grid_cfg["H"], grid_cfg["W"]
patchsize_H, patchsize_W = grid_cfg["patchsize_H"], grid_cfg["patchsize_W"]
n_frames = 2

# Settings passed to TrackingTasks through `cfg=`; the frame count is reused
# as the "motion_thres" value.
tasks_cfg = dict(
    total_QAs=50000,
    roi_frame_only=True,
    H=H,
    W=W,
    patchsize_H=patchsize_H,
    patchsize_W=patchsize_W,
    motion_thres=n_frames,
    num_frame=n_frames,
    prefix="scannetpp_new",
)

total_qas = tasks_cfg["total_QAs"]
# Number of whole patches along each image side.
H_grids = H // patchsize_H
W_grids = W // patchsize_W
prefix = tasks_cfg["prefix"]

# Workspace folder for this run, keyed by prefix, grid shape and frame count.
# (An earlier layout used .../vlm-found3d/tasks/grid_idx_{H_grids}x{W_grids}.)
task_dir = f"/mnt/bn/nlhei-nas/liubangya/proj/vlm/workspace/image_{prefix}_{H_grids}x{W_grids}_{n_frames}"
OUTPUT_DIR = os.path.join(task_dir, "pairs")
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_QWEN = f"{OUTPUT_DIR}/QA_pairs_qwen.json"
OUTPUT_JSON = f"{OUTPUT_DIR}/QA_pairs.json"

# Registry entry describing where the train/test files of this task live;
# printed so it can be copied into a task list.
task_obj = {
    os.path.basename(task_dir): dict(
        train_qa=OUTPUT_QWEN.replace(".json", ".train.json"),
        test_qa=OUTPUT_QWEN.replace(".json", ".test.json"),
        train_qa_meta=OUTPUT_JSON.replace(".json", ".train.json"),
        test_qa_meta=OUTPUT_JSON.replace(".json", ".test.json"),
    )
}
print(json.dumps(task_obj, indent=4))


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{
    "image_scannetpp_new_16x24_2": {
        "train_qa": "/mnt/bn/nlhei-nas/liubangya/proj/vlm/workspace/image_scannetpp_new_16x24_2/pairs/QA_pairs_qwen.train.json",
        "test_qa": "/mnt/bn/nlhei-nas/liubangya/proj/vlm/workspace/image_scannetpp_new_16x24_2/pairs/QA_pairs_qwen.test.json",
        "train_qa_meta": "/mnt/bn/nlhei-nas/liubangya/proj/vlm/workspace/image_scannetpp_new_16x24_2/pairs/QA_pairs.train.json",
        "test_qa_meta": "/mnt/bn/nlhei-nas/liubangya/proj/vlm/workspace/image_scannetpp_new_16x24_2/pairs/QA_pairs.test.json"
    }
}


In [13]:

# Base object filter combining the visibility filter, an area filter and a
# category black list.
# NOTE(review): the two numbers given to filter_area_fn are presumably the
# min/max area bounds — confirm against templates.filter.
myfilter = filter_all(
    filter_visiblity,
    filter_area_fn(1e4, 4e5),
    black_list_fn([
        "movable_object.trafficcone",
        "movable_object.barrier",
        "door",
        "doorframe",
        "wall",
        "window",
        "table",
    ]),
)

# Task generator configured with the (disabled) captioner, the filter above
# and the task settings.
taskset = TrackingTasks(captioner=myCap, basefilter=myfilter, cfg=tasks_cfg)

# Generate the requested number of QA items over the dataset.
qas, stats = taskset.produce(dataset=ds, num_qas=total_qas, verbose=False)

print(f"total {len(qas)} qas")
print(f"stats: {json.dumps(stats, indent=2)}")

# Summarise which objects and scenes the generated QAs touch.
all_dumps = [item.dump() for item in qas]
content_stats = {"objs": set(), "scenes": set()}
for qa in all_dumps:
    content_stats["objs"] |= set(qa["objs"])
    content_stats["scenes"].add(qa["scene"])
print(f"total objects: {len(content_stats['objs'])}")
print(f"total scenes: {len(content_stats['scenes'])}")



Generated 500 QAs, stats: {'obj_cross_frame_tracking': 500}
Generated 1000 QAs, stats: {'obj_cross_frame_tracking': 1000}
Generated 1500 QAs, stats: {'obj_cross_frame_tracking': 1500}
Generated 2000 QAs, stats: {'obj_cross_frame_tracking': 2000}
Generated 2500 QAs, stats: {'obj_cross_frame_tracking': 2500}
Generated 3000 QAs, stats: {'obj_cross_frame_tracking': 3000}
Generated 3500 QAs, stats: {'obj_cross_frame_tracking': 3500}
Generated 4000 QAs, stats: {'obj_cross_frame_tracking': 4000}
Generated 4500 QAs, stats: {'obj_cross_frame_tracking': 4500}
Generated 5000 QAs, stats: {'obj_cross_frame_tracking': 5000}
Generated 5500 QAs, stats: {'obj_cross_frame_tracking': 5500}
Generated 6000 QAs, stats: {'obj_cross_frame_tracking': 6000}
Generated 6500 QAs, stats: {'obj_cross_frame_tracking': 6500}
Generated 7000 QAs, stats: {'obj_cross_frame_tracking': 7000}
Generated 7500 QAs, stats: {'obj_cross_frame_tracking': 7500}
Generated 7500 QAs, stats: {'obj_cross_frame_tracking': 7500}
Generated 

In [14]:
# Train/test split and export.
#
# The last TEST_SPLIT fraction of `qas` becomes the test split and the rest the
# train split. Each split is written twice: once in the Qwen format and once
# as the raw metadata dump. Every record gets an "id" equal to its position
# inside its own split file.

# Clamp the test size to [0, len(qas)] and slice with an explicit index.
# Slicing with `-num_test` is wrong when num_test == 0: `qas[:-0]` is empty
# and `qas[-0:]` is the whole list, which would swap the two splits.
num_test = min(max(int(len(qas) * TEST_SPLIT), 0), len(qas))
split_at = len(qas) - num_test
qas_train = qas[:split_at]
qas_test = qas[split_at:]
assert len(qas_train) + len(qas_test) == len(qas)

# Include every frame in the Qwen records unless the task set is restricted
# to ROI frames.
all_frames = not taskset.cfg["roi_frame_only"]

# (base output path, record builder) pairs; written in the order
# Qwen-test, Qwen-train, meta-test, meta-train.
_exports = (
    (OUTPUT_QWEN, lambda item: item.qwen_format(all_frames=all_frames)),
    (OUTPUT_JSON, lambda item: item.dump()),
)
for base_path, to_record in _exports:
    for split_name, split_qas in (("test", qas_test), ("train", qas_train)):
        # Only the ".json" suffix is rewritten, using the same expression as
        # the `task_obj` registry, so the files land exactly where the
        # registry says. Replacing every "." would corrupt any directory
        # name that contains a dot.
        split_path = base_path.replace(".json", f".{split_name}.json")

        # Serialise before opening the file so a failing record does not
        # leave a truncated output behind.
        qas_dumps = [to_record(item) for item in split_qas]
        for i, qa in enumerate(qas_dumps):
            qa["id"] = i

        with open(split_path, "w") as f:
            json.dump(qas_dumps, f, indent=2)
