In [None]:
import numpy as np
import open3d as o3d
import clip
import torch
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from evaluation_vis_util import draw_plotly, create_bbox
from sklearn.cluster import DBSCAN
import os
from pathlib import Path
import json
from utils import find_clusters, ground_open_scene_embedding, get_transformation_matrix, is_label_unique, construct_bbox_corners, get_box3d_min_max, box3d_iou

In [None]:
# Directory that get_val_set() walks to find per-scene annotation JSON files.
# Left blank here; fill it in before running the cells below.
root_directory = ''

# Run CLIP on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# `preprocess` is the transform returned alongside the model by clip.load.
model, preprocess = clip.load("ViT-L/14@336px", device=device)

In [None]:
def get_val_set(root=None):
    """Map each folder name to the path of one JSON file stored inside it.

    Walks the directory tree below ``root`` and, for every directory holding at
    least one ``.json`` file, records the alphabetically first such file.

    Args:
        root: Directory to search. When omitted, the notebook-level
            ``root_directory`` is used, matching the original zero-argument call.

    Returns:
        dict: ``{directory basename: path of its first JSON file}``. Directories
        without any JSON file are left out; the dict is empty if none are found.
    """
    if root is None:
        root = root_directory

    json_dict = {}
    for dirpath, _, filenames in os.walk(root):
        # Sort so the chosen file does not depend on the filesystem listing order.
        json_files = sorted(fn for fn in filenames if fn.endswith('.json'))
        if json_files:
            # Pick an actual JSON file; blindly taking filenames[0] could return
            # an unrelated file that merely sits next to a JSON.
            json_dict[os.path.basename(dirpath)] = os.path.join(dirpath, json_files[0])

    return json_dict

In [None]:
# Build the {folder name: JSON path} lookup by walking root_directory; the
# folder names are used as scene names by the cells below.
json_dict = get_val_set()

In [None]:
# Total number of descriptions across every object of every scene file.
number = 0
for annotation_path in json_dict.values():
    with open(annotation_path, 'r') as handle:
        scene_annotations = json.load(handle)
    number += sum(len(obj['description']) for obj in scene_annotations['objects'])

In [None]:
# Hit counters for the IoU > 0.25 and IoU > 0.5 thresholds: overall, then split
# by whether the target object's label is unique within its scene.
acc_25 = acc_50 = 0
acc_25_unique = acc_50_unique = 0
acc_25_multiple = acc_50_multiple = 0

# Number of evaluated descriptions: overall / unique-label / shared-label.
total_object = total_unique_object = total_multiple_object = 0

# Per-description records; each name gets its own list object.
list_iou, session_id_list, is_unique_list = [], [], []

# Raw outputs collected per description, one independent list per column.
result_dict = {
    column: []
    for column in (
        'scene_name',
        'description',
        'centroid_list',
        'extent_list',
        'similarity_mean_list_list',
        'ground truth',
    )
}

In [None]:
# Evaluate every description of every annotated object, scene by scene.
#
# NOTE: the counters and result lists updated here are initialised in a separate
# cell; re-run that cell before re-running this one, otherwise results accumulate.

# Dataset locations are loop-invariant, so they are defined once up front.
SCANNET_SCANS_DIR = Path("/workspace/chat-with-nerf-eval/data/scannet/scans")
OPENSCENE_DATA_DIR = Path("/workspace/openscene_data")

for scene_name, json_path in json_dict.items():
    with open(json_path, 'r') as file:
        data = json.load(file)
    furnitures = data['objects']

    # Per-scene 4x4 transform read from <scene>/<scene>.txt.
    scene_dir = SCANNET_SCANS_DIR / scene_name
    specific_file_path = scene_dir / (scene_name + ".txt")
    axisAlignment_matrix = get_transformation_matrix(specific_file_path)
    axis_align_matrix = np.array(axisAlignment_matrix).reshape((4, 4))

    mesh = o3d.io.read_triangle_mesh(str(scene_dir / f"{scene_name}_vh_clean_2.ply"))

    # Apply the transform to the vertices in homogeneous coordinates.
    mesh_vertices = np.asarray(mesh.vertices)
    pts = np.ones((mesh_vertices.shape[0], 4))
    pts[:, 0:3] = mesh_vertices[:, 0:3]
    pts = np.dot(pts, axis_align_matrix.transpose())  # Nx4
    aligned_vertices = np.copy(mesh_vertices)
    aligned_vertices[:, 0:3] = pts[:, 0:3]
    mesh.vertices = o3d.utility.Vector3dVector(aligned_vertices)

    # Compute normals only after the vertices have been moved; normals derived
    # from the untransformed geometry would not match the aligned mesh.
    mesh.compute_vertex_normals()
    mesh.compute_triangle_normals()

    clip_embedding = np.load(
        OPENSCENE_DATA_DIR / scene_name / f"{scene_name}_vh_clean_2_openscene_feat_distill.npy"
    )

    for furniture in furnitures:
        bbox = furniture['bbox']
        center_original, extents_original = bbox[:3], bbox[3:6]
        corners_original = construct_bbox_corners(center_original, extents_original)
        # Ground-truth corners do not change per description; convert them once.
        gt_corners = np.array(corners_original)

        label = furniture['label']
        is_unique = is_label_unique(furnitures, label)
        descriptions = furniture['description']
        if is_unique:
            total_unique_object += len(descriptions)
        else:
            total_multiple_object += len(descriptions)

        for description in descriptions:
            is_unique_list.append(is_unique)
            total_object += 1
            centroids, extents, similarity_mean_list = ground_open_scene_embedding(
                description, device, model, clip_embedding, mesh
            )
            result_dict['scene_name'].append(scene_name)
            result_dict['description'].append(description)
            result_dict['centroid_list'].append(centroids)
            result_dict['extent_list'].append(extents)
            result_dict['similarity_mean_list_list'].append(similarity_mean_list)
            result_dict['ground truth'].append(center_original + extents_original)

            # IoU of each candidate box against the ground-truth box.
            iou3d_list = [
                box3d_iou(gt_corners, construct_bbox_corners(center, extend))
                for center, extend in zip(centroids, extents)
            ]

            # If grounding produced no candidate at all, score the description
            # as a miss (IoU 0) rather than crashing on max() of an empty list.
            max_iou = max(iou3d_list, default=0.0)
            if max_iou > 0.25:
                acc_25 += 1
                if is_unique:
                    acc_25_unique += 1
                else:
                    acc_25_multiple += 1
            if max_iou > 0.5:
                acc_50 += 1
                if is_unique:
                    acc_50_unique += 1
                else:
                    acc_50_multiple += 1
            list_iou.append(max_iou)

In [None]:
# Dump the raw counters and per-description records, one "name = value" per line.
for caption, recorded in (
    ("acc_25 =", acc_25),
    ("acc_50 =", acc_50),
    ("acc_25_unique =", acc_25_unique),
    ("acc_50_unique =", acc_50_unique),
    ("acc_25_multiple =", acc_25_multiple),
    ("acc_50_multiple =", acc_50_multiple),
    ("list_iou =", list_iou),
    ("total_object =", total_object),
    ("total_unique_object =", total_unique_object),
    ("total_multiple_object =", total_multiple_object),
    ("session_id_list =", session_id_list),
):
    print(caption, recorded)