## Mask3D prediction

In [2]:
%cd ..

/221019046/Projects/ZSVG3D


using downloaded [mask3d_inst_seg.zip](https://drive.google.com/file/d/1eIdmuEBeM4OxJ9dmucZvHK8_4mOhcbEV/view?usp=sharing)

In [4]:
import sys, os
import torch
from plyfile import PlyData
import numpy as np
from tqdm import tqdm


from preprocess.utils import get_train_val_split

train_scene_ids, val_scene_ids = get_train_val_split()
scene_ids = train_scene_ids + val_scene_ids

scan_dir = '/LiZhen_team/dataset/scannet/scans'
mask3d_pred_dir = '/LiZhen_team/dataset/scannet/mask3d_inst_seg'
save_dir = '/LiZhen_team/dataset/scannet/mask3d_inst_seg_pcds'
os.makedirs(save_dir, exist_ok=True)

for scan_id in tqdm(scene_ids):
    # Load point clouds with colors
    with open(os.path.join(scan_dir, scan_id, '%s_vh_clean_2.ply'%(scan_id)), 'rb') as f:
        plydata = PlyData.read(f) # elements: vertex, face
    points = np.array([list(x) for x in plydata.elements[0]]) # [[x, y, z, r, g, b, alpha]]
    coords = np.ascontiguousarray(points[:, :3])
    colors = np.ascontiguousarray(points[:, 3:6])

    align_matrix = np.eye(4)
    with open(os.path.join(scan_dir, scan_id, '%s.txt' % (scan_id)), 'r') as f:
        for line in f:
            if line.startswith('axisAlignment'):
                align_matrix = np.array([float(x) for x in line.strip().split()[-16:]]).astype(np.float32).reshape(4, 4)
                break
    # Transform the points
    pts = np.ones((coords.shape[0], 4), dtype=coords.dtype)
    pts[:, 0:3] = coords
    coords = np.dot(pts, align_matrix.transpose())[:, :3]  # Nx4
    # Make sure no nans are introduced after conversion
    assert (np.sum(np.isnan(coords)) == 0)

    ins_pcds = []
    ins_labels = []

    try:
        cur_instances = torch.load(os.path.join(mask3d_pred_dir, f"{scan_id}.pt"))
    except:
        continue
    for instance in cur_instances:
        inst_mask = instance["segments"]
        pcd = coords[inst_mask]
        color = colors[inst_mask]

        if len(pcd) < 10:
            continue

        ins_labels.append(instance["label"])
        ins_pcds.append(np.concatenate([pcd, color], axis=1))

    np.savez_compressed(os.path.join(save_dir, scan_id+'.npz'), ins_pcds=ins_pcds, ins_labels=ins_labels)


  0%|          | 0/1513 [00:00<?, ?it/s]

  val = np.asanyarray(val)
100%|██████████| 1513/1513 [1:35:57<00:00,  3.81s/it]


[optional] Using output from Mask3D

In [None]:
import sys, os
import numpy as np
from tqdm import tqdm

from preprocess.utils import get_train_val_split

train_scene_ids, val_scene_ids = get_train_val_split()
scene_ids = train_scene_ids + val_scene_ids

pc_path = 'data/processed/scannet/validation'
pred_path = 'Mask3D/eval_output/instance_evaluation_scannet200_val_query_150_topk_750_dbscan_0.95_0/decoder_-1'
save_dir = '/221019046/Data/Mask3d/scannet'
scan_dir = '/LiZhen_team/dataset/scannet/scans'

os.makedirs(save_dir, exist_ok=True)

for scene_id in tqdm(scene_ids):
    # print(scene_id)
    pc = np.load(os.path.join(pc_path, scene_id[-7:] + '.npy'))

    align_matrix = np.eye(4)
    with open(os.path.join(scan_dir, scene_id, '%s.txt' % (scene_id)), 'r') as f:
        for line in f:
            if line.startswith('axisAlignment'):
                align_matrix = np.array([float(x) for x in line.strip().split()[-16:]
                                        ]).astype(np.float32).reshape(4, 4)
                break
    # Transform the points
    pts = np.ones((pc.shape[0], 4), dtype=np.float32)
    pts[:, 0:3] = pc[:, 0:3]
    coords = np.dot(pts, align_matrix.transpose())[:, :3]     # Nx4
    # Make sure no nans are introduced after conversion
    assert (np.sum(np.isnan(coords)) == 0)
    pc[:, 0:3] = coords

    ins_pcds = []
    ins_labels = []
    ins_scores = []

    pred_info = np.genfromtxt(os.path.join(pred_path, scene_id + '.txt'), dtype='str')
    for i in range(pred_info.shape[0]):
        path = pred_info[i, 0]
        pred_class = pred_info[i, 1]
        pred_score = pred_info[i, 2]

        # control the number of instances by score
        if float(pred_score) < 0.01:
            continue

        pred_ins = np.loadtxt(os.path.join(pred_path, path))

        ins = pc[pred_ins.astype(bool), :6]
        # ins[:, 3:] = SCANNET_COLOR_MAP_200[int(pred_class)]
        pred_label = CLASS_LABELS_200[VALID_CLASS_IDS_200.index(int(pred_class))]
        # pred_label = CLASS_LABELS_20[VALID_CLASS_IDS_20.index(int(pred_class))]
        # ins[:, 3:] = SCANNET_COLOR_MAP_20[int(pred_class)]
        # print(ins.shape, pred_class, pred_score)
        ins_pcds.append(ins)
        ins_labels.append(pred_label)
        # ins_scores.append(pred_score)

    np.savez_compressed(os.path.join(save_dir, scene_id + '.npz'),
                        ins_pcds=ins_pcds,
                        ins_labels=ins_labels)

    # break


### Exctract features

In [5]:
from preprocess.utils import load_pred_ins, get_train_val_split
from models.pcd_classifier import PcdClassifier
import pickle
import torch
from tqdm import tqdm


train_scene_ids, val_scene_ids = get_train_val_split()
scene_ids = train_scene_ids + val_scene_ids


model = PcdClassifier().cuda()
ckpt_path = 'weights/pnext_cls.pth'
root_dir = '/LiZhen_team/dataset/scannet/mask3d_inst_seg_pcds'

weights = torch.load(ckpt_path, map_location='cpu')
info = model.load_state_dict(weights, strict=False)
print(info)

# save model weights
# torch.save(model.state_dict(), 'weights/pnext_cls.pth')

model.eval()


data = {}

for scan_id in tqdm(scene_ids):

    try:
        batch_labels, inst_locs, center, batch_pcds = load_pred_ins(root_dir, scan_id)
    except:
        continue
    obj_ids = list(range(len(batch_labels)))

    obj_embeds = model(batch_pcds[..., :4].cuda())     # (B, D)
    obj_embeds = obj_embeds / obj_embeds.norm(p=2, dim=-1, keepdim=True)

    data[scan_id] = {
        'batch_labels': batch_labels,
        'obj_ids': obj_ids,
        'inst_locs': inst_locs,
        'center': center,
        'obj_embeds': obj_embeds.detach().cpu()
    }

    # break

# save in pickle
with open('data/scannet/feats_3d_mask3d_ulip.pkl', 'wb') as f:
    pickle.dump(data, f)

model size:
1363264
<All keys matched successfully>


100%|██████████| 1513/1513 [08:30<00:00,  2.96it/s]


In [4]:
import numpy as np
import os
from data.scannet200_constants import SCANNET_COLOR_MAP_200, CLASS_LABELS_200, VALID_CLASS_IDS_200
from tqdm import tqdm


scan_id_file = "data/scannet/splits/scannetv2_val.txt"
scene_list = set([x.strip() for x in open(scan_id_file, 'r')])
scene_list = sorted(list(scene_list))

pc_path = 'data/processed/scannet/validation'
pred_path = 'eval_output/instance_evaluation_scannet200_val_query_150_topk_750_dbscan_0.95_0/decoder_-1'
save_dir = '/221019046/Data/Mask3d/scannet'
scan_dir = '/LiZhen_team/dataset/scannet/scans'

os.makedirs(save_dir, exist_ok=True)

for scene_id in tqdm(scene_list):
    # print(scene_id)
    pc = np.load(os.path.join(pc_path, scene_id[-7:] + '.npy'))

    align_matrix = np.eye(4)
    with open(os.path.join(scan_dir, scene_id, '%s.txt' % (scene_id)), 'r') as f:
        for line in f:
            if line.startswith('axisAlignment'):
                align_matrix = np.array([float(x) for x in line.strip().split()[-16:]]).astype(np.float32).reshape(4, 4)
                break
    # Transform the points
    pts = np.ones((pc.shape[0], 4), dtype=np.float32)
    pts[:, 0:3] = pc[:, 0:3]
    coords = np.dot(pts, align_matrix.transpose())[:, :3]  # Nx4
    # Make sure no nans are introduced after conversion
    assert (np.sum(np.isnan(coords)) == 0)
    pc[:, 0:3] = coords

    ins_pcds = []
    ins_labels = []
    ins_scores = []

    pred_info = np.genfromtxt(os.path.join(pred_path, scene_id+'.txt'), dtype='str')
    for i in range(pred_info.shape[0]):
        path = pred_info[i, 0]
        pred_class = pred_info[i, 1]
        pred_score = pred_info[i, 2]
        if float(pred_score) < 0.01:
            continue

        pred_ins = np.loadtxt(os.path.join(pred_path, path))

        ins = pc[pred_ins.astype(bool), :6]
        pred_label = CLASS_LABELS_200[VALID_CLASS_IDS_200.index(int(pred_class))]
        ins_pcds.append(ins)
        ins_labels.append(pred_label)
        ins_scores.append(pred_score)

    np.savez_compressed(os.path.join(save_dir, scene_id+'.npz'), ins_pcds=ins_pcds, ins_labels=ins_labels, ins_scores=ins_scores)

    # break


  0%|          | 0/312 [00:00<?, ?it/s]

  3%|▎         | 8/312 [01:43<48:01,  9.48s/it]  

## Visualize

In [12]:
import numpy as np
import os
import torch
from data.scannet200_constants import SCANNET_COLOR_MAP_200, CLASS_LABELS_200, VALID_CLASS_IDS_200
from tqdm import tqdm
from pytorch3d.structures import Pointclouds
from pytorch3d.vis.plotly_vis import plot_scene


scan_id_file = "data/scannet/splits/scannetv2_val.txt"
scene_list = set([x.strip() for x in open(scan_id_file, 'r')])
scene_list = sorted(list(scene_list))

pc_path = 'data/processed/scannet/validation'
pred_path = 'eval_output/instance_evaluation_scannet200_val_query_150_topk_750_dbscan_0.95_0/decoder_-1'
scan_dir = '/LiZhen_team/dataset/scannet/scans'


for scene_id in tqdm(scene_list):
    if scene_id == 'scene0647_00':
        pc = np.load(os.path.join(pc_path, scene_id[-7:] + '.npy'))

        align_matrix = np.eye(4)
        with open(os.path.join(scan_dir, scene_id, '%s.txt' % (scene_id)), 'r') as f:
            for line in f:
                if line.startswith('axisAlignment'):
                    align_matrix = np.array([float(x) for x in line.strip().split()[-16:]]).astype(np.float32).reshape(4, 4)
                    break
        # Transform the points
        pts = np.ones((pc.shape[0], 4), dtype=np.float32)
        pts[:, 0:3] = pc[:, 0:3]
        coords = np.dot(pts, align_matrix.transpose())[:, :3]  # Nx4
        # Make sure no nans are introduced after conversion
        assert (np.sum(np.isnan(coords)) == 0)
        pc[:, 0:3] = coords

        ins_pcds = []
        pred_info = np.genfromtxt(os.path.join(pred_path, scene_id + '.txt'), dtype='str')
        for i in range(pred_info.shape[0]):
            path = pred_info[i, 0]
            pred_class = pred_info[i, 1]
            pred_score = pred_info[i, 2]

            if float(pred_score) < 0.01:
                continue

            pred_ins = np.loadtxt(os.path.join(pred_path, path))

            ins = pc[pred_ins.astype(bool), :6]
            ins[:, 3:] = SCANNET_COLOR_MAP_200[int(pred_class)]
            ins_pcds.append(ins)

        break

print(len(ins_pcds))
pc = np.concatenate(ins_pcds, axis=0)
print(pc.shape)

points = torch.tensor(pc[:, :3]).unsqueeze(0).cuda()
colors = torch.tensor(pc[:, 3:]).unsqueeze(0).cuda() / 255

point_cloud = Pointclouds(points=points, features=colors)

plot_scene({"Pointcloud": {"scene0011_00": point_cloud}}, pointcloud_max_points=100000)

  0%|          | 0/312 [00:00<?, ?it/s]

 79%|███████▊  | 245/312 [00:09<00:02, 26.36it/s]


36
(84963, 6)
