In [1]:
import os
import sys
import datetime
import numpy as np
from load_scannet_data import export, export_with_2dseg
import pdb
from plyfile import PlyData, PlyElement
from PIL import Image

2022-05-08 12:27:33.921642: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
#BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Make the sibling ../deeplab directory importable. The entry is a relative path, so it is
# resolved against the current working directory of the kernel.
# NOTE: os.path.join with a single argument returns that argument unchanged.
sys.path.append(os.path.join('../deeplab'))
# Helper that runs the 2D semantic segmentation graph (used further below on a color frame).
from deeplab import run_semantic_segmentation_graph

In [3]:
# Dataset locations (relative to the working directory of the notebook).
SCANNET_DIR = 'scans'
EXPORTED_2D_DIR = 'frames_square'

# Names of the training scans, one per line in the split file.
# Read through a context manager so the file handle is closed deterministically.
with open('meta_data/scannet_train.txt') as split_file:
    TRAIN_SCAN_NAMES = [line.rstrip() for line in split_file]

LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv'
DONOTCARE_CLASS_IDS = np.array([])
# Class ids treated as objects (18 entries) -- presumably NYU40 ids; verify against the label map.
OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39])
MAX_NUM_POINT = 50000
#OUTPUT_FOLDER = './scannet_train_detection_data'
OUTPUT_FOLDER = './scannet_train_detection_data_painted'

In [8]:
# Single scan that every following cell operates on.
scan_name = 'scene0000_00'
#mesh_vertices, semantic_labels, instance_labels, instance_bboxes = export_one_scan_with_2dseg(scan_name, 'xx')

In [6]:
def write_ply_rgb(points, colors, out_filename):
    """Write a colored point cloud to a text (ASCII) PLY file.

    Args:
        points: (N, 3) array of x, y, z coordinates (stored as float32).
        colors: (N, 3) array of RGB values, expected to lie in [0, 255];
            fractional values are truncated to integers before being stored as uint8.
        out_filename: path of the PLY file to write.
    """
    points = np.asarray(points)
    colors = np.asarray(colors).astype(int)

    # Fill the structured vertex array column by column instead of building
    # one Python tuple per point; this is equivalent but avoids an O(N) Python loop.
    vertex = np.empty(
        points.shape[0],
        dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
               ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
    vertex['x'] = points[:, 0]
    vertex['y'] = points[:, 1]
    vertex['z'] = points[:, 2]
    vertex['red'] = colors[:, 0]
    vertex['green'] = colors[:, 1]
    vertex['blue'] = colors[:, 2]

    el = PlyElement.describe(vertex, 'vertex', comments=['vertices'])
    PlyData([el], text=True).write(out_filename)

## Import mesh, axis align matrix

In [10]:
# Load the previously exported vertex array "<scan_name>_vert.npy" for the selected scan.
mesh_vertices = np.load(
    os.path.join('scannet_train_detection_data', scan_name + '_vert.npy')
)

In [13]:
# Per-scan metadata file; for train-set scans it contains the axisAlignment entry.
meta_file = os.path.join('scans', scan_name, '{}.txt'.format(scan_name))

In [14]:
# Load the scene axis-alignment matrix and the color image size from the metadata file.
# The file is read inside a context manager so the handle is closed right away.
with open(meta_file) as meta_f:
    lines = meta_f.readlines()

for line in lines:
    # Entries look like "key = value ...". Take everything after the first '='
    # instead of using str.strip('key = '): strip() removes a *set of characters*
    # from both ends, not a prefix, and could eat leading/trailing value characters.
    if 'axisAlignment' in line:
        axis_align_matrix = [float(x) for x in line.split('=', 1)[1].split()]
    if 'colorWidth' in line:
        colorW = int(line.split('=', 1)[1].split()[0])
    if 'colorHeight' in line:
        colorH = int(line.split('=', 1)[1].split()[0])

# The 16 parsed values form a 4x4 matrix in row-major order.
axis_align_matrix = np.array(axis_align_matrix).reshape((4,4))

In [15]:
# Homogeneous coordinates: one row [x, y, z, 1] per mesh vertex.
alinged_verts = np.ones((len(mesh_vertices), 4))
alinged_verts[:, 0:3] = mesh_vertices[:, 0:3]

In [16]:
# Undo the axis alignment so the vertices match the coordinates of the color frames.
# Points are stored as rows, hence the transposed matrix is the one being inverted.
unalign_verts = alinged_verts @ np.linalg.inv(axis_align_matrix.T)

In [17]:
def read_matrix(filepath):
    """Read a whitespace-separated 4x4 matrix from a text file.

    Blank lines are ignored and values may be separated by any run of
    whitespace (spaces or tabs). Only the first four values of each row are
    used. Rows missing from the file are left as zeros.

    Args:
        filepath: path of the text file holding the matrix, one row per line.

    Returns:
        A (4, 4) float numpy array.

    Raises:
        IndexError: if a row has fewer than four values or the file has more
            than four non-blank rows.
        ValueError: if a value cannot be parsed as a float.
    """
    out_matrix = np.zeros((4,4))

    with open(filepath, 'r') as f:
        # split() without arguments tolerates repeated spaces and tabs;
        # skipping blank lines tolerates a trailing newline at the end of the file.
        rows = [line.split() for line in f if line.strip()]

    for i, values in enumerate(rows):
        for j in range(4):
            out_matrix[i,j] = float(values[j])
    return out_matrix

In [19]:
# Directory holding the exported 2D data of this scan, and its color-camera intrinsics.
exported_scan_dir = os.path.join('frames_square', scan_name)
color_intrinsic_file = os.path.join(
    exported_scan_dir, 'intrinsic', 'intrinsic_color.txt'
)
color_intrinsic = read_matrix(color_intrinsic_file)

In [21]:
frame=0
pose_file = os.path.join(exported_scan_dir, 'pose', str(frame) + '.txt')
img_file = os.path.join(exported_scan_dir, 'color', str(frame) + '.jpg')

# Read the 4x4 pose matrix (rotation and translation) of this frame.
# Its inverse is applied below -- presumably the file stores camera-to-world; confirm.
pose_matrix = read_matrix(pose_file)

# Homogeneous coordinates of the un-aligned vertices, one row per point.
sampled_h = np.ones((len(unalign_verts), 4))
sampled_h[:,:3] = unalign_verts[:,:3]

# Transform points with the inverse pose, then apply the color intrinsics.
# Both results are 4 x N (one column per point).
camera_coord = np.matmul(np.linalg.inv(pose_matrix), np.transpose(sampled_h))
camera_proj = np.matmul(color_intrinsic, camera_coord)

# Keep points that project inside the color image and lie in front of the camera.
x = camera_proj[0,:]
y = camera_proj[1,:]
z = camera_proj[2,:]
filter_idx = np.where((x/z >= 0) & (x/z < colorW) & (y/z >= 0) & (y/z < colorH) & (z > 0))[0]

# Perspective divide: normalize by the third row (depth z) to obtain pixel coordinates.
camera_proj_normalized = camera_proj / camera_proj[2,:]

# 3D -> 2D mapping for the points that passed the filter.
projected = camera_proj_normalized[:2, filter_idx]

# Rescale pixel coordinates from (colorW, colorH) to a 320 x 240 image.
# NOTE: only rows 0 and 1 of camera_proj_sm are filled in this cell.
camera_proj_sm = np.zeros((5, projected.shape[-1]))

camera_proj_sm[0,:] = projected[0,:] * 320/colorW
camera_proj_sm[1,:] = projected[1,:] * 240/colorH

# Integer pixel indices. A wide integer type is required: x ranges up to 319,
# which does not fit in uint8 (max 255) and would silently wrap around.
x = camera_proj_sm[0,:].astype(np.int64)
y = camera_proj_sm[1,:].astype(np.int64)

In [52]:
# Load the depth image of the same frame. Depth is needed to remove occluded
# points, because occlusion cannot be recovered from the 2D color image alone.
depth_file = os.path.join(exported_scan_dir, 'depth', '{}.png'.format(frame))

depth_img = Image.open(depth_file)
depth_np = np.array(depth_img)

In [54]:
# Keep a projected point only when the measured depth at its pixel, enlarged by a
# 10% margin, is greater than the point's own depth in the camera frame.
measured_depth = depth_np[y, x] / 1000  # presumably millimetres -> metres; confirm
point_depth = camera_proj[2, filter_idx]
depth_filter = np.nonzero(measured_depth * 1.1 > point_depth)[0]

# Indices (into the full vertex array) of the points that survive both filters.
filter_idx2 = filter_idx[depth_filter]

# NOTE: x and y are overwritten here, so this cell is not idempotent on re-run.
x, y = x[depth_filter], y[depth_filter]

In [55]:
# Vertex indices that pass both the image-bounds and the depth test.
filter_idx2 = filter_idx[depth_filter]

In [64]:
# Inspect a stored probability map; note the hard-coded scene differs from scan_name.
pred_prob = np.load(
    os.path.join('semantic_2d_results', 'scene0191_01', 'prob_1160.npy')
)

In [65]:
# Largest class index chosen anywhere in the loaded probability map.
np.argmax(pred_prob, axis=-1).max()

0

In [30]:
import tensorflow as tf

In [31]:
## Preparation for pointpainting
n_classes = 18

# Load the frozen DeepLabv3+ graph and open a TF1-style session on it.
INPUT_SIZE = (321, 321)
tf_v1 = tf.compat.v1

with tf_v1.gfile.GFile('../deeplab/saved_model/scannet_2.pb', "rb") as model_file:
    graph_def = tf_v1.GraphDef()
    graph_def.ParseFromString(model_file.read())

# Import the graph definition into a dedicated graph (empty name prefix).
myGraph = tf_v1.Graph()
with myGraph.as_default():
    tf_v1.import_graph_def(graph_def, name='')

sess = tf_v1.Session(graph=myGraph)

2022-05-01 14:55:39.672568: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:25:00.0 name: NVIDIA GeForce RTX 3090 computeCapability: 8.6
coreClock: 1.695GHz coreCount: 82 deviceMemorySize: 23.70GiB deviceMemoryBandwidth: 871.81GiB/s
2022-05-01 14:55:39.673788: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0
2022-05-01 14:55:39.673831: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-05-01 14:55:39.673836: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264]      0 
2022-05-01 14:55:39.673840: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0:   N 
2022-05-01 14:55:39.675046: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22318 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:25:00

In [41]:
# Get the segmentation result for the color frame.
img = Image.open(img_file)
pred_prob, pred_class = run_semantic_segmentation_graph(img, sess, INPUT_SIZE)
# Keep the first n_classes + 1 channels; channel 0 is the background class.
pred_prob = pred_prob[:, :, :n_classes + 1]

# Sample scores and predicted class at the pixel of every projected point.
pred_prob = pred_prob[y, x]
projected_class = pred_class[y, x]

# 1 where the point falls on a foreground class (1..n_classes), 0 otherwise.
is_foreground = (projected_class > 0) & (projected_class < n_classes + 1)
isPainted = is_foreground.astype(int)

In [43]:
# Per-point layout: xyz (3) + isPainted flag (1) + class scores (n_classes + 1, with background).
num_point_features = 3 + 1 + (n_classes + 1)
painted_verts = np.zeros((len(alinged_verts), num_point_features))

In [50]:
# Copy coordinates for every vertex; unpainted vertices keep zeros in the remaining columns.
painted_verts[:,:3] = alinged_verts[:,:3]
# isPainted / pred_prob were sampled with the depth-filtered pixel indices, so they
# have one entry per point in filter_idx2 (not filter_idx, which is the pre-depth-filter
# index set and generally longer).
painted_verts[filter_idx2,3] = isPainted
painted_verts[filter_idx2,4:] = pred_prob

In [51]:
# Build an xyz + RGB array for visualisation: painted points are white, the rest black.
# Size the array from painted_verts instead of a hard-coded 50000 rows so that
# scans with a different number of points do not fail with a shape mismatch.
mesh_vertices2 = np.zeros((painted_verts.shape[0], 6))
mesh_vertices2[:,:3] = painted_verts[:,:3]
# Column 3 holds the 0/1 isPainted flag; replicate it into the three color channels.
mesh_vertices2[:,3:] = np.tile(255*painted_verts[:,3,None], (1,3))

In [52]:
# Export the painted point cloud for visual inspection.
write_ply_rgb(
    points=mesh_vertices2[:, :3],
    colors=mesh_vertices2[:, 3:],
    out_filename="painting_test_sample.ply",
)