In [13]:
import open3d as o3d
import numpy as np 
import os 
from scipy.spatial import KDTree
from tqdm import tqdm, trange

In [4]:
# Root directories of the two datasets. The analysis cells list the entries of
# each directory and treat every entry name as a scene whose point cloud file
# lives inside the sub-directory of the same name.
mp3d_scans_dir = "/media/junting/SSD_data/habitat_data/scene_datasets/mp3d/v1/scans"
scannet_scans_dir = "/media/junting/SSD_data/ScanNet/scans"
first_K = 10 # number of scenes taken from the head of each directory listing for the nearest-neighbor analysis

In [22]:
def analyze_point_cloud(o3d_pcl):
    """Summarize the size, extent and sampling density of one point cloud.

    Args:
        o3d_pcl: object exposing a ``points`` attribute that ``np.asarray``
            turns into an ``(N, D)`` coordinate array (the notebook passes the
            result of ``o3d.io.read_point_cloud``).

    Returns:
        Tuple ``(num_points, scale, nn_dist)``:
            num_points: number of points ``N``.
            scale: per-axis extent, i.e. ``max - min`` along every coordinate
                axis.
            nn_dist: length-``N`` array whose entry ``i`` is the distance from
                point ``i`` to its closest other point. For a cloud with a
                single point no neighbor exists and SciPy reports ``inf``.

    Raises:
        ValueError: if the cloud contains no points (presumably the file could
            not be loaded; verify the path passed to the reader).
    """
    points = np.asarray(o3d_pcl.points)
    num_points = points.shape[0]
    if num_points == 0:
        # Fail early with a readable message instead of the opaque NumPy
        # "zero-size array to reduction operation" error further down.
        raise ValueError(
            "analyze_point_cloud: point cloud is empty, nothing to analyze"
        )

    # Every point is queried against the tree built from the same points, so
    # its first hit is itself at distance 0; k=2 exposes the real neighbor in
    # column 1. The neighbor indices are not needed.
    dist, _ = KDTree(points).query(points, k=2)
    scale = points.max(axis=0) - points.min(axis=0)
    return num_points, scale, dist[:, 1]

In [23]:
################ analyze mp3d dataset ##############
# Per-scene statistics for the first `first_K` mp3d scenes: point count,
# per-axis extent and nearest-neighbor distances (one list entry per scene).
mp3d_num_points = []
mp3d_scale = []
mp3d_dist = []
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make "the first K scenes" the same subset on every run and every machine.
for scene in tqdm(sorted(os.listdir(mp3d_scans_dir))[:first_K], desc="Analyze mp3d dataset:"):
    scene_dir = os.path.join(mp3d_scans_dir, scene)
    ply_file = os.path.join(scene_dir, f"{scene}_semantic.ply")
    o3d_pcl = o3d.io.read_point_cloud(ply_file)
    num_points, scale, dist = analyze_point_cloud(o3d_pcl)
    mp3d_num_points.append(num_points)
    mp3d_scale.append(scale)
    mp3d_dist.append(dist)

Analyze mp3d dataset:: 100%|██████████| 10/10 [01:13<00:00,  7.33s/it]


In [24]:
################ analyze scannet dataset ##############
# Per-scene statistics for the first `first_K` ScanNet scenes: point count,
# per-axis extent and nearest-neighbor distances (one list entry per scene).
scannet_num_points = []
scannet_scale = []
scannet_dist = []
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make "the first K scenes" the same subset on every run and every machine.
for scene in tqdm(sorted(os.listdir(scannet_scans_dir))[:first_K], desc="Analyze ScanNet dataset:"):
    scene_dir = os.path.join(scannet_scans_dir, scene)
    ply_file = os.path.join(scene_dir, f"{scene}_vh_clean_2.labels.ply")
    o3d_pcl = o3d.io.read_point_cloud(ply_file)
    num_points, scale, dist = analyze_point_cloud(o3d_pcl)
    scannet_num_points.append(num_points)
    scannet_scale.append(scale)
    scannet_dist.append(dist)

Analyze ScanNet dataset:: 100%|██████████| 10/10 [00:01<00:00,  6.11it/s]


In [25]:
# Merge the per-scene nearest-neighbor distance arrays into one flat array per
# dataset. The names are rebound from list to ndarray, so the isinstance guard
# keeps this cell idempotent: on a re-run the names already hold the merged
# arrays, and concatenating a 1-D array with itself element-wise would raise.
if not isinstance(mp3d_dist, np.ndarray):
    mp3d_dist = np.concatenate(mp3d_dist, axis=0)
if not isinstance(scannet_dist, np.ndarray):
    scannet_dist = np.concatenate(scannet_dist, axis=0)

In [35]:
# Recalculate scene point counts and ranges, this time over ALL scenes of both
# datasets. Note: this rebinds mp3d_num_points / scannet_num_points, replacing
# the first-K values collected by the earlier analysis cells.
def _collect_scene_stats(scans_dir, ply_suffix, desc):
    """Return per-scene point counts and per-axis extents for one dataset.

    Args:
        scans_dir: directory whose entries are scene names; each scene's point
            cloud is read from ``<scans_dir>/<scene>/<scene><ply_suffix>``.
        ply_suffix: text appended to the scene name to form the file name.
        desc: label shown on the tqdm progress bar.

    Returns:
        Tuple ``(num_points, extents)`` of two lists with one entry per scene:
        the number of points, and the array ``max - min`` along every axis.
    """
    num_points = []
    extents = []
    for scene in tqdm(os.listdir(scans_dir), desc=desc):
        ply_file = os.path.join(scans_dir, scene, f"{scene}{ply_suffix}")
        points = np.asarray(o3d.io.read_point_cloud(ply_file).points)
        num_points.append(points.shape[0])
        # Appended directly instead of going through a variable called
        # `range`, which would shadow the builtin for the rest of the session.
        extents.append(points.max(axis=0) - points.min(axis=0))
    return num_points, extents


mp3d_num_points, mp3d_range = _collect_scene_stats(
    mp3d_scans_dir, "_semantic.ply", "Analyze mp3d dataset:"
)
scannet_num_points, scannet_range = _collect_scene_stats(
    scannet_scans_dir, "_vh_clean_2.labels.ply", "Analyze ScanNet dataset:"
)


Analyze mp3d dataset:: 100%|██████████| 90/90 [01:10<00:00,  1.28it/s]
Analyze ScanNet dataset:: 100%|██████████| 1513/1513 [00:46<00:00, 32.45it/s]


In [41]:
# Pack the per-scene Python lists into NumPy arrays for plotting: the point
# counts become 1-D integer vectors, the per-axis ranges are stacked row-wise
# so that each row belongs to one scene.
mp3d_num_points_arr, scannet_num_points_arr = (
    np.array(counts, dtype=int)
    for counts in (mp3d_num_points, scannet_num_points)
)
mp3d_range_arr, scannet_range_arr = (
    np.stack(extents)
    for extents in (mp3d_range, scannet_range)
)

## Visualization

In [27]:
############### visualize dataset difference ###########
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd 
%matplotlib inline

### Point Cloud Density 

In [None]:
# Histogram of the nearest-neighbor distances of both datasets; the distance
# to the closest neighbor serves as a hint for point cloud density.
seed = 0
sample_n = 100000
np.random.seed(seed)

# mp3d_dist is too large to plot directly (13426176,), so both datasets are
# subsampled to sample_n distances (np.random.choice draws with replacement).
mp3d_min_dist_sampled = np.random.choice(mp3d_dist, sample_n)
scannet_min_dist_sampled = np.random.choice(scannet_dist, sample_n)

# Identical bin edges for both traces so the bars are directly comparable.
dist_bins = dict(start=0.0, end=0.05, size=0.005)

fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=mp3d_min_dist_sampled,
        opacity=0.75,
        name='mp3d',
        marker_color="red",
        histnorm='percent',
        xbins=dist_bins,
    )
)
fig.add_trace(
    go.Histogram(
        x=scannet_min_dist_sampled,
        opacity=0.75,
        # Labelled 'scannet' (not 'SGFN') to match the data source and the
        # legend names used by the other figures in this notebook.
        name='scannet',
        marker_color="blue",
        histnorm='percent',
        xbins=dist_bins,
    )
)
fig.update_layout(
    title_text='Minimum neighboring distance', # title of plot
    xaxis_title_text='Distance', # xaxis label
    yaxis_title_text='Percentage', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1 # gap between bars of the same location coordinates
)
fig.show()

### Scale of the scene

In [45]:
# # plot scene range with dim-wise histogram
# # visualize position distribution by x y z axes separately 
# nbinsx=20

# fig = make_subplots(rows=3, cols=1, subplot_titles=("X-axis", "Y-axis", "Z-axis"))

# fig.append_trace(
#     go.Histogram(x=mp3d_range_arr[:,0], opacity=0.75, name = 'mp3d', marker_color="red", histnorm='percent', nbinsx=nbinsx), 
#     row=1, col=1
# )
# fig.append_trace(
#     go.Histogram(x=scannet_range_arr[:,0], opacity=0.75, name = 'scannet', marker_color="blue", histnorm='percent', nbinsx=nbinsx), 
#     row=1, col=1
# )
# fig.append_trace(
#     go.Histogram(x=mp3d_range_arr[:,1], opacity=0.75, name = 'mp3d', marker_color="red", histnorm='percent', nbinsx=nbinsx), 
#     row=2, col=1
# )
# fig.append_trace(
#     go.Histogram(x=scannet_range_arr[:,1], opacity=0.75, name = 'scannet', marker_color="blue", histnorm='percent', nbinsx=nbinsx), 
#     row=2, col=1
# )
# fig.append_trace(
#     go.Histogram(x=mp3d_range_arr[:,2], opacity=0.75, name = 'mp3d', marker_color="red", histnorm='percent', nbinsx=nbinsx), 
#     row=3, col=1
# )
# fig.append_trace(
#     go.Histogram(x=scannet_range_arr[:,2], opacity=0.75, name = 'scannet', marker_color="blue", histnorm='percent', nbinsx=nbinsx), 
#     row=3, col=1
# )

# fig.update_layout(
#     title_text='scene range', # title of plot
# )

# # Reduce opacity to see both histograms
# # fig.update_traces(opacity=0.75)
# fig.show()

In [66]:
# Distribution of the scene scale, one subplot per dataset. The scale of a
# scene is the largest of its per-axis ranges (row-wise max of the range
# array). The two subplots use different bin ranges and widths, so their
# x axes are not directly comparable.
fig = make_subplots(rows=2, cols=1, subplot_titles=("mp3d", "scannet"))

# add_trace(row=, col=) is used instead of the deprecated append_trace.
fig.add_trace(
    go.Histogram(
        x=mp3d_range_arr.max(axis=1),
        opacity=0.75,
        name='mp3d',
        marker_color="red",
        histnorm='percent',
        xbins=dict(start=0.0, end=50.0, size=1),  # explicit bins
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Histogram(
        x=scannet_range_arr.max(axis=1),
        opacity=0.75,
        name='scannet',
        marker_color="blue",
        histnorm='percent',
        xbins=dict(start=0.0, end=15.0, size=0.5),  # explicit bins
    ),
    row=2,
    col=1,
)

# Label the axes of both subplots so the figure can be read on its own.
fig.update_xaxes(title_text='Largest axis range of the scene')
fig.update_yaxes(title_text='Percentage')
fig.update_layout(
    title_text='scene scale', # title of plot
)
fig.show()

### Point numbers

In [69]:
# Distribution of the number of points per scene, one subplot per dataset.
nbinsx = 100  # maximum number of bins, shared by both histograms

fig = make_subplots(rows=2, cols=1, subplot_titles=("mp3d", "scannet"))

# add_trace(row=, col=) is used instead of the deprecated append_trace.
fig.add_trace(
    go.Histogram(
        x=mp3d_num_points_arr,
        opacity=0.75,
        name='mp3d',
        marker_color="red",
        histnorm='percent',
        nbinsx=nbinsx,
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Histogram(
        x=scannet_num_points_arr,
        opacity=0.75,
        name='scannet',
        marker_color="blue",
        histnorm='percent',
        nbinsx=nbinsx,
    ),
    row=2,
    col=1,
)

# Label the axes of both subplots so the figure can be read on its own.
fig.update_xaxes(title_text='Number of points per scene')
fig.update_yaxes(title_text='Percentage')
fig.update_layout(
    title_text='Point cloud numbers', # title of plot
)
fig.show()