In [9]:
import os
import re
from glob import glob
import time
import numpy as np
import pandas as pd
import open3d as o3
# from open3d import JVisualizer # For Colab Visualization
from open3d.web_visualizer import draw # for non Colab

import matplotlib.pyplot as plt

In [10]:
# Dataset locations, all resolved relative to the parent of the current working directory.
# Built with os.path.join instead of hard-coded '\' separators so the same
# notebook resolves the folders on any OS (the result is unchanged on Windows).
DATASET_DIR = os.path.join(os.path.dirname(os.getcwd()), 'Dataset')
ROOT = os.path.join(DATASET_DIR, 's3dis')
SAVE_PATH = os.path.join(DATASET_DIR, 's3dis_Stanford3dDataset_v1.2_Reduced_Aligned_Version')
PARTITION_SAVE_PATH = os.path.join(DATASET_DIR, 's3dis_Stanford3dDataset_v1.2_Reduced_Partitioned_Aligned_Version')

# makedirs(exist_ok=True) also creates a missing 'Dataset' parent and makes
# the cell safe to re-run without an explicit existence check.
for out_dir in (SAVE_PATH, PARTITION_SAVE_PATH):
    os.makedirs(out_dir, exist_ok=True)

In [11]:
# Semantic class names in label order: a name's position in this tuple is
# the integer label it is mapped to in CATEGORIES.
_CATEGORY_NAMES = (
    'ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
    'table', 'chair', 'sofa', 'bookcase', 'board', 'stairs', 'clutter',
)

# class name -> integer label
CATEGORIES = {name: label for label, name in enumerate(_CATEGORY_NAMES)}

In [12]:
# unique color map generated via
# https://mokole.com/palette.html
# Maps an integer class label to its (r, g, b) display color, 0-255 per channel.
COLOR_MAP = {
    0  : (47, 79, 79),    # ceiling - darkslategray
    1  : (139, 69, 19),   # floor - saddlebrown
    2  : (34, 139, 34),   # wall - forestgreen
    3  : (75, 0, 130),    # beam - indigo
    4  : (255, 0, 0),     # column - red
    5  : (255, 255, 0),   # window - yellow
    6  : (0, 255, 0),     # door - lime
    7  : (0, 255, 255),   # table - aqua
    8  : (0, 0, 255),     # chair - blue
    9  : (255, 0, 255),   # sofa - fuchsia
    10 : (238, 232, 170), # bookcase - palegoldenrod
    11 : (100, 149, 237), # board - cornflower
    12 : (255, 105, 180), # stairs - hotpink
    13 : (0, 0, 0)        # clutter - black
}


def map_colors(x):
    ''' Returns the (r, g, b) tuple stored in COLOR_MAP for class label x.
        Raises KeyError for a label that has no entry.
        '''
    return COLOR_MAP[x]


# Element-wise version of map_colors: takes an array of labels and returns a
# tuple of three arrays (r, g, b) with the same shape as the input.
# otypes is passed explicitly so that an empty label array yields three empty
# arrays (np.vectorize cannot infer the output types from zero elements and
# raises otherwise) and so the output dtype does not depend on the platform.
v_map_colors = np.vectorize(map_colors, otypes=[np.int64] * 3)

In [13]:
area_nums = '1-6' # decide on the number of areas to obtain
area_dict = {}  # {area name: {space name: [paths of matching coord files]}}

# get areas based on split; glob returns matches in arbitrary order, so sort
# them to make the resulting dict order reproducible between runs
areas = sorted(glob(os.path.join(ROOT, f'Area_[{area_nums}]*')))

for area in areas:
    # the pattern can also match plain files; os.walk yields nothing for
    # those, which would make next() raise StopIteration
    if not os.path.isdir(area):
        continue

    # get dict to store spaces
    space_dict = {}

    # every direct subfolder of an area corresponds to one disjoint space (or location)
    for space_name in sorted(next(os.walk(area))[1]):
        space_dir = os.path.join(area, space_name)

        # get the coordinate file(s) of this space
        segments = glob(os.path.join(space_dir, '*coord.npy'))

        # key by the bare folder name
        space_dict[space_name] = segments

    # os.path.basename instead of split('\\') so the key is the folder name
    # on any OS, not only where '\' is the path separator
    area_dict[os.path.basename(area)] = space_dict

In [14]:
# display the discovered {area: {space: [coord.npy paths]}} mapping
area_dict

{'Area_1': {'conferenceRoom_1': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\conferenceRoom_1\\coord.npy'],
  'conferenceRoom_2': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\conferenceRoom_2\\coord.npy'],
  'copyRoom_1': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\copyRoom_1\\coord.npy'],
  'hallway_1': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_1\\coord.npy'],
  'hallway_2': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_2\\coord.npy'],
  'hallway_3': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_3\\coord.npy'],
  'hallway_4': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_4\\coord.npy'],
  'hallway_5': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_5\\coord.npy'],
  'hallway_6': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_6\\coord.npy'],
  'hallway_7': ['D:\\ThangLuu\\PointCloud\\Dataset\\s3dis\\Area_1\\hallway_7\\coord.npy'],
  'hallway_8': ['D:\\ThangLuu\\PointCloud\\Dataset

In [17]:
# Convert every space into a single HDF5 table whose rows are
# [coordinate columns..., label]; `time` is already imported in the first cell.
tic = time.time()

for area in area_dict:
    # create new directory (exist_ok keeps re-runs from failing)
    save_dir = os.path.join(SAVE_PATH, area)
    os.makedirs(save_dir, exist_ok=True)

    for space in area_dict[area]:
        coord_files = area_dict[area][space]
        if not coord_files:
            # folder without a coordinate file: nothing to convert
            print(f'skipping {area}/{space}: no coord.npy found')
            continue

        # obtain xyz points with truth labels
        coord_path = coord_files[0]
        coords = np.load(coord_path)
        cat = np.load(coord_path.replace("coord.npy", "segment.npy"))
        # force the labels into column form so hstack works whether they
        # were stored as (N,) or (N, 1)
        labels = cat.astype(np.float32).reshape(len(cat), -1)
        space_data = pd.DataFrame(np.hstack((coords, labels)))

        # save as .hdf5 file in new directory; mode='w' replaces a file left
        # by an earlier run instead of appending to it
        save_path = os.path.join(save_dir, space + '.hdf5')
        space_data.to_hdf(save_path, key='space_data', mode='w')

toc = time.time()
print(toc - tic)

17.646140098571777


In [34]:
# Read one converted space back to check the saved table.
# The path is derived from SAVE_PATH rather than repeating the folder name
# as a second hard-coded string.
save_path = os.path.join(SAVE_PATH, 'Area_1', 'conferenceRoom_1.hdf5')
space_data = pd.read_hdf(save_path, key='space_data').to_numpy()
space_data

array([[-15.609,  39.505,   2.214,   3.   ],
       [-15.634,  39.518,   2.198,   3.   ],
       [-15.622,  39.514,   2.195,   3.   ],
       ...,
       [-15.339,  39.561,   0.355,   2.   ],
       [-15.331,  39.543,   0.419,   2.   ],
       [-15.351,  39.569,   0.299,   2.   ]], dtype=float32)

In [35]:
# (number of points, number of columns) of the table that was read back
space_data.shape

(1136617, 4)

In [22]:
# load one space directly from the source arrays
coord_path = area_dict["Area_3"]['conferenceRoom_1'][0]
coords = np.load(coord_path)
cat = np.load(coord_path.replace("coord.npy", "segment.npy"))
# force the labels into column form so hstack works whether they were
# stored as (N,) or (N, 1)
space_data = np.hstack((coords, cat.astype(np.float32).reshape(len(cat), -1)))

In [23]:
# split the table into coordinates (first three columns) and class labels (fourth column)
xyz_points = space_data[:, :3]
class_labels = space_data[:, 3]

# per-point RGB looked up from the class label, rescaled from 0-255 to 0-1
rgb = np.vstack(v_map_colors(class_labels)).T / 255

# assemble the Open3D point cloud and open the viewer
pcd = o3.geometry.PointCloud()
pcd.points = o3.utility.Vector3dVector(xyz_points)
pcd.colors = o3.utility.Vector3dVector(rgb)

o3.visualization.draw_geometries([pcd])

In [28]:
def get_slice(points, xyz_s, xpart, ypart):
    ''' Selects the rows of `points` whose (x, y) position lies inside one
        rectangular partition. Both bounds of each interval are inclusive.
        inputs:
            points - (array) rows to select from; could be xyz, rgb or any
                     array with one row per point
            xyz_s - (Nx3 array) 0 min shifted point cloud array; columns 0
                    and 1 are compared against the partition bounds
            xpart - x partition [lower, upper]
            ypart - y partition [lower, upper]
        outputs:
            rows of `points` that fall inside the partition
        '''
    x_lower, x_upper = xpart[0], xpart[1]
    y_lower, y_upper = ypart[0], ypart[1]

    x_vals = xyz_s[:, 0]
    y_vals = xyz_s[:, 1]

    # a point is kept only when it is inside the x interval and the y interval
    inside = (x_vals >= x_lower) & (x_vals <= x_upper) \
             & (y_vals >= y_lower) & (y_vals <= y_upper)

    return points[inside, :]

def get_partitions(xyz, xyz_s, c=1.):
    ''' Obtains Point Cloud Space Partitions
        Inputs:
            xyz - (Nx3 array) point cloud array; only its per-axis extent
                  (max - min) is used, to decide the number of bins
            xyz_s - (Nx3 array) 0 min shifted point cloud array; the bin
                    edges are computed from its x and y columns
            c - (float) factor for deciding how many partitions to create
                (larger --> less partitions); the bin count per axis is
                round(extent / c), but never less than 1
        Outputs:
            partitions - (tuple) x and y partition arrays with
                         format: [[lower, upper]]
        '''
    ## get number of x, y bins
    range_ = np.abs(xyz.max(axis=0) - xyz.min(axis=0))
    # Plain Python ints instead of np.uint8, which silently wrapped around
    # above 255 bins; and at least one bin per axis, because an extent
    # smaller than c/2 rounds to 0 and the histogram code rejects bins=0.
    num_xbins, num_ybins = (max(1, int(n)) for n in np.round(range_[:2] / c))

    # uncomment this to generate ~1x1m partitions
    # num_xbins, num_ybins, _ = np.uint8(np.ceil(np.max(xyz_s, 0)))

    ## get x, y bin edges (same edges np.histogram returns, without counting points)
    xbins = np.histogram_bin_edges(xyz_s[:, 0], bins=num_xbins)
    ybins = np.histogram_bin_edges(xyz_s[:, 1], bins=num_ybins)

    ## get x y space partitions: consecutive edges paired as [lower, upper]
    x_parts = np.vstack((xbins[:-1], xbins[1:])).T
    y_parts = np.vstack((ybins[:-1], ybins[1:])).T

    return x_parts, y_parts

In [None]:
# Cut every space into rectangular (x, y) partitions and save each
# sufficiently populated partition as its own HDF5 file.
PARTITION_FACTOR = 1.5      # passed as `c` to get_partitions (larger --> fewer partitions)
MIN_PARTITION_POINTS = 100  # a partition is saved only if it has MORE points than this

tic = time.time()

num_invalid_partitions = 0

for area in area_dict:
    # create new directory (exist_ok keeps re-runs from failing)
    save_dir = os.path.join(PARTITION_SAVE_PATH, area)
    os.makedirs(save_dir, exist_ok=True)

    for space in area_dict[area]:
        coord_files = area_dict[area][space]
        if not coord_files:
            # folder without a coordinate file: nothing to partition
            print(f'skipping {area}/{space}: no coord.npy found')
            continue

        # obtain xyz points with truth labels
        coord_path = coord_files[0]
        coords = np.load(coord_path)
        cat = np.load(coord_path.replace("coord.npy", "segment.npy"))
        # force the labels into column form so hstack works whether they
        # were stored as (N,) or (N, 1)
        space_data = np.hstack((coords, cat.astype(np.float32).reshape(len(cat), -1)))

        # obtain x, y partitions
        xyz = space_data[:, :3]

        # get 0 min shifted points
        xyz_s = xyz - xyz.min(axis=0)
        x_parts, y_parts = get_partitions(xyz, xyz_s, c=PARTITION_FACTOR)

        # counter for partition saving
        i = 0
        for x_part in x_parts:
            for y_part in y_parts:
                space_slice = get_slice(space_data, xyz_s, x_part, y_part)
                # only save if the partition has more than MIN_PARTITION_POINTS points;
                # the DataFrame is built only for partitions that are kept
                if len(space_slice) > MIN_PARTITION_POINTS:
                    i += 1
                    save_path = os.path.join(save_dir, space + f'_partition{i}_.hdf5')
                    # mode='w' replaces a file left by an earlier run instead of appending to it
                    pd.DataFrame(space_slice).to_hdf(save_path, key='space_slice', mode='w')
                else:
                    num_invalid_partitions += 1

toc = time.time()
print(toc - tic)
print(f'{num_invalid_partitions} partitions skipped (not more than {MIN_PARTITION_POINTS} points)')

99.02166676521301


In [31]:
# Reload a saved partition to inspect it. `save_path` is not set in this
# cell: it holds whatever value the most recently executed cell assigned to it.
last_partition = pd.read_hdf(save_path, key='space_slice')
space_data = last_partition.to_numpy()
space_data

array([[-10.377,  38.873,   2.54 ,   3.   ],
       [-10.387,  38.875,   2.544,   3.   ],
       [-10.523,  38.782,   2.433,   3.   ],
       ...,
       [ -9.831,  39.247,   1.533,   2.   ],
       [ -9.833,  39.247,   1.637,   2.   ],
       [ -9.833,  39.247,   1.691,   2.   ]], dtype=float32)

In [32]:
# (number of points, number of columns) of the partition that was read back
space_data.shape

(93211, 4)