# SELECT BIM ELEMENTS
In this notebook, we select specific elements and classes from an IFC model and use them to segment a point cloud.

In [1]:
#IMPORT PACKAGES
from rdflib import Graph, URIRef
import os.path
import importlib
import numpy as np
import xml.etree.ElementTree as ET
import open3d as o3d
import uuid    
import pye57 
import ifcopenshell
import ifcopenshell.geom as geom
import ifcopenshell.util
from ifcopenshell.util.selector import Selector
import random as rd
import pandas as pd
from tabulate import tabulate
import cv2

#IMPORT MODULES
from context import geomapi 
from geomapi.nodes import *
import geomapi.utils as ut
from geomapi.utils import geometryutils as gmu
import geomapi.tools as tl

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

## 1. INITIALIZE SESSION

In [58]:
## INPUTS
# All paths and parameters of the session are defined here.
# NOTE(review): the project root is an absolute, machine-specific path — adjust it for your environment.
projectPath= os.path.join("D:\\Data\\2023-01 Colosseum")
sessionPath = os.path.join(projectPath,"Research")

#BIM
bimPath=os.path.join(sessionPath,"BIM")
ifcPath=os.path.join(sessionPath,'BIM','RMCO_S01phases.ifc')

bimGraphPath = os.path.join(sessionPath,"BIM","bimGraph.ttl")
bimClassesGraphPath = os.path.join(sessionPath,"BIM","bimClassesGraph.ttl")
# class label -> name fragment(s); a node belongs to a class when its name contains one of the fragments
bimClasses= {'COLONNA':['COLONNA','PARASTA'],
            'PILASTRO':'PILASTRO',
            'TRABEAZIONE':'TRABEAZIONE',
            'ARCO':'ARCO',
            'AGGETTO':'aggetto',
            'FACADE':['CORNICE','MUR']}
bimSeperatorGraphPath = os.path.join(sessionPath,"BIM","bimSeperatorGraph.ttl")
# separator label -> identifier of the separating element (presumably IFC GlobalIds — TODO confirm)
bimSeperators= {'LV-03_A':'3saOMnutrFHPtEwspUjESB',
                'Facade_1':'0H$kAf94TFru8CbHUTFppg',
                'LV-02.1_A':'3saOMnutrFHPtEwspUjEN8',
                'LV-02_A':'3saOMnutrFHPtEwspUjEHi',
                'LV-01_A':'3saOMnutrFHPtEwspUjEJs',
                'LV-02_B':'3saOMnutrFHPtEwspUjEjK',
                'LV-01_B': '3saOMnutrFHPtEwspUjEZ6',
                'LV-01_C': '3saOMnutrFHPtEwspUjEXN'}
# 4x4 homogeneous transform: identity rotation with a translation in x and y
bimTransform=np.array([[1.0,0.0, 0.0, -2.3118e+06  ], 
                [0.0, 1.0, 0.0, -4.6406e+06],
                [0.0, 0.0, 1.0 ,0],
                [0.0 ,0.0, 0.0, 1.000000000000]])
# output folders; makedirs(exist_ok=True) also creates missing parents and cannot race with an existence check
bimMeshExportFolder=os.path.join(sessionPath,'MESH')
os.makedirs(bimMeshExportFolder, exist_ok=True)
ClassPointCloudsFolder=os.path.join(sessionPath,'ClassPointClouds')
os.makedirs(ClassPointCloudsFolder, exist_ok=True)
# one file-system safe name per class, built from the space-joined name fragments
bimClassNames=[ut.validate_string(' '.join(ut.item_to_list(c))) for c in bimClasses.values()]
#PCD
csvPath=os.path.join(projectPath,'PCD','COLOSSEO_UNITO_test3_indexed.csv')
# csvOutputPath=os.path.join(sessionPath,'PCD','COLOSSEO_UNITO_test3_indexed.csv')
chunksize=1000000 # number of csv rows processed per chunk
threshold=0.1 #distance threshold for inliers (also used as the sampling spacing of the reference clouds)
lasPath=os.path.join(projectPath,'PCD','COLOSSEO_UNITO - Cloud.las')
outputlasPath=os.path.join(projectPath,'PCD','COLOSSEO_UNITO - Cloud - classes.las')

#MESH
meshPath=os.path.join(sessionPath,'MESH','COLONNA_PARASTA.obj')


# PARSE INPUTS & CREATE NODES

Import the BIM separator geometries (the elements listed in `bimSeperators`).

In [5]:
# bimSeperatorNodes=to.ifc_to_nodes_by_guids(ifcPath,bimSeperators.values(),offsetTransform=bimTransform)
# for n in bimSeperatorNodes:
#     n.cartesianTransform= bimTransform 
#     n.resource.transform(bimTransform)

In [6]:
# bimMesh=gmu.join_geometries([n.resource for n in bimNodes if n.resource is not None])
# bimSeperatorMesh=gmu.join_geometries([n.resource for n in bimSeperatorNodes if n.resource is not None])
# o3d.visualization.draw_geometries([bimMesh]+[bimSeperatorMesh])

**Fig.**: Images of the BIM section of the Colosseum with (a) the BIM elements, (b) the point cloud, (c) the selected subgroups per model type and (d) the segmented point cloud per subgroup.

<img src="../docs/pics/colosseum/columns1.PNG" width = "20%">
<img src="../docs/pics/colosseum/columns2.PNG" width = "25%">

<img src="../docs/pics/colosseum/columns3.PNG" width = "23%">
<img src="../docs/pics/colosseum/columns4.PNG" width = "20%">

# IMPORT AND SEPARATE BIM INTO SUBGROUPS

Import the IFC Model.

In [7]:
# Parse every element of the IFC file into a BIMNode.
# geomapi.tools is imported as `tl` in this notebook; the alias `to` is never defined on a fresh kernel.
bimNodes=tl.ifc_to_nodes_multiprocessing(ifcPath,offsetTransform=bimTransform)
# NOTE(review): bimTransform is passed as offsetTransform AND applied again below —
# confirm the geometry is not translated twice.
for n in bimNodes:
    n.cartesianTransform= bimTransform 
    # nodes without geometry have no resource to transform (later cells filter on this as well)
    if n.resource is not None:
        n.resource.transform(bimTransform)
print(f' {len(bimNodes)} BIMNodes created!')

 461 BIMNodes created!


Select BIM objects per class based on their name.

In [8]:
# Show the name fragments that define each class (a single string or a list of strings per class).
print(bimClasses.values())

dict_values([['COLONNA', 'PARASTA'], 'PILASTRO', 'TRABEAZIONE', 'ARCO', 'aggetto', ['CORNICE', 'MUR']])


In [9]:
# Select the BIM nodes of every class by name, merge them into one mesh per class and export it.
# TODO: make this exclusive (a node whose name matches several classes is currently added to each of them)

nodeLists=[]
geometries=[]
for i,(fragments,name) in enumerate(zip(bimClasses.values(),bimClassNames)):
    #select nodes that carry geometry and whose name contains one of the class fragments
    fragments=ut.item_to_list(fragments)
    selection=[n for n in bimNodes
               if n.resource is not None
               and n.name is not None
               and any(fragment in n.name for fragment in fragments)]
    nodeLists.append(selection)
    print(f'{i},  {len(selection)}, {fragments} !')
    if not selection:
        # keep geometries index-aligned with bimClasses: an empty class gets a None placeholder
        geometries.append(None)
        continue
    #combine geometries
    geometries.append(gmu.join_geometries([n.resource for n in selection]))
    #export geometries
    o3d.io.write_triangle_mesh(os.path.join(bimMeshExportFolder,name+'.obj'),geometries[i])

0,  65, ['COLONNA', 'PARASTA'] !
1,  75, ['PILASTRO'] !
2,  13, ['TRABEAZIONE'] !
3,  61, ['ARCO'] !
4,  19, ['aggetto'] !
5,  137, ['CORNICE', 'MUR'] !


Create point cloud per mesh geometry.

In [40]:
# Sample one reference point cloud per class mesh, roughly one point per threshold x threshold
# patch of surface. The position in ref_clouds is used as the class index further on, so the
# list must stay aligned with bimClasses.
ref_clouds=[]
for i,g in enumerate(geometries):
    if g is None:
        raise ValueError(f'Class {i} has no geometry; cannot sample a reference cloud for it.')
    area=g.get_surface_area()
    # Open3D rejects a non-positive number of points, so sample at least one point on tiny meshes
    count=max(1,int(area/(threshold*threshold)))
    ref_clouds.append(g.sample_points_uniformly(number_of_points=count))

(optional) read sample mesh

In [11]:
# Optional: replace the per-class reference clouds by a single cloud sampled from one exported mesh.
# WARNING: this overwrites ref_clouds with a one-element list, so every later cell then only
# knows class index 0. Skip this cell when all classes are needed.
mesh=o3d.io.read_triangle_mesh(meshPath)
# number of samples: 1000 points per unit of surface area
k = round(mesh.get_surface_area() * 1000)
ref_clouds = [mesh.sample_points_uniformly(number_of_points = k, use_triangle_normal=True)]

Visualize the BIM Classes in different colors.

In [43]:
# Give every class mesh a random colour and show them together; None entries are skipped.
# NOTE(review): paint_uniform_color presumably recolours the meshes in `geometries` in place — confirm.
coloredGeometries=[g.paint_uniform_color(ut.random_color()) for g in geometries if g is not None]
o3d.visualization.draw_geometries(coloredGeometries)

# SEGMENT PCD WITH BIMCLASSES USING LASPY

Read Las data (1.5 min for 110M points, requires 13Gb RAM)

In [59]:
# laspy is imported here rather than in the imports cell; this cell must run before any cell that uses `laspy` or `las`.
import laspy
# read the complete LAS file from lasPath into memory
las  = laspy.read(lasPath)

Compute nearest neighbors. Fast but operates on single core (10min for 110M points, 13Gb RAM required)

In [60]:
# Merge the per-class reference clouds into one cloud.
# presumably ref_arr holds, for every merged point, the index of the cloud it came from — TODO confirm in geomapi
ref_cloud,ref_arr=gmu.create_identity_point_cloud(ref_clouds)
# bring the LAS coordinates into the coordinate system of the BIM geometries
query_points=gmu.transform_points( las.xyz,bimTransform)
# nearest reference point for every LAS point
indices, distances=gmu.compute_nearest_neighbors(np.asarray(ref_cloud.points), query_points)
# class index of the nearest reference point
# NOTE(review): `distances` is never compared against `threshold`, so every point receives the
# class of its nearest reference point regardless of how far away it is — confirm this is intended.
index=ref_arr[indices]

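Release the large intermediates first (the query points alone take about 4 GB for 110M points; TODO: wrap the nearest-neighbour step in a function so they are freed automatically), then assign the nearest class index as an extra dimension in the LAS file.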

In [61]:
# Drop the references to the large intermediate arrays so their memory can be reclaimed.
query_points=None
distances=None

In [62]:
# Store the class index of every point as an extra uint8 dimension called "bimClass".
# Only add the dimension when it does not exist yet, so that this cell can be re-run on the same `las` object.
if "bimClass" not in list(las.point_format.dimension_names):
    las.add_extra_dim(laspy.ExtraBytesParams(name="bimClass", type="uint8"))
# first column of `index` = class index of the nearest reference point
las.bimClass=index[:,0]
print(las.header)
print(list(las.point_format.dimension_names))
print(las['bimClass'])


<LasHeader(1.2, <PointFormat(2, 25 bytes of extra dims)>)>
['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'red', 'green', 'blue', '01 Materiali', '03 D', '02 TC', 'bimClass']
[1 1 1 ... 5 5 5]


Export las file

In [63]:
# Write the enriched point cloud to a new file; the input LAS at lasPath is left untouched.
las.write(outputlasPath)

**Fig.**: Images of the segmented point cloud with (a) the point cloud enriched with the BIM class as a feature, (b) the ref_clouds from the BIM model and (c) the initial point cloud.

<img src="../docs/pics/colosseum/columns5.PNG" width = "17%">
<img src="../docs/pics/colosseum/columns6.PNG" width = "23%">
<img src="../docs/pics/colosseum/columns7.PNG" width = "23%">

# SEGMENT PCD AS DATAFRAME

1. Without optimization (fast but for small point clouds e.g. 10-20M points)

In [46]:
import time
# Read the complete csv in one go (only feasible for small point clouds).
df = pd.read_csv(csvPath,
    sep= ' ',
    header=0,  
    names=["x","y","z","R", "G", "B", "M", "TC", "Nx", "Ny", "Nz" ])
# class index per point
# NOTE(review): points that are not within `threshold` of any reference cloud keep the initial 0,
# which is indistinguishable from class index 0 — confirm whether a separate "no class" value is needed.
arr=np.zeros(len(df))
pcd=gmu.dataframe_to_pcd(df)
# NOTE(review): the chunked variant of this step transforms the points with bimTransform first;
# here that call is disabled — confirm which coordinate system this csv is in.
# pcd.transform(bimTransform)
#compute distance to every reference cloud; a later class overwrites an earlier one where both are within the threshold
for i,ref_cloud in enumerate(ref_clouds):
    distances=pcd.compute_point_cloud_distance(ref_cloud)
    #select indices within a distance threshold
    indices=np.where(np.asarray(distances) <= threshold)[0]
    np.put(arr, indices, i)
# assign new column and export df
df = df.assign(className=arr)

# Write to a separate file next to the input. Appending to csvPath itself would duplicate every
# row inside the input file with a different column layout and corrupt it for the next read.
csvOutputPath=os.path.splitext(csvPath)[0]+'_classes.csv'
df.to_csv(csvOutputPath, sep=' ', index=False)

In [None]:
# for i,ref_cloud,name in zip(range(len(bimClasses.values())),ref_clouds,bimClassNames):


2. Chunked without optimization (slow but memory proof for medium point clouds e.g. 20-100M points)

In [30]:
# The labelled chunks go to a separate file: appending them to csvPath would write into the very
# file that is still being read chunk by chunk.
csvOutputPath=os.path.splitext(csvPath)[0]+'_classes.csv'
chunks  = pd.read_csv(csvPath,
    sep= ' ',
    header=0,  
    names=["x","y","z","R", "G", "B", "M", "TC", "Nx", "Ny", "Nz" ],
    chunksize=chunksize,
    iterator=True)
for chunkNumber,chunk in enumerate(chunks): 
    # create integer based array for the classes
    arr=np.zeros(len(chunk))   
    #create point cloud
    pcd=gmu.dataframe_to_pcd(chunk)
    #transform to local coordinate system
    pcd.transform(bimTransform)
    #compute distance to every reference cloud
    for i,ref_cloud in enumerate(ref_clouds):
        distances=pcd.compute_point_cloud_distance(ref_cloud)
        #select indices within a distance threshold       
        ind=np.where(np.asarray(distances) <= threshold)[0]
        # label the points of THIS chunk (not a stale index array left over from another cell)
        np.put(arr, ind, i)
    # assign new column to the chunk itself and export it
    chunk=chunk.assign(**{'class': arr})
    # the first chunk creates the file and writes the header, the following chunks are appended
    firstChunk=chunkNumber==0
    chunk.to_csv(csvOutputPath, sep=' ', index=False, mode='w' if firstChunk else 'a', header=firstChunk)

In [None]:
 
#  # this is some unused code to iteratively write data to csv file
#   test1=chunk.iloc[ind]
#             # #export point clouds
#             with open(os.path.join(ClassPointCloudsFolder,name+'.csv'), "a") as csv:
#                 test1.to_csv(csv,mode='a')
#             print(f'{len(test1)} of {chunksize} exported.')


3. DASK multiprocessing optimization (fast and memory proof for large point clouds e.g. >100M points). Note that this is three times slower for small point clouds due to the overhead of spawning workers, etc.

In [35]:
# dask is imported here rather than in the imports cell.
import dask.dataframe as dd

# Open the csv as a Dask dataframe; the first row is used as header.
# NOTE: this rebinds `df`, which earlier cells use for a pandas dataframe.
df = dd.read_csv(csvPath,
                 header=0, 
                sep= ' ')


def create_and_compute_distance(partition, ref_clouds):
    """Label the points of one dataframe partition with the index of the matching reference cloud.

    Args:
        partition: dataframe whose first three columns are the point coordinates.
        ref_clouds: list of Open3D point clouds, one per class; the position in the list is the class index.

    Returns:
        A copy of the partition with two extra columns:
        'distance_to_ref' (distance to the closest of all reference clouds) and
        'class' (index of the last reference cloud within the notebook-level `threshold`; 0 when none is).

    NOTE(review): the coordinates are used as they are in the csv; the chunked pandas variant
    transforms them with bimTransform first — confirm which is correct for this file.
    """
    points=partition[partition.columns[:3]].values    
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points))
    # per-partition result arrays; nothing is read from or written to notebook globals
    classes=np.zeros(len(partition))
    nearest=np.full(len(partition), np.inf)
    for i,ref_cloud in enumerate(ref_clouds):
        distances=np.asarray(pcd.compute_point_cloud_distance(ref_cloud))
        #select indices within a distance threshold       
        ind=np.where(distances <= threshold)[0]
        np.put(classes, ind, i)
        # keep the smallest distance over all reference clouds
        nearest=np.minimum(nearest, distances)
    # assign() returns a new dataframe, so the partition handed in by dask is not modified in place
    return partition.assign(**{'distance_to_ref': nearest, 'class': classes})

def create_point_cloud_dask(df, ref_cloud):
    """Apply create_and_compute_distance to every partition of a Dask dataframe and collect the result.

    Args:
        df: Dask dataframe holding the points.
        ref_cloud: reference point cloud(s), handed unchanged to create_and_compute_distance.

    Returns:
        The computed (in-memory) result of all partitions.
    """
    # use the argument instead of silently reading the notebook-level `ref_clouds`
    point_cloud_partitions = df.map_partitions(create_and_compute_distance, ref_cloud)
    return point_cloud_partitions.compute()

# Run the segmentation on all partitions and show the resulting dataframe.
# The former trailing `.to_parquet` was never called, so `result` held a bound method
# instead of the data; export explicitly with result.to_parquet(<path>) when needed.
result = create_point_cloud_dask(df, ref_clouds)
print(result)

                 //X             Y          Z    R    G   B  01_Materiali  \
0       2.311879e+06  4.640674e+06  32.195537   74   67  58           0.0   
1       2.311879e+06  4.640674e+06  32.184521   90   84  73           0.0   
2       2.311879e+06  4.640674e+06  32.204296  117  102  89           0.0   
3       2.311879e+06  4.640674e+06  32.212200  116  106  91           0.0   
4       2.311879e+06  4.640673e+06  32.135959   79   68  57           0.0   
...              ...           ...        ...  ...  ...  ..           ...   
571611  2.311875e+06  4.640673e+06  35.283402   46   54  49           0.0   
571612  2.311875e+06  4.640673e+06  35.177899   80   80  81           0.0   
571613  2.311875e+06  4.640673e+06  35.307801   44   51  48           0.0   
571614  2.311875e+06  4.640673e+06  35.254101   96  101  98           0.0   
571615  2.311875e+06  4.640673e+06  35.325299   87   88  90           0.0   

        03_D  02_TC  Original_cloud_index        Nx        Ny        Nz  \


In [None]:
# first dask attempt
# #define a function
# def compute_index(df):
#     xyz=df.iloc[:,[0,1,2]]
#     pcd=o3d.geometry.PointCloud()
#     pcd.points=o3d.utility.Vector3dVector(xyz.to_numpy())
#     # pcd=gmu.dataframe_to_pcd(df)
#     return 10

# #compute function as delayed function
# lazy_results = []
# for chunk in dask_dataframe: #test
#     chunk=chunk.compute()
#     lazy_result = dask.delayed(compute_index)(chunk)
#     lazy_results.append(lazy_result)
# # dask.compute(*lazy_results)
# futures = dask.persist(*lazy_results)  # trigger computation in the background
# client.cluster.scale(10)  # ask for ten 4-thread workers
# results = dask.compute(*futures)
# results[:5]


# results = dask.compute(*futures)
# results[:5]

In [None]:

#this function does not work because you cannot jointly write in parallel in the same file
# for chunk in dask_dataframe.partitions:    
#     chunk=chunk.compute()
#     # print(chunk[0])
#     # xyz=chunk.get(['Y', 'Z'])

#     pcd=gmu.dataframe_to_pcd(chunk)
#     # xyz=chunk.
#     # print(pcd)
    

#     # pcd=o3d.geometry.PointCloud()
#     # pcd.points=o3d.utility.Vector3dVector(xyz.to_numpy())
#     # #transform to local coordinate system
#     pcd.transform(bimTransform)
#     #compute distance to identityPointCloud    
#     for bimpcd,name in zip(bimPointClouds,bimClassNames):
#         distances=pcd.compute_point_cloud_distance(bimpcd)
#         #remove distances > threshold
#         ind=np.where(np.asarray(distances) <= threshold)[0]
#         #select indices based on closest point        
#         if ind.size >0:
#             test1=chunk.iloc[ind]
#             # #export point clouds
#             with open(os.path.join(ClassPointCloudsFolder,name+'.csv'), "a") as csv:
#                 test1.to_csv(csv,mode='a')
#             print(f'{len(test1)} of {len(chunk)} exported.')


(optional) filter distance calculation based on geometry shape.