# PREPROCESSING POINT CLOUD DATA TO LINKED DATA
In this notebook, we evaluate the point cloud assets in a session repository.
For every e57 point cloud with its accompanying xml file, a POINTCLOUDNODE metadata class is created 
that governs all the metadata of the point cloud (paths, pose, etc.).
As output, the method generates RDF GRAPHS (.ttl) and O3D.GEOMETRY.POINTCLOUD (.pcd) files

>This codebase operates on the scan2bim2.yml environment (python 3.8)

In [12]:
#IMPORT PACKAGES
import rdflib
from rdflib import Graph, plugin
from rdflib.serializer import Serializer #pip install rdflib-jsonld https://pypi.org/project/rdflib-jsonld/
from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD

import os.path, time
import importlib
import numpy as np
import xml.etree.ElementTree as ET
import open3d as o3d
import uuid    

#IMPORT MODULES
import Algorithms.linkeddatatools as ld
import Classes.pointcloudnode as pc
import Algorithms.scan2bim as s2b


## 1. INITIALIZE SESSION

In [13]:
## INPUTS
# Root folder of the project and the session folder inside it that this notebook processes
project_path = "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent"
session_path = f"{project_path}\\week 22"

# The BIM folder sits next to the sessions, directly under the project root
bim_folderpath = f"{project_path}\\BIM"

# One sub folder per data type inside the session folder
pcd_folderpath, img_folderpath, ortho_folderpath, mesh_folderpath = (
    f"{session_path}\\{subfolder}" for subfolder in ("PCD", "IMG", "ORTHO", "MESH")
)

## session initialization missing

In [None]:
# # run e57xmldump.exe on e57 files: THIS CURRENTLY DOESN'T WORK
# command = ""
# command = "D:\\Scan-to-BIM repository\\Scan-to-BIM-Grasshopper\\Scan2BIM\\4.Python\\e57xmldump "
# command = command + pcdfolderpath+filepath
# command = command + " > " + pcdfolderpath+filepath.replace('.e57', '.xml') 
# command = ".\e57xmldump" +' "'+filepath+'" ' +">"+ ' "'+filepath.replace('.e57', '.xml') +'" '

# # os.system(command)
# #os.system(".\e57xmldump D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week 22 lidar.e57 > D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week 22 lidar.xml")
# # there is an issue with this command
# # import subprocess
# # subprocess.run(["e57xmldump", "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22_photogrammetry_densecloud - Cloud.e57","D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22_photogrammetry_densecloud - Cloud.xml"],shell=True)
# # THIS COMMAND WORKS IN CMD BUT NOT WITH OS.SYSTEM
# # command = '.\e57xmldump "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22 photogrammetry - Cloud.e57" > "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22 photogrammetry - Cloud.xml"'
# # os.system(command)
# .\e57xmldump "D:\\Data\\2021-07 Peter Benoitlaan\\PCD\\Peter Benoitlaan 16 1.e57" > "D:\\Data\\2021-07 Peter Benoitlaan\\PCD\\Peter Benoitlaan 16 1.xml"

## 2. READ PRESENT E57 XML/.E57 FILES CONTAINING POINT CLOUDS
A PointCloudNode is created per e57 point cloud scan in the session
This code presumes that e57xmldump.exe has generated an .xml metadata file for every e57 in the session
Also, the .xml should not contain <?xml version="1.0" encoding="UTF-8"?>
E.g.: .\e57xmldump "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22 photogrammetry - Cloud.e57" > "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week22 photogrammetry - Cloud.xml"

In [20]:
# Retrieve all (.e57) files in the session along with their xml metadata
# and create one PointCloudNode per scan described in each matching xml file.
# The project modules are reloaded first so that edits made to them on disk are picked up.
importlib.reload(ld)
importlib.reload(pc)
importlib.reload(s2b)

all_session_file_paths=ld.getListOfFiles(session_path)
# Set copy for constant-time membership tests (assumes the helper returns string paths, as the checks below already require)
known_session_paths=set(all_session_file_paths)
e57_file_paths=[] # these are string .e57file paths
e57_xml_paths=[] # these are string .xml file paths
pcdnodelist=[]

for file_path in all_session_file_paths:
    if not file_path.endswith(".e57"):
        continue
    e57_file_paths.append(file_path)
    # Swap only the extension: str.replace('e57','xml') would also rewrite an 'e57'
    # that occurs in a folder or file name, and the xml would then never be found
    test_e57_xml_path=os.path.splitext(file_path)[0]+'.xml'
    if test_e57_xml_path in known_session_paths:
        print('e57 file found with matching xml: '+file_path)
        e57_xml_paths.append(test_e57_xml_path)
        pcdnodelist.extend(ld.read_e57_xml(test_e57_xml_path)) # this creates PointCloudNode instances for each scan

for pcdnode in pcdnodelist:
    pcdnode.session_path=session_path
    # create timestamp if none is present (falls back to the creation time of the session folder)
    if pcdnode.timestamp is None:
        pcdnode.timestamp=time.ctime(os.path.getctime(session_path))
    # create guid if none is present
    if pcdnode.guid is None:
        pcdnode.guid= '{'+str(uuid.uuid1())+'}'

print(str(len(pcdnodelist))+' PointCloudNodes have been created based on '+ str(len(e57_xml_paths)) +' matching e57 and xml files.')
print('Note that these nodes do not yet contain actual data but only the e57 metadata')

e57 file found with matching xml: D:\Data\2018-06 Werfopvolging Academiestraat Gent\week 22\PCD\week 22 lidar_CC.e57
e57 file found with matching xml: D:\Data\2018-06 Werfopvolging Academiestraat Gent\week 22\PCD\week22 photogrammetry - Cloud.e57
e57 file found with matching xml: D:\Data\2018-06 Werfopvolging Academiestraat Gent\week 22\PCD\week22_photogrammetry_densecloud - Cloud.e57
48 PointCloudNodes have been created based on 3 matching e57 and xml files.
Note that these nodes do not yet contain actual data but only the e57 metadata


In [15]:
#set accuracy of all nodes manually based on their names
# The keywords are tested in this order and the first one found in the node name wins;
# names without any keyword fall back to 0.05
accuracy_by_keyword=(('photo',0.05),('blk',0.03),('p30',0.01))
for pcdnode in pcdnodelist:
    pcdnode.accuracy=next(
        (accuracy for keyword,accuracy in accuracy_by_keyword if keyword in pcdnode.name),
        0.05,
    )
print('Accuracies have been set based on .e57 names (photo, blk, p30)!')

accuracies have been set based on .e57 names (photo, blk, p30)!


In [16]:
# Derive the coordinate system of every node from a keyword in its name.
# The keywords are tested in this order and the first one found wins;
# names without any keyword are treated as 'local'
coordinate_system_by_keyword=(
    ('l72','Lambert72'),
    ('l2008','Lambert2008'),
    ('wgs84','geospatial-wgs84'),
)
for pcdnode in pcdnodelist:
    pcdnode.coordinate_system=next(
        (system for keyword,system in coordinate_system_by_keyword if keyword in pcdnode.name),
        'local',
    )
print('Node coordinate systems have been set based on .e57 names (l72, l2008, wgs84, local)!')

Node coordinate systems have been set based on .e57 names (l72, l2008, wgs84, local)!


## 3. WRITE PCD FILE FOR EVERY .E57 FILE
A .pcd file is created per e57 point cloud scan in the session
already processed .pcd files are not overwritten

In [17]:
#Read e57 files, create o3d point clouds and write them to \\PCD\\'pcdnode.name'.pcd
# b counts the nodes for which get_pcd_path() reports an existing .pcd file
b=0
for idx,pcdnode in enumerate(pcdnodelist):
    # Only nodes without a .pcd file are read from e57 and exported
    if pcdnode.get_pcd_path() is False:
        pcdnode.get_e57_pointcloud() #this currently fails for photogrammetric clouds (they can also be created in Cloudcompare)
        pcdnode.create_o3d_pointcloud()
        exported=pcdnode.write_o3d_pointcloud()
        if exported:
            print(f'PointCloudNode {idx} of {len(pcdnodelist)} : succesfully exported')
    # Checked again after the export attempt so freshly written files are counted as well
    if pcdnode.get_pcd_path() is True:
        b+=1
print(f'{b} of {len(pcdnodelist)} PointCloudNodes have pcd files!')

48 of 48 PointCloudNodes have pcd files!


In [9]:
#test
# Show what the first node currently holds as its Open3D point cloud
first_node=pcdnodelist[0]
print(first_node.o3d_pointcloud)

None


## 4. CREATE RDF GRAPHS FOR ALL POINTCLOUDNODES AND EXPORT THEM TO .TTL
An RDF graph and .ttl file is created for all point clouds in the session
(data itself is not stored in the graph, only metadata)

In [18]:
#Create graphs and serialize them in "pcdGraph.ttl"
# The project modules are reloaded first so that edits made to them on disk are picked up.
importlib.reload(ld)
importlib.reload(pc)
importlib.reload(s2b)

# All nodes are written to one session-level file. The path is resolved once here instead of
# being read back from pcdnodelist[0] / the last loop variable, which breaks when that
# node failed or when the list is empty
rdf_graph_path=session_path+"\\PCD\\pcdGraph.ttl"

a=0 # number of nodes that could not be added to the graph
g=Graph()
for idx,pcdnode in enumerate(pcdnodelist):
    pcdnode.session_path=session_path
    try:
        pcdnode.add_to_rdf_graph()
        # NOTE(review): '+' is kept on purpose; in rdflib the union also copies the prefix
        # bindings of both graphs, which an in-place '+=' is not expected to do (confirm for the installed version)
        g=g+pcdnode.rdf_graph
        pcdnode.rdf_graph_path=rdf_graph_path
    except Exception as error:
        # str(idx): concatenating the int directly raised a TypeError inside this handler.
        # 'Exception' instead of a bare except so that KeyboardInterrupt still stops the cell
        print('PointCloudNode '+str(idx)+' could not be serialized due to lack of metadata (did e57_xml import fail?). '+repr(error))
        a=a+1

g.serialize(destination=rdf_graph_path, format='ttl')
print (str(len(pcdnodelist)-a)+' PointCloudNodes succesfully serialized in: '+rdf_graph_path)

48 PointCloudNodes succesfully serialized in: D:\Data\2018-06 Werfopvolging Academiestraat Gent\week 22\PCD\pcdGraph.ttl


In [19]:
#test
# Reload the project modules, then rebuild and print the RDF graph of the first node
for module in (ld, pc, s2b):
    importlib.reload(module)

first_node=pcdnodelist[0]
first_node.add_to_rdf_graph()
print(first_node.rdf_graph.serialize())

@prefix e57: <http://libe57.org/> .
@prefix gom: <https://w3id.org/gom#> .
@prefix openlabel: <https://www.asam.net/index.php?eID=dumpFile&t=f&f=3876&token=413e8c85031ae64cc35cf42d0768627514868b2f> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix v4d3D: <https://w3id.org/v4d/core#> .

<http://academiestraat_week_22_39> a "https://w3id.org/v4d/3D#PointCloudNode" ;
    rdfs:label "{406c8bfb-1d9d-4cae-ad2f-d6e6f7f97e3b}" ;
    e57:cartesianBounds "[-4.835391998291016, 15.48379898071289, 1.5364439487457275, 63.616180419921875, 110.15341186523438, 52.724769592285156]" ;
    e57:recordCount "12044232" ;
    gom:hasCoordinateSystem "local" ;
    v4d3D:accuracy "0.05" ;
    v4d3D:e57_path "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week 22 lidar_CC.e57" ;
    v4d3D:e57_xml_path "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent\\week 22\\PCD\\week 22 lidar_CC.xml" ;
    v4d3D:features3d_path "None" ;
    v4d3D:images2d_path "None" ;
    v4d3D:pcd_path

## 5. GENERATE FEATURES FOR EACH POINTCLOUDNODE