In [1]:
import subprocess
import os
import shutil
import open3d as o3d
import numpy as np
import logging
import re

from pathlib import Path

# Notebook-wide logging: INFO level, each record prefixed with a HH:MM:SS timestamp.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Category identifiers of the form "<index>_<NAME>", numbered from 1 in the
# order listed below.
CATEGORIES = [
    f"{index}_{name}"
    for index, name in enumerate(
        (
            "WALL",
            "FLOOR",
            "ROOF",
            "CEILING",
            "FOOTPATH",
            "GRASS",
            "COLUMN",
            "DOOR",
            "WINDOW",
            "STAIR",
            "RAILING",
            "RWP",
            "OTHER",
        ),
        start=1,
    )
]

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Class for data preprocessing

There are a few linked stages for preprocessing the data, so a class that holds the shared configuration (labels, directories, tool paths) and avoids passing configs between every step probably makes sense.

It needs to be capable of taking care of the following tasks:
- reading the raw .bin files from Aleks and converting them into usable .ply files, as per the mesh_extraction notebook
- performing relevant transforms on these meshes (scaling, axis fixing, probably recentering too for train/test/val splitting)
- taking transformed meshes and performing visualisations to help with test/train/val splitting
- taking the finished meshes for the different splits and generating pointclouds from them at a requested resolution
- converting the data into .pth files for use in Pointcept

Let's explore whether that is feasible.

In [15]:
class ParametrixPreprocessor():
    """
    Preprocessing helper for one labelled mesh dataset.

    Stores the directory layout derived from ``root_dir`` and ``label`` and
    drives the CloudCompare command line to turn the raw ``<label>.bin`` file
    into per-category ``.ply`` meshes (one CloudCompare run per entry in
    ``CATEGORIES``).
    """

    def __init__(
        self, label="park_row", cc_path="org.cloudcompare.CloudCompare",
        root_dir="/home/sogilvy/repos/Pointcept/data/parametrix"
    ):
        """
        Build all paths used by the preprocessing stages; touches no files.

        Args:
            label: Dataset name. The raw mesh is expected at
                ``<root_dir>/meshes/raw/<label>.bin``.
            cc_path: Executable name/path used to launch CloudCompare.
            root_dir: Data root containing the ``meshes`` directory.
        """
        self.label = label
        self.cc_path = cc_path

        # Required directories
        self.root_dir = Path(root_dir)
        self.mesh_dir = self.root_dir / "meshes"
        self.raw_mesh_dir = self.mesh_dir / "raw"
        self.extraction_dir = self.mesh_dir / "extracted" / label
        self.processed_mesh_dir = self.mesh_dir / "processed" / label

        # Paths to files
        self.raw_mesh_path = self.raw_mesh_dir / f"{label}.bin"
        self.raw_mesh_path_temp = self.extraction_dir / f"{label}.bin"

    def _run_cloudcompare(self, args, description):
        """
        Run CloudCompare in silent mode from inside the extraction directory.

        Failures are logged rather than raised so one bad category/file does
        not abort the whole run. A missing executable still raises ``OSError``
        from ``subprocess.run``, exactly as before.

        Args:
            args: Command-line arguments placed after ``-SILENT``.
            description: Short text identifying this run in error messages.

        Returns:
            True if the process exited with code 0, False otherwise.
        """
        command = [self.cc_path, "-SILENT", *args]
        # check=False means CalledProcessError is never raised, so the exit
        # code has to be inspected explicitly.
        # NOTE(review): assumes CloudCompare reports failures through a
        # non-zero exit code - confirm for the installed build/wrapper.
        result = subprocess.run(
            command,
            cwd=self.extraction_dir,
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            stderr_text = result.stderr.decode(errors="replace").strip()
            logger.error(
                f"CloudCompare failed while {description} "
                f"(exit code {result.returncode}): {stderr_text}"
            )
            return False
        return True

    def prepare_mesh_extraction(self):
        """
        Prepare necessary dirs and copy raw .bin mesh file over to get
        around CC limitations.

        Any existing extraction directory for this label is deleted and
        recreated, so results of a previous run are discarded.

        Raises:
            FileNotFoundError: If the raw ``.bin`` file does not exist. This
                is checked before anything is deleted so a typo in the label
                cannot wipe the results of an earlier run.
        """
        if not self.raw_mesh_path.exists():
            raise FileNotFoundError(f"Raw mesh file not found: {self.raw_mesh_path}")

        if self.extraction_dir.exists():
            logger.info("Cleaning .bin extraction dirs.")
            shutil.rmtree(self.extraction_dir.as_posix())
        else:
            logger.info("Creating .bin extraction dirs.")
        self.extraction_dir.mkdir(parents=True, exist_ok=True)
        # Copy raw mesh file over.
        shutil.copy(self.raw_mesh_path.as_posix(), self.raw_mesh_path_temp.as_posix())

    def split_bin_by_category(self):
        """
        Run CloudCompare once per entry in ``CATEGORIES`` on the copied raw
        file: select entities by regex, rename them to the lower-cased
        category name and save the meshes into the extraction directory.
        """
        logger.info(f"Splitting bin file {self.raw_mesh_path_temp} by category...")
        for category in CATEGORIES:
            logger.info(f"  extracting category: {category}")
            self._run_cloudcompare(
                [
                    "-O", self.raw_mesh_path_temp.name,
                    "-SELECT_ENTITIES",
                    "-REGEX", category,
                    "-RENAME_ENTITIES", category.lower(),
                    "-NO_TIMESTAMP", "-SAVE_MESHES",
                ],
                f"extracting category {category}",
            )

    def extract_meshes_from_bin(self):
        """
        Extract .ply files from the raw .bin files.

        Steps: prepare the extraction directory, split the raw file by
        category, strip the trailing ``_<integer>`` from each produced
        ``.bin``, convert it to PLY, then delete the intermediate ``.bin``
        files and the temporary copy of the raw file.
        """
        self.prepare_mesh_extraction()
        self.split_bin_by_category()

        # Cloudcompare appends an index suffix to renamed files when multiple entities are loaded
        # So we'll strip this suffix for deterministic file names going forward.
        # Regular expression to match filenames ending with '_<integer>.bin'
        pattern = re.compile(r"^(.*?_\d+)\.(bin)$")

        # Snapshot the listing first: the loop renames, creates and deletes
        # files in this same directory. The temporary raw copy is excluded
        # explicitly, otherwise a label ending in '_<digits>' would match the
        # pattern and be renamed/deleted before the final unlink below.
        suffixed_bins = [
            file_path
            for file_path in self.extraction_dir.iterdir()
            if file_path.is_file()
            and file_path != self.raw_mesh_path_temp
            and pattern.match(file_path.name)
        ]

        for file_path in suffixed_bins:
            # NOTE(review): if CloudCompare ever emits several files for one
            # category (e.g. '_0' and '_1'), they map to the same stripped
            # name and the later one replaces the earlier - confirm.
            new_file_stem = re.sub(r'_\d+$', '', file_path.stem)
            new_file_path = file_path.with_name(f"{new_file_stem}{file_path.suffix}")
            file_path.rename(new_file_path)

            # Now convert the file to a .ply file for use outside of CloudCompare.
            logger.info(f"  converting {new_file_path.name}")
            self._run_cloudcompare(
                [
                    "-O", new_file_path.name,
                    "-M_EXPORT_FMT", "PLY",
                    "-NO_TIMESTAMP", "-SAVE_MESHES",
                ],
                f"converting {new_file_path.name}",
            )

            # Finally unlink the intermediary .bin file
            new_file_path.unlink()

        # Unlink the original .bin file duplicate
        self.raw_mesh_path_temp.unlink()

        # Summarise created files
        logger.info("Created .ply files:")
        for file in sorted(self.extraction_dir.glob("*.ply")):
            logger.info(f"  {file.name}")
                

# Instantiate with the default label/paths; the constructor only builds Path objects.
p = ParametrixPreprocessor()

In [16]:
# Run the full extraction: recreate the extraction dir, copy the raw .bin,
# split it per category and convert each result to .ply.
p.extract_meshes_from_bin()

04:11:38 - INFO - Cleaning .bin extraction dirs.
04:11:38 - INFO - Splitting bin file /home/sogilvy/repos/Pointcept/data/parametrix/meshes/extracted/park_row/park_row.bin by category...
04:11:38 - INFO -   extracting category: 1_WALL
04:11:39 - INFO -   extracting category: 2_FLOOR
04:11:40 - INFO -   extracting category: 3_ROOF
04:11:41 - INFO -   extracting category: 4_CEILING
04:11:42 - INFO -   extracting category: 5_FOOTPATH
04:11:43 - INFO -   extracting category: 6_GRASS
04:11:44 - INFO -   extracting category: 7_COLUMN
04:11:45 - INFO -   extracting category: 8_DOOR
04:11:47 - INFO -   extracting category: 9_WINDOW
04:11:48 - INFO -   extracting category: 10_STAIR
04:11:49 - INFO -   extracting category: 11_RAILING
04:11:50 - INFO -   extracting category: 12_RWP
04:11:51 - INFO -   extracting category: 13_OTHER
04:11:52 - INFO -   converting 1_wall.bin
04:11:53 - INFO -   converting 2_floor.bin
04:11:53 - INFO -   converting 3_roof.bin
04:11:54 - INFO -   converting 4_ceiling.b

In [4]:
# Display the directory the raw .bin files are read from.
p.raw_mesh_dir.as_posix()

'/home/sogilvy/repos/Pointcept/data/parametrix/meshes/raw'