# Pipeline for binary meshes to subsampled pointclouds with normals

This notebook will document the pipeline to do this programatically as opposed to using the Pointcept UI

In [10]:
import os
import struct
import numpy as np
import pandas as pd
import plyfile
import pyvista as pv

fname = "/data/sdd/parametrix/cc_training_data/park_row.bin"

In [2]:
def read_bin_file_vital_stats(file_path):
    with open(file_path, 'rb') as f:
        num_meshes = struct.unpack('I', f.read(4))[0]
        print(f"Number of meshes: {num_meshes}")
        
        for mesh_idx in range(num_meshes):
            num_points = struct.unpack('I', f.read(4))[0]
            flags = struct.unpack('B', f.read(1))[0]
            
            has_colors = flags & 2
            has_normals = flags & 4
            has_scalar_field = flags & 8
            has_mesh_name = flags & 16
            
            print(f"\nMesh {mesh_idx + 1}:")
            print(f"  Number of points: {num_points}")
            print(f"  Has colors: {'Yes' if has_colors else 'No'}")
            print(f"  Has normals: {'Yes' if has_normals else 'No'}")
            print(f"  Has scalar field: {'Yes' if has_scalar_field else 'No'}")
            print(f"  Has mesh name: {'Yes' if has_mesh_name else 'No'}")
            
            if has_mesh_name:
                mesh_name = ""
                while True:
                    char = f.read(1).decode('utf-8')
                    if char == '\x00':
                        break
                    mesh_name += char
                print(f"  Mesh name: {mesh_name}")
            
            for _ in range(num_points):
                f.read(12)  # Read X, Y, Z (3 floats each 4 bytes)
                if has_colors:
                    f.read(3)  # Read R, G, B (3 unsigned chars each 1 byte)
                if has_normals:
                    f.read(12)  # Read Nx, Ny, Nz (3 floats each 4 bytes)
                if has_scalar_field:
                    f.read(8)  # Read scalar value (double 8 bytes)

# read_bin_file_vital_stats(fname)

In [6]:
a = np.fromfile(fname)
a

array([1.10760379e-312, 4.94065646e-324, 2.99939363e-241, ...,
       5.26354425e-315, 0.00000000e+000, 7.81250000e-003])

In [11]:
def read_bin_file_header(file_path):
    file_size = os.path.getsize(file_path)
    with open(file_path, 'rb') as f:
        try:
            num_clouds = struct.unpack('I', f.read(4))[0]
            print(f"Number of clouds: {num_clouds}")

            for cloud_idx in range(num_clouds):
                if f.tell() + 5 > file_size:  # Check if there are enough bytes left to read
                    print("Reached the end of file or corrupted file structure")
                    break

                num_points = struct.unpack('I', f.read(4))[0]
                flags = struct.unpack('B', f.read(1))[0]

                has_colors = flags & 2
                has_normals = flags & 4
                has_scalar_field = flags & 8
                has_cloud_name = flags & 16

                print(f"\nCloud {cloud_idx + 1}:")
                print(f"  Number of points: {num_points}")
                print(f"  Flags: {flags:08b}")
                print(f"    Has colors: {'Yes' if has_colors else 'No'}")
                print(f"    Has normals: {'Yes' if has_normals else 'No'}")
                print(f"    Has scalar field: {'Yes' if has_scalar_field else 'No'}")
                print(f"    Has cloud name: {'Yes' if has_cloud_name else 'No'}")

                if has_cloud_name:
                    mesh_name = ""
                    while True:
                        char = f.read(1).decode('utf-8')
                        if char == '\x00':
                            break
                        mesh_name += char
                    print(f"  Cloud name: {mesh_name}")

                point_data_size = 12  # X, Y, Z (3 floats each 4 bytes)
                if has_colors:
                    point_data_size += 3  # R, G, B (3 unsigned chars each 1 byte)
                if has_normals:
                    point_data_size += 12  # Nx, Ny, Nz (3 floats each 4 bytes)
                if has_scalar_field:
                    point_data_size += 8  # Scalar value (double 8 bytes)

                # Ensure there are enough bytes left in the file for the point data
                if f.tell() + point_data_size * num_points > file_size:
                    print("Reached the end of file or corrupted file structure while reading points")
                    break

                f.seek(point_data_size * num_points, 1)  # Move the file pointer ahead by the size of the points data

            num_meshes = struct.unpack('I', f.read(4))[0]
            print(f"\nNumber of meshes: {num_meshes}")

            for mesh_idx in range(num_meshes):
                if f.tell() + 5 > file_size:  # Check if there are enough bytes left to read
                    print("Reached the end of file or corrupted file structure")
                    break

                num_triangles = struct.unpack('I', f.read(4))[0]
                flags = struct.unpack('B', f.read(1))[0]

                has_colors = flags & 2
                has_normals = flags & 4
                has_materials = flags & 16

                print(f"\nMesh {mesh_idx + 1}:")
                print(f"  Number of triangles: {num_triangles}")
                print(f"  Flags: {flags:08b}")
                print(f"    Has colors: {'Yes' if has_colors else 'No'}")
                print(f"    Has normals: {'Yes' if has_normals else 'No'}")
                print(f"    Has materials: {'Yes' if has_materials else 'No'}")

                if has_colors:
                    num_colors = struct.unpack('I', f.read(4))[0]
                    print(f"  Number of colors: {num_colors}")

                if has_normals:
                    num_normals = struct.unpack('I', f.read(4))[0]
                    print(f"  Number of normals: {num_normals}")

                if has_materials:
                    num_materials = struct.unpack('I', f.read(4))[0]
                    print(f"  Number of materials: {num_materials}")

                # Ensure there are enough bytes left in the file for the triangle data
                triangle_data_size = 12  # Each triangle has 3 indices (3 unsigned integers each 4 bytes)
                if f.tell() + triangle_data_size * num_triangles > file_size:
                    print("Reached the end of file or corrupted file structure while reading triangles")
                    break

                f.seek(triangle_data_size * num_triangles, 1)  # Move the file pointer ahead by the size of the triangles data

        except struct.error as e:
            print(f"Error while reading the file: {e}")


# Example usage
read_bin_file_header(fname)


Number of clouds: 843203395

Cloud 1:
  Number of points: 52
  Flags: 00000001
    Has colors: No
    Has normals: No
    Has scalar field: No
    Has cloud name: No

Cloud 2:
  Number of points: 72
  Flags: 01001100
    Has colors: No
    Has normals: Yes
    Has scalar field: Yes
    Has cloud name: No

Cloud 3:
  Number of points: 2164260865
  Flags: 00000001
    Has colors: No
    Has normals: No
    Has scalar field: No
    Has cloud name: No
Reached the end of file or corrupted file structure while reading points

Number of meshes: 25755648

Mesh 1:
  Number of triangles: 25755648
  Flags: 00000000
    Has colors: No
    Has normals: No
    Has materials: No
Reached the end of file or corrupted file structure while reading triangles
