In [1]:
import os
import laspy
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from tqdm import tqdm

In [34]:
# Root directory holding every dataset this notebook reads or writes.
datasets_folder = Path.home() / 'tree_projector/datasets'

# Folder tuples: index 0 is the folder the processing cells read from,
# the remaining entries are the folders they write to.
for_instance_folder = (
    datasets_folder / 'FORinstance_dataset',
    datasets_folder / 'MixedDataset',
)
nibio_mls_folder = (
    datasets_folder / 'NIBIO_MLS',
    datasets_folder / 'MixedDataset',
)
ehydro_folder = (
    datasets_folder / 'EHydro_raw',
    datasets_folder / 'EHydro',
    datasets_folder / 'EHydro_full',
)

# Make sure every folder exists (inputs included) before any cell touches it.
# 'MixedDataset' appears twice; exist_ok=True makes the repeat harmless.
for _folder in (*for_instance_folder, *nibio_mls_folder, *ehydro_folder):
    _folder.mkdir(parents=True, exist_ok=True)

# Label ids of the merged (FOR-instance + NIBIO MLS) dataset.
# There is no separate 'low_vegetation' id: that entry is disabled, so
# 'stem' and 'canopy' take ids 1 and 2.
mixed_classes = dict(terrain=0, stem=1, canopy=2)

# Label ids of the EHydro dataset.
ehydro_classes = dict(terrain=0, low_vegetation=1, tree=2, others=3)

In [3]:
def load_point_clouds(folder):
    """Lazily read and pre-process every LAS/LAZ file found under ``folder``.

    Files are discovered recursively and visited in order of their file name
    (not their full path). For each file the generator:

    1. shifts the header offsets so that the smallest raw X/Y/Z value maps to
       world coordinate 0 on each axis;
    2. registers three extra dimensions: ``norm_intensity`` (float32),
       ``semantic_pred`` (int16) and ``instance_pred`` (int16);
    3. fills ``norm_intensity`` with the min-max normalised intensity
       (all zeros when every point has the same intensity).

    Args:
        folder: ``pathlib.Path`` of the directory to scan.

    Yields:
        ``(ext, file)`` where ``ext`` is the lower-cased file suffix
        (``'.las'`` or ``'.laz'``) and ``file`` is the object returned by
        ``laspy.read`` after the steps above.
    """
    files = sorted(
        (f for f in folder.rglob("*") if f.is_file() and f.suffix.lower() in ('.laz', '.las')),
        key=lambda f: f.name
    )

    for path in files:
        ext = path.suffix.lower()
        file = laspy.read(path)

        # Use the RAW integer dimensions (upper-case X/Y/Z). The lower-case
        # x/y/z are already scaled world coordinates; truncating those to int
        # and applying scale + offset again yields a meaningless shift.
        min_coords = np.array([file.X.min(), file.Y.min(), file.Z.min()], dtype=np.int64)
        mins_world = min_coords * file.header.scales + file.header.offsets
        # In-place update of the header offsets: the minimum now sits at 0.
        file.header.offsets -= mins_world

        intensity = np.array(file.intensity)
        min_intensity = np.min(intensity)
        max_intensity = np.max(intensity)

        file.add_extra_dims([
            laspy.ExtraBytesParams(name="norm_intensity", type=np.float32),
            laspy.ExtraBytesParams(name="semantic_pred", type=np.int16),
            laspy.ExtraBytesParams(name="instance_pred", type=np.int16)
        ])

        # Go through Python floats so the range cannot wrap around in the
        # (possibly unsigned) intensity dtype.
        intensity_range = float(max_intensity) - float(min_intensity)
        if intensity_range > 0:
            file.norm_intensity = (intensity - min_intensity) / intensity_range
        else:
            # Constant intensity: 0/0 would fill the dimension with NaN.
            file.norm_intensity = np.zeros(intensity.shape, dtype=np.float64)
        yield ext, file


In [9]:
# Label ids stored in the `classification` field of the FOR-instance files
# (ids follow the order of the names: 0, 1, 2, ...).
for_instance_classes = {
    name: label_id
    for label_id, name in enumerate((
        'unclassified',
        'low_vegetation',
        'terrain',
        'out_points',
        'stem',
        'live_branches',
        'woody_branches',
    ))
}

# FOR-instance class name -> class name in `mixed_classes`.
_for_instance_to_mixed = {
    'low_vegetation': 'terrain',
    'terrain': 'terrain',
    'stem': 'stem',
    'live_branches': 'canopy',
    'woody_branches': 'canopy',
}

for i, (ext, file) in enumerate(tqdm(load_point_clouds(for_instance_folder[0]))):
    # Discard points that are unclassified or flagged as out-points.
    not_out_point = file.classification != for_instance_classes['out_points']
    not_unclassified = file.classification != for_instance_classes['unclassified']
    file.points = file.points[not_out_point & not_unclassified]

    # Every condition is evaluated against the untouched source labels, so an
    # already remapped value can never be remapped a second time.
    source_labels = np.array(file.classification)
    mixed_labels = np.copy(source_labels)
    for source_name, target_name in _for_instance_to_mixed.items():
        mixed_labels = np.where(
            source_labels == for_instance_classes[source_name],
            mixed_classes[target_name],
            mixed_labels,
        )

    file.semantic_pred = mixed_labels
    file.instance_pred = file.treeID
    file.write(for_instance_folder[1] / f'plot_FORinstance_{i}{ext}')


28it [01:13,  2.61s/it]


In [None]:
# Label ids stored in the `label` field of the NIBIO MLS files.
nibio_mls_classes = dict(ground=1, vegetation=2, lying_deadwood=3, stems=4)

for i, (ext, file) in enumerate(tqdm(load_point_clouds(nibio_mls_folder[0]))):
    source_labels = np.array(file.label)
    tree_ids = file.treeID
    is_vegetation = source_labels == nibio_mls_classes['vegetation']

    # (condition on the source labels, label id to write), applied in order.
    # Vegetation is split by treeID: points with treeID 0 become terrain,
    # all other vegetation points become canopy.
    relabel_steps = (
        (source_labels == nibio_mls_classes['ground'], mixed_classes['terrain']),
        (is_vegetation & (tree_ids == 0), mixed_classes['terrain']),
        (is_vegetation & (tree_ids != 0), mixed_classes['canopy']),
        (source_labels == nibio_mls_classes['lying_deadwood'], mixed_classes['terrain']),
        (source_labels == nibio_mls_classes['stems'], mixed_classes['stem']),
    )

    # Labels matched by no step keep their original value.
    mixed_labels = np.copy(source_labels)
    for condition, target_label in relabel_steps:
        mixed_labels = np.where(condition, target_label, mixed_labels)

    file.semantic_pred = mixed_labels
    file.instance_pred = tree_ids
    file.write(nibio_mls_folder[1] / f'plot_NIBIO_MLS_{i}{ext}')


64it [01:17,  1.21s/it]


In [35]:
# --- Parameters of the EHydro relabelling / tiling ---------------------------
chunk_size = 50                 # side of the square XY tiles, in the units of file.x / file.y
MIN_SPAN_FRACTION = 0.8         # a tile is dropped when both XY spans are below this fraction of chunk_size
TREE_HEIGHT_THRESHOLD = 6.0     # (mean z - min z) above which an instance is labelled 'tree'
MIN_INSTANCE_LABELS = 5         # minimum number of distinct instance labels in a kept tile
MIN_SEMANTIC_CLASSES = 3        # minimum number of distinct semantic labels in a kept tile
EXCLUDED_CLASSIFICATION = 6     # source classification forced to 'others' (presumably ASPRS "building" - confirm)

# Counters reported at the end of the cell.
a_priori_chunks = 0   # every non-empty tile
complete_chunks = 0   # tiles that pass the span filter
valid_chunks = 0      # tiles that also pass the label-diversity filter (these are written)

for i, (ext, file) in enumerate(tqdm(load_point_clouds(ehydro_folder[0]))):
    semantic_labels = np.array(file.classification)
    remap = np.full_like(semantic_labels, ehydro_classes['others'])
    instance_labels = np.array(file.PredInstance)

    # Instance 0 is treated as terrain; every other instance is classified
    # below from its vertical extent.
    instance_ids = np.unique(instance_labels)
    instance_ids = instance_ids[instance_ids != 0]

    remap = np.where(instance_labels == 0, ehydro_classes['terrain'], remap)
    for instance_id in instance_ids:
        instance_mask = instance_labels == instance_id
        z = np.asarray(file.z[instance_mask])
        if z.mean() - z.min() > TREE_HEIGHT_THRESHOLD:
            remap[instance_mask] = ehydro_classes['tree']
        else:
            remap[instance_mask] = ehydro_classes['low_vegetation']

    # Applied last so it overrides the instance-based labels.
    remap = np.where(semantic_labels == EXCLUDED_CLASSIFICATION, ehydro_classes['others'], remap)
    file.semantic_pred = remap
    file.instance_pred = instance_labels

    # Assign every point to a chunk_size x chunk_size tile in the XY plane,
    # with the tile grid anchored at the minimum XY of the file.
    xy = np.stack([file.x, file.y], axis=1)
    min_xy = xy.min(axis=0)

    chunk_idx = np.floor((xy - min_xy) / chunk_size).astype(int)
    chunk_keys, inverse = np.unique(chunk_idx, axis=0, return_inverse=True)
    # NumPy 2.0.0 returns a non-1-D inverse when `axis` is given; flatten it so
    # the boolean masks below always have one entry per point.
    inverse = np.ravel(inverse)

    for j in range(len(chunk_keys)):
        a_priori_chunks += 1

        chunk_mask = inverse == j
        pts_chunk = file.points[chunk_mask]
        xy_chunk = xy[chunk_mask]
        instance_labels_chunk = instance_labels[chunk_mask]
        semantic_pred_chunk = remap[chunk_mask]

        span = xy_chunk.max(axis=0) - xy_chunk.min(axis=0)

        # NOTE(review): np.all drops a tile only when BOTH spans are short, so
        # a long, thin sliver is kept. The summary message suggests np.any may
        # have been intended - confirm before changing.
        if np.all(span < MIN_SPAN_FRACTION * chunk_size):
            continue

        complete_chunks += 1
        uniq = np.unique(instance_labels_chunk)
        # NOTE(review): the summary message says "tres instancias" while this
        # requires MIN_INSTANCE_LABELS (5) distinct labels, label 0 included.
        if len(uniq) < MIN_INSTANCE_LABELS or len(np.unique(semantic_pred_chunk)) < MIN_SEMANTIC_CLASSES:
            continue

        valid_chunks += 1
        # Re-number the instance labels of this tile to 0..len(uniq)-1.
        instance_labels_chunk = uniq.searchsorted(instance_labels_chunk)

        out = laspy.create(point_format=file.point_format, file_version=file.header.version)
        out.header.scales = file.header.scales
        out.header.offsets = file.header.offsets

        out.points = pts_chunk
        out.instance_pred = instance_labels_chunk
        out.write(ehydro_folder[1] / f'plot_ehydro_{i}_{j}.las')

    # The full, relabelled cloud is written as well.
    file.write(ehydro_folder[2] / f'plot_ehydro_{i}.las')

# Avoid a ZeroDivisionError when the input folder held no usable files.
complete_pct = (complete_chunks / a_priori_chunks) * 100 if a_priori_chunks else 0.0
valid_pct = (valid_chunks / a_priori_chunks) * 100 if a_priori_chunks else 0.0

print(f'Chunks a priori: {a_priori_chunks}')
print(f'Chunks completos, con al menos un 80% del tamaño requerido: {complete_chunks} ({complete_pct:.2f}%)')
print(f'Chunks válidos, con al menos tres instancias y tres clases presentes: {valid_chunks} ({valid_pct:.2f}%)')


8it [04:15, 31.98s/it]

Chunks a priori: 1120
Chunks completos, con al menos un 80% del tamaño requerido: 1021 (91.16%)
Chunks válidos, con al menos tres instancias y tres clases presentes: 615 (54.91%)



