In [2]:
import os
import laspy
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from dotenv import load_dotenv
from tqdm import tqdm

In [3]:
# Load environment variables (e.g. TREE_PROJECTOR_DIR) through python-dotenv.
load_dotenv()

# All datasets live in <project root>/datasets; the project root falls back to
# ~/tree_projector when TREE_PROJECTOR_DIR is not set.
datasets_folder = Path(os.environ.get('TREE_PROJECTOR_DIR', Path.home() / 'tree_projector')) / 'datasets'

# Each tuple is (input folder, output folder[, second output folder]).
for_instance_folder = (
    datasets_folder / 'FORinstance_dataset',
    datasets_folder / 'MixedDataset',
)
for_instance_big_folder = (
    datasets_folder / 'FORinstance_big_dataset',
    datasets_folder / 'MixedDataset',
)
nibio_mls_folder = (
    datasets_folder / 'NIBIO_MLS',
    datasets_folder / 'MixedDataset',
)
ehydro_folder = (
    datasets_folder / 'EHydro_raw',
    datasets_folder / 'EHydro',
    datasets_folder / 'EHydro_full',
)

# Make sure every folder exists before any cell reads from or writes to it.
for _folder_group in (for_instance_folder, for_instance_big_folder, nibio_mls_folder, ehydro_folder):
    for _folder in _folder_group:
        _folder.mkdir(parents=True, exist_ok=True)

# Target semantic classes of the mixed dataset. Low vegetation has no class of
# its own here (its entry is intentionally disabled).
mixed_classes = {
    'terrain': 0,
    # 'low_vegetation': 1,
    'stem': 1,
    'canopy': 2
}

# Target semantic classes of the EHydro dataset.
ehydro_classes = {
    'terrain': 0,
    'low_vegetation': 1,
    'tree': 2,
    'others': 3
}

In [4]:
def load_point_clouds(folder):
    """Lazily read every LAS/LAZ file under ``folder`` and prepare it for labelling.

    Files are discovered recursively and visited in order of file name. For
    each file the header offsets are reduced by the cloud's minimum corner, and
    three extra dimensions are added: ``norm_intensity`` (float32, min-max
    scaled intensity), ``semantic_pred`` and ``instance_pred`` (both int16,
    left for the caller to fill in).

    Args:
        folder: ``pathlib.Path`` of the directory to scan.

    Yields:
        ``(ext, file)`` tuples, where ``ext`` is the lower-cased file suffix
        (``'.las'`` or ``'.laz'``) and ``file`` is the object returned by
        ``laspy.read``.
    """
    files = sorted(
        (f for f in folder.rglob("*") if f.is_file() and f.suffix.lower() in ('.laz', '.las')),
        key=lambda f: f.name
    )

    for path in tqdm(files, desc=f'Procesando {folder}'):
        ext = path.suffix.lower()
        file = laspy.read(path)

        # The minimum corner must be taken from the RAW integer coordinates
        # (upper-case X/Y/Z): the lower-case x/y/z are already scaled, so
        # truncating them to int and applying scale/offset again would give a
        # wrong shift.
        min_coords = np.array([file.X.min(), file.Y.min(), file.Z.min()], dtype=np.int64)
        mins_world = min_coords * file.header.scales + file.header.offsets
        file.header.offsets -= mins_world

        intensity = np.array(file.intensity)
        min_intensity = np.min(intensity)
        max_intensity = np.max(intensity)

        file.add_extra_dims([
            laspy.ExtraBytesParams(name="norm_intensity", type=np.float32),
            laspy.ExtraBytesParams(name="semantic_pred", type=np.int16),
            laspy.ExtraBytesParams(name="instance_pred", type=np.int16)
        ])

        # Min-max normalisation; a constant-intensity cloud would divide by
        # zero, so it is mapped to all zeros instead of NaN.
        intensity_range = max_intensity - min_intensity
        if intensity_range > 0:
            file.norm_intensity = (intensity - min_intensity) / intensity_range
        else:
            file.norm_intensity = np.zeros(len(intensity), dtype=np.float32)
        yield ext, file

def chunkerize(file, chunk_size = 25):
    """Split a cloud into square XY grid cells and keep the well-covered ones.

    The grid is anchored at the minimum x/y of the cloud. A cell is discarded
    only when the extent of its points is below 80% of ``chunk_size`` on BOTH
    axes.
    NOTE(review): a cell that is short on a single axis is kept; confirm that
    this is the intended definition of a "complete" chunk.

    Args:
        file: object exposing ``x`` and ``y`` coordinate arrays.
        chunk_size: edge length of a grid cell, in coordinate units.

    Returns:
        List of 1-D boolean masks over the points, one per kept cell.
    """
    xy = np.stack([file.x, file.y], axis=1)
    if len(xy) == 0:
        # Nothing to split; min() below would fail on an empty array.
        return []

    chunk_idx = np.floor((xy - xy.min(axis=0)) / chunk_size).astype(int)
    chunk_keys, inverse = np.unique(chunk_idx, axis=0, return_inverse=True)
    # NumPy 2.0.0 returns `inverse` with shape (N, 1) when `axis` is given;
    # flatten so the masks are always 1-D and can index `xy`.
    inverse = np.asarray(inverse).reshape(-1)

    chunk_masks = []
    for i in range(len(chunk_keys)):
        mask = inverse == i
        xy_chunk = xy[mask]

        span = xy_chunk.max(axis=0) - xy_chunk.min(axis=0)
        if np.all(span < 0.8 * chunk_size):
            continue

        chunk_masks.append(mask)

    return chunk_masks

def chunkerize_clean(file, chunk_size):
    """Build square windows of side ``chunk_size`` centred on instance centroids.

    The XY centroid of every non-zero ``instance_pred`` label is a candidate
    window centre. Candidates are pruned one at a time while the closest pair
    is within ``0.6 * chunk_size / 2`` of each other; the pair "distance" is
    the smaller of |dx| and |dy|.
    NOTE(review): with min(|dx|, |dy|) two centres that are aligned on one
    axis count as close even when far apart on the other; confirm whether
    max(|dx|, |dy|) was intended.

    Args:
        file: object exposing ``x``, ``y`` and ``instance_pred`` arrays.
        chunk_size: side length of each window, in coordinate units.

    Returns:
        List of boolean masks over the points, one per surviving centre. When
        there is no non-zero instance, a single all-True mask is returned.
    """
    xy = np.stack([file.x, file.y], axis=1)
    labels = np.asarray(file.instance_pred)
    half = chunk_size / 2

    def window_mask(center):
        # Points inside the axis-aligned square of side `chunk_size` around `center`.
        return (xy[:, 0] >= center[0] - half) & (xy[:, 0] <= center[0] + half) & \
            (xy[:, 1] >= center[1] - half) & (xy[:, 1] <= center[1] + half)

    unique_labels = np.unique(labels)
    unique_labels = unique_labels[unique_labels != 0]
    if len(unique_labels) == 0:
        return [np.ones_like(labels, dtype=bool)]

    centers = np.array([xy[labels == label].mean(axis=0) for label in unique_labels])

    # Stop as soon as a single centre is left: there is no pair to compare and
    # argmin over an empty array would raise ValueError.
    while len(centers) > 1:
        distances = np.abs(centers[None, :] - centers[:, None]).min(axis=-1)
        upper = np.triu(np.ones_like(distances, dtype=bool), k=1)
        idxs = np.argwhere(upper)
        vals = distances[upper]
        flat_idx = vals.argmin()
        if vals[flat_idx] > half * 0.6:
            break

        # Of the closest pair, drop the centre with the smaller summed
        # distance to all the others.
        i, j = idxs[flat_idx]
        row_proximity = distances[i, :].sum()
        col_proximity = distances[:, j].sum()
        centers = np.delete(centers, i if row_proximity < col_proximity else j, axis=0)

    return [window_mask(center) for center in centers]

def chunkerize_four(file):
    """Split a cloud into four quadrants around the mean XY position.

    Points lying exactly on a dividing line are assigned to the quadrant on
    the greater-or-equal side, so every point belongs to exactly one quadrant
    (strict comparisons on both sides would silently drop them).

    Args:
        file: object exposing ``x`` and ``y`` coordinate arrays.

    Returns:
        Four boolean masks in the order (+x, +y), (-x, +y), (-x, -y), (+x, -y).
    """
    xy = np.stack([file.x, file.y], axis=1)
    center = xy.mean(axis=0)

    right = xy[:, 0] >= center[0]
    upper = xy[:, 1] >= center[1]

    return [
        right & upper,
        ~right & upper,
        ~right & ~upper,
        right & ~upper
    ]


In [5]:
# Codes found in the `classification` field of the FORinstance files.
for_instance_classes = {
    'unclassified': 0,
    'low_vegetation': 1,
    'terrain': 2,
    'out_points': 3,
    'stem': 4,
    'live_branches': 5,
    'woody_branches': 6
}

for i, (ext, file) in enumerate(load_point_clouds(for_instance_folder[0])):
    # Drop unclassified and invalid (out_points) points.
    mask = (file.classification != for_instance_classes['out_points']) & (file.classification != for_instance_classes['unclassified'])
    file.points = file.points[mask]

    # Map the FORinstance classes onto the mixed-dataset classes. Every test
    # reads the ORIGINAL labels, so overlapping codes cannot chain.
    semantic_labels = np.array(file.classification)
    remap = np.copy(semantic_labels)

    remap = np.where(semantic_labels == for_instance_classes['low_vegetation'], mixed_classes['terrain'], remap)
    remap = np.where(semantic_labels == for_instance_classes['terrain'], mixed_classes['terrain'], remap)
    remap = np.where(semantic_labels == for_instance_classes['stem'], mixed_classes['stem'], remap)
    remap = np.where(semantic_labels == for_instance_classes['live_branches'], mixed_classes['canopy'], remap)
    remap = np.where(semantic_labels == for_instance_classes['woody_branches'], mixed_classes['canopy'], remap)

    file.semantic_pred = remap
    file.instance_pred = file.treeID

    # Write each quadrant of the plot as its own file.
    chunk_masks = chunkerize_four(file)
    for j, mask in enumerate(chunk_masks):
        pts_chunk = file.points[mask]
        instance_labels_chunk = np.asarray(file.treeID[mask])

        # Compact the ids to consecutive integers while pinning 0 to 0.
        # treeID 0 is treated as "no tree" elsewhere in this notebook, so a
        # chunk without any such point must not have its first tree relabelled
        # as 0.
        unique_vals = np.unique(np.append(instance_labels_chunk, 0))
        inv = np.searchsorted(unique_vals, instance_labels_chunk)

        out = laspy.create(point_format=file.point_format, file_version=file.header.version)
        out.header.scales = file.header.scales
        out.header.offsets = file.header.offsets

        out.points = pts_chunk
        out.instance_pred = inv
        out.write(for_instance_folder[1] / f'plot_FORinstance_{i}_{j}.las')


Procesando /home/samuel/tree_projector/datasets/FORinstance_dataset: 100%|██████████| 28/28 [01:09<00:00,  2.47s/it]


In [6]:
# Same preprocessing as the FORinstance plots, but each large plot is cut into
# windows centred on the trees instead of four quadrants.
for i, (ext, file) in enumerate(load_point_clouds(for_instance_big_folder[0])):
    # Drop unclassified and invalid (out_points) points.
    mask = (file.classification != for_instance_classes['out_points']) & (file.classification != for_instance_classes['unclassified'])
    file.points = file.points[mask]

    # Map the FORinstance classes onto the mixed-dataset classes. Every test
    # reads the ORIGINAL labels, so overlapping codes cannot chain.
    semantic_labels = np.array(file.classification)
    remap = np.copy(semantic_labels)

    remap = np.where(semantic_labels == for_instance_classes['low_vegetation'], mixed_classes['terrain'], remap)
    remap = np.where(semantic_labels == for_instance_classes['terrain'], mixed_classes['terrain'], remap)
    remap = np.where(semantic_labels == for_instance_classes['stem'], mixed_classes['stem'], remap)
    remap = np.where(semantic_labels == for_instance_classes['live_branches'], mixed_classes['canopy'], remap)
    remap = np.where(semantic_labels == for_instance_classes['woody_branches'], mixed_classes['canopy'], remap)

    file.semantic_pred = remap
    file.instance_pred = file.treeID

    chunk_masks = chunkerize_clean(file, chunk_size=12.5)
    for j, mask in enumerate(chunk_masks):
        pts_chunk = file.points[mask]
        instance_labels_chunk = np.asarray(file.treeID[mask])

        # Compact the ids to consecutive integers while pinning 0 to 0.
        # treeID 0 is treated as "no tree" elsewhere in this notebook, so a
        # window without any such point must not have its first tree
        # relabelled as 0.
        unique_vals = np.unique(np.append(instance_labels_chunk, 0))
        inv = np.searchsorted(unique_vals, instance_labels_chunk)

        out = laspy.create(point_format=file.point_format, file_version=file.header.version)
        out.header.scales = file.header.scales
        out.header.offsets = file.header.offsets

        out.points = pts_chunk
        out.instance_pred = inv
        out.write(for_instance_big_folder[1] / f'plot_FORinstance_big_{i}_{j}.las')


Procesando /home/samuel/tree_projector/datasets/FORinstance_big_dataset: 100%|██████████| 4/4 [00:10<00:00,  2.68s/it]


In [10]:
# Codes found in the `label` field of the NIBIO MLS files.
nibio_mls_classes = {
    'ground': 1,
    'vegetation': 2,
    'lying_deadwood': 3,
    'stems': 4
}

for i, (ext, file) in enumerate(load_point_clouds(nibio_mls_folder[0])):
    semantic_labels = np.array(file.label)
    belongs_to_tree = np.asarray(file.treeID != 0)
    is_vegetation = semantic_labels == nibio_mls_classes['vegetation']

    # Start from the source labels and overwrite class by class. Every mask is
    # computed from the untouched source labels, so assignments cannot chain.
    remap = semantic_labels.copy()
    remap[semantic_labels == nibio_mls_classes['ground']] = mixed_classes['terrain']
    # Vegetation counts as canopy only when it is attached to a tree instance.
    remap[is_vegetation & ~belongs_to_tree] = mixed_classes['terrain']
    remap[is_vegetation & belongs_to_tree] = mixed_classes['canopy']
    remap[semantic_labels == nibio_mls_classes['lying_deadwood']] = mixed_classes['terrain']
    remap[semantic_labels == nibio_mls_classes['stems']] = mixed_classes['stem']

    file.semantic_pred = remap
    file.instance_pred = file.treeID
    # The whole plot is written back with the extension it was read with.
    file.write(nibio_mls_folder[1] / f'plot_NIBIO_MLS_{i}{ext}')


Procesando /home/samuel/tree_projector/datasets/NIBIO_MLS: 100%|██████████| 64/64 [00:48<00:00,  1.33it/s]


In [None]:
# Running totals over all EHydro files, reported at the end of the cell.
a_priori_chunks = 0   # grid cells that contain at least one point
complete_chunks = 0   # cells kept by chunkerize (span filter)
valid_chunks = 0      # complete cells that also pass the instance/class filter

ehydro_chunk_size = 25  # grid cell edge; equals chunkerize's default

for i, (ext, file) in enumerate(load_point_clouds(ehydro_folder[0])):
    semantic_labels = np.array(file.classification)
    instance_labels = np.array(file.PredInstance)

    # Everything starts as 'others'; instance 0 is terrain; every other
    # instance is a tree or low vegetation depending on how far its mean
    # height lies above its lowest point.
    remap = np.full_like(semantic_labels, ehydro_classes['others'])
    remap[instance_labels == 0] = ehydro_classes['terrain']

    ids = np.unique(instance_labels)
    ids = ids[ids != 0]
    for instance_id in ids:
        mask = instance_labels == instance_id
        z = np.asarray(file.z[mask])
        if z.mean() - z.min() > 6.0:
            remap[mask] = ehydro_classes['tree']
        else:
            remap[mask] = ehydro_classes['low_vegetation']

    # Classification code 6 always ends up as 'others'
    # (building in the ASPRS standard codes - TODO confirm for this dataset).
    remap[semantic_labels == 6] = ehydro_classes['others']
    file.semantic_pred = remap
    file.instance_pred = instance_labels

    # Count the occupied grid cells with the same grid chunkerize builds, so
    # the final percentages have a meaningful denominator.
    xy = np.stack([file.x, file.y], axis=1)
    grid_cells = np.floor((xy - xy.min(axis=0)) / ehydro_chunk_size).astype(int)
    a_priori_chunks += len(np.unique(grid_cells, axis=0))

    chunk_masks = chunkerize(file, ehydro_chunk_size)
    complete_chunks += len(chunk_masks)

    for j, mask in enumerate(chunk_masks):
        pts_chunk = file.points[mask]
        instance_labels_chunk = instance_labels[mask]
        semantic_pred_chunk = remap[mask]

        # NOTE(review): the summary message below speaks of "three instances"
        # while this filter requires five distinct instance ids; confirm which
        # of the two is intended.
        uniq = np.unique(instance_labels_chunk)
        if len(uniq) < 5 or len(np.unique(semantic_pred_chunk)) < 3:
            continue
        valid_chunks += 1

        # Compact the ids to consecutive integers while pinning 0 (terrain)
        # to 0, so a chunk without terrain does not turn an instance into 0.
        compact_ids = np.unique(np.append(uniq, 0))
        instance_labels_chunk = compact_ids.searchsorted(instance_labels_chunk)

        out = laspy.create(point_format=file.point_format, file_version=file.header.version)
        out.header.scales = file.header.scales
        out.header.offsets = file.header.offsets

        out.points = pts_chunk
        out.instance_pred = instance_labels_chunk
        # One file per chunk: the second index must be the chunk index,
        # otherwise every chunk of a plot overwrites the previous one.
        out.write(ehydro_folder[1] / f'plot_ehydro_{i}_{j}.las')

    file.write(ehydro_folder[2] / f'plot_ehydro_{i}.las')

# Avoid ZeroDivisionError when no input file (or no occupied cell) was found.
complete_pct = (complete_chunks / a_priori_chunks) * 100 if a_priori_chunks else 0.0
valid_pct = (valid_chunks / a_priori_chunks) * 100 if a_priori_chunks else 0.0

print(f'Chunks a priori: {a_priori_chunks}')
print(f'Chunks completos, con al menos un 80% del tamaño requerido: {complete_chunks} ({complete_pct:.2f}%)')
print(f'Chunks válidos, con al menos tres instancias y tres clases presentes: {valid_chunks} ({valid_pct:.2f}%)')


8it [04:15, 31.98s/it]

Chunks a priori: 1120
Chunks completos, con al menos un 80% del tamaño requerido: 1021 (91.16%)
Chunks válidos, con al menos tres instancias y tres clases presentes: 615 (54.91%)



