In [8]:
import laspy
import numpy as np
import shutil
import random

from laspy import LasData
from tqdm import tqdm
from pathlib import Path
from typing import List

In [9]:
# ---------------------------------------------------------------------------
# Configuration: every tunable value and path used by the cells below.
# ---------------------------------------------------------------------------

# Seed both RNGs used in this notebook (`np.random` for the augmentation,
# `random` for the train -> val sampling) so a fresh run is reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

AUGMENTED_PER_POINT_CLOUD = 15   # augmented scenes written per ground cloud
MIN_TREES_PER_SCENE = 4          # inclusive lower bound of trees drawn per scene
MAX_TREES_PER_SCENE = 15         # inclusive upper bound of trees drawn per scene
MIN_DISTANCE = 5.0               # minimum spacing between tree anchor points (cloud units)
VAL_CHUNKS = 32                  # number of processed train chunks moved to val

# Augmentation ranges; yaw and tilt are in degrees (converted with np.radians
# where they are sampled), scale is a multiplicative factor.
YAW_RANGE = (0.0, 360.0)
TILT_RANGE = (-2.0, 2.0)
SCALE_RANGE = (0.9, 1.1)

mixed_dataset = Path.home() / 'Panoramix3D_data' / 'datasets' / 'EHydroDataset'

raw_folder = mixed_dataset / 'raw'
grounds_folder = raw_folder / 'grounds'
trees_folder = raw_folder / 'trees'
aug_folder = raw_folder / 'aug'

processed_folder = mixed_dataset / 'processed'
train_folder = processed_folder / 'train'
val_folder = processed_folder / 'val'
test_folder = processed_folder / 'test'

grounds_folder.mkdir(parents=True, exist_ok=True)
trees_folder.mkdir(parents=True, exist_ok=True)
# The augmentation cell writes its scenes here; create it up front so a
# fresh "Restart & Run All" does not fail on a missing directory.
aug_folder.mkdir(parents=True, exist_ok=True)

processed_folder.mkdir(parents=True, exist_ok=True)
train_folder.mkdir(parents=True, exist_ok=True)
val_folder.mkdir(parents=True, exist_ok=True)
test_folder.mkdir(parents=True, exist_ok=True)

# Raw training clouds that get split into ground / tree clouds.
# Sorted because glob() order is filesystem-dependent.
files = sorted((raw_folder / 'train').glob('*.las'))

# Running indices used by save_las() to number the files it writes.
ground_counter = 0
tree_counter = 0

In [3]:
def save_las(file, points, folder):
    """Write `points` as a new, re-centred LAS file.

    The output reuses the point format, file version and scales of `file`,
    with header offsets set to zero. Coordinates are shifted so that x and y
    have zero mean and z has a minimum of zero.

    Args:
        file: source LAS data providing point format, version and scales.
        points: point record to store (taken from `file`).
        folder: 'ground' writes `ground_<n>.las` into `grounds_folder`;
            any other value writes `tree_<n>.las` into `trees_folder`.

    Side effects:
        Increments the module-level `ground_counter` or `tree_counter`
        that supplies `<n>`.
    """
    global ground_counter, tree_counter

    recentred = laspy.create(point_format=file.point_format, file_version=file.header.version)
    recentred.header.scales = file.header.scales
    recentred.header.offsets = np.array([0.0, 0.0, 0.0])
    recentred.points = points

    # Grab all three coordinate views before any of them is reassigned.
    xs, ys, zs = points.x, points.y, points.z

    recentred.x = xs - np.array(xs).mean()
    recentred.y = ys - np.array(ys).mean()
    recentred.z = zs - np.array(zs).min()

    if folder != 'ground':
        recentred.write(trees_folder / f'tree_{tree_counter}.las')
        tree_counter += 1
        return

    recentred.write(grounds_folder / f'ground_{ground_counter}.las')
    ground_counter += 1

def find_n_points(points, n, min_distance=None):
    """Randomly pick up to `n` rows of `points` that are mutually well separated.

    Candidates are visited in random order (using the global `np.random`
    state) and accepted greedily when they lie at least `min_distance`
    (Euclidean) away from every point accepted so far.

    Args:
        points: numpy array with one point per row.
        n: maximum number of points to return.
        min_distance: minimum allowed distance between two returned points.
            Defaults to the module-level `MIN_DISTANCE`.

    Returns:
        `points` itself, unchanged and without any distance check, when it
        holds `n` rows or fewer; otherwise an array of the accepted rows.
        Fewer than `n` rows are returned when not enough candidates satisfy
        the spacing; an empty selection (e.g. `n == 0`) yields an array with
        zero rows.
    """
    if min_distance is None:
        min_distance = MIN_DISTANCE

    if len(points) <= n:
        return points

    chosen_idx = []
    # Iterate over a random permutation instead of materialising a Python
    # list of every index; the loop normally stops after a few candidates.
    for candidate in np.random.permutation(len(points)):
        if len(chosen_idx) >= n:
            break

        if chosen_idx:
            # One vectorised distance computation against all accepted points.
            deltas = points[chosen_idx] - points[candidate]
            dists = np.linalg.norm(deltas.reshape(len(chosen_idx), -1), axis=1)
            if (dists < min_distance).any():
                continue

        chosen_idx.append(candidate)

    # Explicit integer dtype: an empty default-dtype (float) index array would raise.
    return points[np.array(chosen_idx, dtype=np.intp)]

def load_trees(paths: List[Path]) -> List[LasData]:
    """Read every LAS file in `paths` fully into memory.

    Args:
        paths: collection of LAS file paths; it is only iterated, so any
            iterable of paths works at runtime.

    Returns:
        One `LasData` per path, in the same order as `paths`.
    """
    trees = []
    for path in paths:
        # The reader is closed as soon as the file has been read.
        with laspy.open(path) as reader:
            trees.append(reader.read())
    return trees

def chunkerize_four(file):
    """Split a cloud into four XY quadrants around its XY centroid.

    Args:
        file: object exposing per-point `x` and `y` coordinate arrays.

    Returns:
        A list of four boolean masks (one entry per point), ordered
        [+x +y, -x +y, -x -y, +x -y] relative to the centroid. Points lying
        exactly on a centroid axis are assigned to the `+` side, so every
        point belongs to exactly one quadrant.
    """
    xy = np.stack([file.x, file.y], axis=1)
    center = xy.mean(axis=0)

    # Use >= on one side and its complement on the other: with strict
    # comparisons on both sides, points on a split line fell into no quadrant.
    east = xy[:, 0] >= center[0]
    north = xy[:, 1] >= center[1]

    return [
        east & north,
        ~east & north,
        ~east & ~north,
        east & ~north
    ]


In [28]:
# Split every raw training cloud into one ground cloud plus one cloud per
# tree instance; save_las() re-centres each cloud and numbers the files.
for source_path in tqdm(files):
    with laspy.open(source_path) as reader:
        scene = reader.read()

    # Points with semantic_gt == 0 are exported as the ground cloud.
    ground_mask = scene.semantic_gt == 0
    # Skip scenes without any such point: save_las() cannot re-centre an
    # empty cloud (min() of an empty array raises).
    if np.any(ground_mask):
        save_las(scene, scene.points[ground_mask], 'ground')

    # Instance ids > 0 are exported as individual trees.
    instance_ids = np.unique(scene.instance_gt)
    instance_ids = instance_ids[instance_ids > 0]

    # `instance_id` rather than `id`, so the builtin is not shadowed at notebook scope.
    for instance_id in instance_ids:
        tree_mask = scene.instance_gt == instance_id
        save_las(scene, scene.points[tree_mask], 'tree')


100%|██████████| 24/24 [00:30<00:00,  1.26s/it]


In [5]:
def _random_scale_yaw_tilt(xyz):
    """Return `xyz` after a random scale, yaw and tilt.

    Draws, in this order, a scale from SCALE_RANGE, a yaw from YAW_RANGE and
    a tilt from TILT_RANGE (both in degrees) from the global `np.random`
    state, then right-multiplies the scaled coordinates by the Z-axis (yaw)
    matrix followed by the X-axis (tilt) matrix.
    """
    scale = np.random.uniform(*SCALE_RANGE)
    yaw = np.radians(np.random.uniform(*YAW_RANGE))
    tilt = np.radians(np.random.uniform(*TILT_RANGE))

    yaw_matrix = np.array([
        [np.cos(yaw), -np.sin(yaw), 0],
        [np.sin(yaw), np.cos(yaw), 0],
        [0, 0, 1]
    ])
    tilt_matrix = np.array([
        [1, 0, 0],
        [0, np.cos(tilt), -np.sin(tilt)],
        [0, np.sin(tilt), np.cos(tilt)]
    ])
    return ((np.array(xyz) * scale) @ yaw_matrix) @ tilt_matrix


# Sorted so the pairing of RNG draws with files does not depend on glob() order.
ground_files = sorted(grounds_folder.glob('*.las'))
tree_files = sorted(trees_folder.glob('*.las'))

# Make sure the output folder exists before the writer tries to create files in it.
aug_output_folder = raw_folder / 'aug'
aug_output_folder.mkdir(parents=True, exist_ok=True)

for ground_path in tqdm(ground_files):
    with laspy.open(ground_path) as reader:
        ground = reader.read()

    # The ground is transformed once per ground file, so all of its
    # augmented copies share the same ground and differ only in their trees.
    xyz = _random_scale_yaw_tilt(ground.xyz)
    ground.xyz = xyz

    for copy_idx in range(AUGMENTED_PER_POINT_CLOUD):
        num_trees = np.random.randint(MIN_TREES_PER_SCENE, MAX_TREES_PER_SCENE + 1)
        # Anchor positions are sampled from the transformed ground points;
        # fewer than num_trees may come back if the spacing cannot be met.
        anchor_points = find_n_points(xyz, num_trees)
        chosen_tree_paths = np.random.choice(tree_files, num_trees, replace=False)
        trees = load_trees(chosen_tree_paths)

        out_path = aug_output_folder / f'{ground_path.stem}_aug_{copy_idx}.las'
        with laspy.open(out_path, mode='w', header=ground.header) as writer:
            writer.write_points(ground.points)

            # zip() stops at the shorter sequence, so surplus trees are dropped.
            for tree_idx, (anchor, tree) in enumerate(zip(anchor_points, trees)):
                # Instance ids are renumbered 1..k within the scene.
                tree.instance_gt = np.full_like(tree.instance_gt, fill_value=tree_idx + 1)

                tree_xyz = _random_scale_yaw_tilt(tree.xyz)
                tree_xyz += anchor
                tree.xyz = tree_xyz

                # NOTE(review): points are written under the ground header;
                # this presumably relies on tree and ground files sharing
                # scales/offsets — confirm against the writer's behaviour.
                writer.write_points(tree.points)
    

100%|██████████| 55/55 [00:12<00:00,  4.25it/s]


In [6]:
# Cut every raw cloud of each split into four XY quadrants and write each
# non-empty quadrant as its own chunk, shifted so its minimum corner is at the origin.
folders = ['train', 'val', 'test', 'aug']
for folder in tqdm(folders):
    # Use a dedicated name: reassigning `files` here would clobber the list
    # of raw training clouds defined in the configuration cell.
    split_files = list((raw_folder / folder).glob('*.las'))

    # Chunks of augmented scenes are written into the train split.
    out_folder = (processed_folder / folder) if folder != 'aug' else (processed_folder / 'train')

    for las_path in split_files:
        with laspy.open(las_path) as reader:
            las = reader.read()

        for i, mask in enumerate(chunkerize_four(las)):
            # A quadrant can be empty; min() on an empty chunk would raise.
            if not np.any(mask):
                continue

            chunk = las.points[mask]
            out = laspy.create(point_format=las.point_format, file_version=las.header.version)
            out.header.scales = las.header.scales
            out.header.offsets = np.array([0.0, 0.0, 0.0])
            out.points = chunk

            # Read all coordinate views before reassigning any of them.
            x = chunk.x
            y = chunk.y
            z = chunk.z

            out.x = x - np.array(x).min()
            out.y = y - np.array(y).min()
            out.z = z - np.array(z).min()

            # Renumber instance ids to consecutive integers within the chunk.
            present_ids, compact_ids = np.unique(chunk.instance_gt, return_inverse=True)
            if present_ids[0] > 0:
                # The chunk holds only ids > 0 (handled as tree instances
                # elsewhere in this notebook); shift by one so that none of
                # them is relabelled to 0.
                compact_ids = compact_ids + 1
            out.instance_gt = compact_ids

            out.write(out_folder / f'{las_path.stem}_chunk_{i}.las')

100%|██████████| 4/4 [00:13<00:00,  3.45s/it]


In [11]:
# Move a random sample of processed train chunks into the val split.
# NOTE: not idempotent — every execution moves another batch of files.

# Sorted so random.sample() draws from a deterministic ordering.
train_files = sorted((processed_folder / 'train').glob('*.las'))
print(f"📁 Archivos encontrados en train: {len(train_files)}")

# Work on a local copy so the VAL_CHUNKS constant is never overwritten.
num_to_move = VAL_CHUNKS
if len(train_files) < num_to_move:
    print(f"⚠️ Solo hay {len(train_files)} archivos en train, pero quieres mover {num_to_move}")
    num_to_move = len(train_files)
    print(f"📝 Ajustando a mover {num_to_move} archivos")

selected_files = random.sample(train_files, num_to_move)
print(f"🎯 Archivos seleccionados para mover a val: {len(selected_files)}")

val_destination = processed_folder / 'val'
moved_count = 0
for file_path in tqdm(selected_files, desc="Moviendo archivos a val"):
    try:
        shutil.move(str(file_path), str(val_destination / file_path.name))
        moved_count += 1
    except OSError as e:
        # Best effort: report the failed move (shutil.Error is an OSError) and continue.
        print(f"❌ Error moviendo {file_path.name}: {e}")

print(f"✅ Se movieron {moved_count} archivos de train a val")
print(f"📊 Archivos restantes en train: {len(list((processed_folder / 'train').glob('*.las')))}")
print(f"📊 Archivos totales en val: {len(list((processed_folder / 'val').glob('*.las')))}")

📁 Archivos encontrados en train: 3268
🎯 Archivos seleccionados para mover a val: 32


Moviendo archivos a val: 100%|██████████| 32/32 [00:00<00:00, 14328.78it/s]

✅ Se movieron 32 archivos de train a val
📊 Archivos restantes en train: 3236
📊 Archivos totales en val: 64





In [9]:
# Check which distinct `semantic_gt` values occur in every cloud of the 'aug' folder
aug_folder = raw_folder / 'aug'
aug_files = list(aug_folder.glob('*.las'))

print(f"Analizando {len(aug_files)} archivos en la carpeta 'aug'...")

all_unique_ids = set()   # union of the values seen across all files
file_id_info = []        # one summary record per file

for aug_file in tqdm(aug_files, desc="Checking aug files"):
    with laspy.open(aug_file) as reader:
        las = reader.read()

    unique_ids = np.unique(las.semantic_gt)
    all_unique_ids |= set(unique_ids)

    file_id_info.append(dict(
        file=aug_file.name,
        unique_ids=unique_ids,
        min_id=unique_ids.min(),
        max_id=unique_ids.max(),
        num_unique=len(unique_ids),
    ))

# Global summary over the whole folder
print(f"\n📊 RESUMEN GLOBAL:")
print(f"IDs únicos encontrados en todo el conjunto 'aug': {sorted(all_unique_ids)}")
print(f"Rango de IDs: {min(all_unique_ids)} - {max(all_unique_ids)}")
print(f"Total de IDs únicos diferentes: {len(all_unique_ids)}")

# Per-file details, limited to the first ten records
print(f"\n📋 DETALLES POR ARCHIVO (primeros 10):")
for info in file_id_info[:10]:
    print(f"{info['file']}: IDs {info['unique_ids']} (rango: {info['min_id']}-{info['max_id']}, total: {info['num_unique']})")

Analizando 825 archivos en la carpeta 'aug'...


Checking aug files: 100%|██████████| 825/825 [00:00<00:00, 847.67it/s]


📊 RESUMEN GLOBAL:
IDs únicos encontrados en todo el conjunto 'aug': [0, 1, 2]
Rango de IDs: 0 - 2
Total de IDs únicos diferentes: 3

📋 DETALLES POR ARCHIVO (primeros 10):
Ground_12_2025-10-13_17h24_08_872_aug_2.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_8_2025-10-13_17h39_56_233_aug_14.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_15_2025-10-13_17h36_53_125_aug_14.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_4_2025-10-13_17h36_52_815_aug_8.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_18_2025-10-13_17h36_53_204_aug_5.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_28_2025-10-13_17h36_53_468_aug_6.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_8_2025-10-13_17h36_52_923_aug_11.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_7_2025-10-13_17h24_08_744_aug_10.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_14_2025-10-13_17h36_53_100_aug_6.las: IDs [0 1 2] (rango: 0-2, total: 3)
Ground_5_2025-10-13_17h39_56_117_aug_2.las: IDs [0 1 2] (rango: 0-2, total: 3)



