In [None]:
import os

# Force an X11 session type and remove any Wayland display variable from the
# process environment. This is done before the remaining imports so that
# libraries loaded afterwards see the modified environment.
# NOTE(review): presumably required for the Open3D windows opened later in
# this notebook when running on a Wayland desktop -- confirm.
os.environ["XDG_SESSION_TYPE"] = "x11"
os.environ.pop("WAYLAND_DISPLAY", None)

import torch
import numpy as np
import pandas as pd

from pathlib import Path
from dotenv import load_dotenv

from EHydro_TreeUnet.trainers import TreeProjectorTrainer
# `F` is torchsparse's functional module here (not torch.nn.functional).
from torchsparse.nn import functional as F

# Select the "hashmap_on_the_fly" kernel-map mode for torchsparse.
# This is a module-level setting made once, before any model is built.
F.set_kmap_mode("hashmap_on_the_fly")

In [10]:
# Load variables from a local .env file (if present) so that
# TREE_PROJECTOR_DIR can be configured without editing the notebook.
load_dotenv()

# --- Paths and run identity -------------------------------------------------
# Root working directory: taken from the environment, falling back to
# ~/tree_projector when the variable is not set.
TREE_PROJECTOR_DIR = Path(os.environ.get('TREE_PROJECTOR_DIR', Path.home() / 'tree_projector'))
DATASET_FOLDER = 'MixedDataset'
# NOTE(review): the name encodes "VS-0.2" and "E-3", but VOXEL_SIZE is 0.3 and
# EPOCHS is 5 below ("DA-48" does match DATA_AUGMENTATION_COEF). Confirm which
# values this version was actually trained with before trusting the results.
VERSION_NAME = 'tree_projector_VS-0.2_DA-48_E-3_V5'

# --- Dataset / augmentation -------------------------------------------------
# The run log printed by the trainer reports the finest feature scale as
# (0.3, 0.3, 0.3) meters, i.e. this value is in meters.
VOXEL_SIZE = 0.3
FEAT_KEYS = ['intensity']
CENTROID_SIGMA_MIN = 1.0
CENTROID_SIGMA_MAX = 4.0
CENTROID_SIGMA_DIVISOR = 18.0
TRAIN_PCT = 0.9
DATA_AUGMENTATION_COEF = 48
# NOTE(review): ranges presumably in degrees (yaw, tilt) and as a
# multiplicative factor (scale) -- confirm against the trainer.
YAW_RANGE = (0.0, 360.0)
TILT_RANGE = (-5.0, 5.0)
SCALE_RANGE = (0.9, 1.3)

# --- Training ---------------------------------------------------------------
# When False, the notebook skips trainer.train() and only runs evaluation.
TRAINING = False
TEST_LR = []
EPOCHS = 5
START_ON_EPOCH = 0
BATCH_SIZE = 8
# Per-head loss weights.
SEMANTIC_LOSS_COEF = 2.0
CENTROID_LOSS_COEF = 1.0
OFFSET_LOSS_COEF = 1.0
INSTANCE_LOSS_COEF = 1.0
# Per-component learning rates.
BACKBONE_LR = 2e-3
SEMANTIC_LR = 1e-3
OFFSET_LR = 2e-2
CENTROID_LR = 1e-3
INSTANCE_LR = 2e-2
WEIGHT_DECAY = 0.04

# --- Model ------------------------------------------------------------------
# One tuple per backbone stage. Judging by the trainer's log, the second
# element is the number of output features and the last element behaves like a
# stride (the reported scale doubles wherever it is 2, per axis for the last
# stage).
# NOTE(review): the first and third elements are presumably the number of
# blocks and the kernel size -- confirm against TreeProjectorTrainer.
RESNET_BLOCKS = [
    (3, 16, 3, 1),
    (3, 32, 3, 2),
    (3, 64, 3, 2),
    (3, 128, 3, 2),
    (1, 128, (1, 1, 3), (1, 1, 2)),
]
INSTANCE_DENSITY = 0.005
# NOTE(review): MIN_TREE_VOLUME is defined here but is not passed to
# TreeProjectorTrainer in the visible cells -- confirm whether it is needed.
MIN_TREE_VOLUME = 5.0
SCORE_THRES = 0.2
CENTROID_THRES = 0.3
MAX_TREES_PER_SCENE = 64
DESCRIPTOR_DIM = 16

# NOTE(review): not referenced by any cell visible in this notebook.
CHARTS_IGNORE_CLASS = []

In [None]:
# The trainer settings are grouped by concern and then unpacked into a single
# constructor call; the keyword names and their order are unchanged.
dataset_settings = dict(
    voxel_size=VOXEL_SIZE,
    feat_keys=FEAT_KEYS,
    centroid_sigma_min=CENTROID_SIGMA_MIN,
    centroid_sigma_max=CENTROID_SIGMA_MAX,
    centroid_sigma_divisor=CENTROID_SIGMA_DIVISOR,
    train_pct=TRAIN_PCT,
    data_augmentation_coef=DATA_AUGMENTATION_COEF,
    yaw_range=YAW_RANGE,
    tilt_range=TILT_RANGE,
    scale_range=SCALE_RANGE,
)

optimisation_settings = dict(
    training=TRAINING,
    test_lr=TEST_LR,
    epochs=EPOCHS,
    start_on_epoch=START_ON_EPOCH,
    batch_size=BATCH_SIZE,
    semantic_loss_coef=SEMANTIC_LOSS_COEF,
    centroid_loss_coef=CENTROID_LOSS_COEF,
    offset_loss_coef=OFFSET_LOSS_COEF,
    instance_loss_coef=INSTANCE_LOSS_COEF,
    backbone_lr=BACKBONE_LR,
    semantic_lr=SEMANTIC_LR,
    offset_lr=OFFSET_LR,
    centroid_lr=CENTROID_LR,
    instance_lr=INSTANCE_LR,
    weight_decay=WEIGHT_DECAY,
)

model_settings = dict(
    resnet_blocks=RESNET_BLOCKS,
    instance_density=INSTANCE_DENSITY,
    score_thres=SCORE_THRES,
    centroid_thres=CENTROID_THRES,
    max_trees_per_scene=MAX_TREES_PER_SCENE,
    descriptor_dim=DESCRIPTOR_DIM,
)

trainer = TreeProjectorTrainer(
    tree_projector_dir=TREE_PROJECTOR_DIR,
    dataset_folder=DATASET_FOLDER,
    version_name=VERSION_NAME,
    **dataset_settings,
    **optimisation_settings,
    **model_settings,
)

# Training only runs when explicitly enabled in the configuration cell;
# otherwise the trainer is used for evaluation alone.
if TRAINING:
    trainer.train()


import open3d as o3d
import matplotlib.pyplot as plt

# Colours reserved for special instance ids. Randomly drawn tree colours are
# kept at least MIN_COLOR_DISTANCE (Euclidean distance in RGB) from both.
GROUND_COLOR = (0.2, 0.2, 0.2)     # id -1: voxels whose semantic label is 0
UNMATCHED_COLOR = (1.0, 0.0, 0.0)  # predicted ids that do not occur in the labels
MIN_COLOR_DISTANCE = 0.2

# Radius of the centroid marker spheres, in the same (voxel) units as the
# coordinates they are placed at.
SPHERE_RADIUS = 1.5

# The ground-truth cloud is drawn next to the prediction, shifted along x.
# Coordinates are voxel indices, so the shift is divided by VOXEL_SIZE.
# NOTE(review): 20 is presumably meters, given that the trainer log reports
# the voxel size in meters -- confirm.
GT_SHIFT = (20 / VOXEL_SIZE, 0, 0)

# Both clouds are created once and re-filled for every view.
pcd_gt = o3d.geometry.PointCloud()
pcd_pred = o3d.geometry.PointCloud()


def _instance_color_table(label_ids, predicted_ids, rng):
    """Build the ``instance id -> RGB`` lookup shared by labels and predictions.

    Parameters
    ----------
    label_ids : 1-D int array of (remapped) ground-truth instance ids, -1 for
        voxels without an instance.
    predicted_ids : 1-D int array of predicted instance ids, same convention.
    rng : numpy random Generator used to draw the per-instance colours.

    Returns
    -------
    dict mapping every id present in either array to an RGB triple:
    a random colour for ids present in ``label_ids``, ``UNMATCHED_COLOR`` for
    ids that only occur in ``predicted_ids`` and ``GROUND_COLOR`` for -1.
    """
    reserved = np.array([GROUND_COLOR, UNMATCHED_COLOR])
    known_ids = np.unique(label_ids)
    # Fix: the original filtered on the enumerate index instead of the id, so
    # -1 was never excluded and a palette colour was drawn for it needlessly.
    tree_ids = known_ids[known_ids != -1]

    palette = []
    while len(palette) < tree_ids.size:
        candidate = rng.random(3)
        # Reject colours that could be confused with a reserved one.
        if np.all(np.linalg.norm(reserved - candidate, axis=1) > MIN_COLOR_DISTANCE):
            palette.append(candidate)

    id2color = dict(zip(tree_ids.tolist(), palette))
    for uid in np.setdiff1d(np.unique(predicted_ids), known_ids).tolist():
        id2color[uid] = UNMATCHED_COLOR
    id2color[-1] = GROUND_COLOR
    return id2color


def _place_clouds(pred_points, gt_points):
    """Load points into both clouds; the ground-truth cloud is shifted by GT_SHIFT."""
    pcd_pred.points = o3d.utility.Vector3dVector(pred_points)
    pcd_gt.points = o3d.utility.Vector3dVector(gt_points)
    pcd_gt.translate(GT_SHIFT)


def _paint_clouds(pred_colors, gt_colors):
    """Assign per-point RGB colours (values in [0, 1]) to both clouds."""
    pcd_pred.colors = o3d.utility.Vector3dVector(pred_colors)
    pcd_gt.colors = o3d.utility.Vector3dVector(gt_colors)


score_cmap = plt.get_cmap('viridis')
sphere_cmap = plt.get_cmap('inferno')

# For every evaluation batch, each point cloud in the batch is shown in four
# consecutive (blocking) Open3D windows: semantic classes, centroid scores,
# offset-displaced voxels and instance ids. Prediction and ground truth are
# drawn side by side.
for eval_result in trainer.eval():
    # Column 0 of the sparse coordinates is the batch index, the rest is xyz.
    semantic_coords = eval_result['semantic_output'].C.cpu().numpy()
    batch_idx = semantic_coords[:, 0]
    voxels = semantic_coords[:, 1:]
    num_voxels = voxels.shape[0]

    centroid_voxels = eval_result['centroid_confidence_output'].C.cpu().numpy()[:, 1:]
    centroid_confidence_output = eval_result['centroid_confidence_output'].F.cpu().numpy()

    semantic_output = torch.argmax(eval_result['semantic_output'].F.cpu(), dim=1).numpy()
    semantic_labels = eval_result['semantic_labels'].F.cpu().numpy()

    # The remaining heads are only filled in for voxels whose semantic label
    # is not 0; their values are scattered back to full length so that every
    # array can be indexed with the same per-cloud mask.
    semantic_mask = semantic_labels != 0

    centroid_score_output = np.zeros((num_voxels, 1), dtype=float)
    centroid_score_output[semantic_mask] = eval_result['centroid_score_output'].F.cpu().numpy()
    centroid_score_labels = np.zeros((num_voxels, 1), dtype=float)
    centroid_score_labels[semantic_mask] = eval_result['centroid_score_labels'].F.cpu().numpy()

    offset_output = np.zeros((num_voxels, 3), dtype=float)
    offset_output[semantic_mask] = eval_result['offset_output'].F.cpu().numpy()
    offset_labels = np.zeros((num_voxels, 3), dtype=float)
    offset_labels[semantic_mask] = eval_result['offset_labels'].F.cpu().numpy()

    instance_output = np.full(num_voxels, fill_value=-1, dtype=int)
    instance_output[semantic_mask] = torch.argmax(
        eval_result['instance_output'].F.cpu(), dim=1
    ).numpy()

    # Remap ground-truth instance ids into the prediction's id space: matched
    # ids take the id of their matched prediction, unmatched ones get fresh
    # ids placed after the largest matched prediction id.
    remap_info = eval_result['remap_info']
    gt_indices = remap_info['gt_indices'].cpu().numpy()
    pred_indices = remap_info['pred_indices'].cpu().numpy()
    num_instances = remap_info['num_instances']

    lut = np.full(num_instances, fill_value=-1, dtype=int)
    lut[gt_indices] = pred_indices

    first_free_id = pred_indices.max() + 1 if pred_indices.size else 0
    unmatched = np.setdiff1d(np.arange(num_instances), gt_indices)
    lut[unmatched] = np.arange(first_free_id, first_free_id + unmatched.size)

    instance_labels = np.full(num_voxels, fill_value=-1, dtype=int)
    instance_labels[semantic_mask] = lut[eval_result['instance_labels'].F.cpu().numpy()]

    # A fixed seed keeps the instance colours reproducible between runs.
    id2color = _instance_color_table(instance_labels, instance_output, np.random.default_rng(0))

    class_colormap = trainer.dataset.class_colormap

    for idx in np.unique(batch_idx):
        cloud_mask = batch_idx == idx
        cloud_voxels = voxels[cloud_mask]

        cloud_semantic_output = semantic_output[cloud_mask]
        cloud_semantic_labels = semantic_labels[cloud_mask]
        cloud_instance_output = instance_output[cloud_mask]
        cloud_instance_labels = instance_labels[cloud_mask]

        semantic_colors_pred = class_colormap[cloud_semantic_output] / 255.0
        semantic_colors_gt = class_colormap[cloud_semantic_labels] / 255.0

        # View 1: semantic classes.
        _place_clouds(cloud_voxels, cloud_voxels)
        _paint_clouds(semantic_colors_pred, semantic_colors_gt)
        o3d.visualization.draw_geometries([pcd_pred, pcd_gt])

        # View 2: centroid scores, plus one sphere per predicted instance
        # coloured by its centroid confidence.
        _paint_clouds(
            score_cmap(centroid_score_output[cloud_mask][:, 0])[:, :3],
            score_cmap(centroid_score_labels[cloud_mask][:, 0])[:, :3],
        )

        spheres = []
        for instance_id in np.unique(cloud_instance_output):
            if instance_id < 0:
                continue

            # NOTE(review): the instance id is used as a row index into the
            # batch-wide centroid tensors; this assumes ids are unique across
            # the whole batch rather than per cloud -- confirm.
            center = centroid_voxels[instance_id].astype(np.float64)
            confidence = centroid_confidence_output[instance_id][0]

            sphere = o3d.geometry.TriangleMesh.create_sphere(radius=SPHERE_RADIUS)
            sphere.translate(center)
            sphere.paint_uniform_color(sphere_cmap(confidence)[:3])
            spheres.append(sphere)

        o3d.visualization.draw_geometries([pcd_pred, pcd_gt] + spheres)

        # View 3: voxels displaced by their offsets, coloured by semantic class.
        _place_clouds(
            cloud_voxels + offset_output[cloud_mask],
            cloud_voxels + offset_labels[cloud_mask],
        )
        _paint_clouds(semantic_colors_pred, semantic_colors_gt)
        o3d.visualization.draw_geometries([pcd_pred, pcd_gt] + spheres)

        # View 4: instance ids.
        _place_clouds(cloud_voxels, cloud_voxels)
        _paint_clouds(
            np.array([id2color[i] for i in cloud_instance_output], dtype=np.float64),
            np.array([id2color[i] for i in cloud_instance_labels], dtype=np.float64),
        )
        o3d.visualization.draw_geometries([pcd_pred, pcd_gt] + spheres)


Parámetros totales: 16,001,308
Parámetros entrenables: 16,001,308
Resnet generates features at the following scales:
	* (0.3, 0.3, 0.3) meters -> 16 feats.
	* (0.6, 0.6, 0.6) meters -> 32 feats.
	* (1.2, 1.2, 1.2) meters -> 64 feats.
	* (2.4, 2.4, 2.4) meters -> 128 feats.
	* (2.4, 2.4, 4.8) meters -> 128 feats.

Minimum scene size: (7.2, 7.2, 14.4) meters
[Val]:   0%|          | 0/2 [00:00<?, ?it/s]
[Val]:   0%|          | 0/2 [00:00<?, ?it/s, VRAM=7.29 GB, Matched=78 / 68, TP=25 / 68, Total=9.0213 →, Semantic=0.3448 →, Centroid=2.9624 →, Offset=3.8518 →, Instance=1.8623 →]
[Val]:  50%|█████     | 1/2 [02:39<02:39, 159.77s/it, VRAM=7.29 GB, Matched=78 / 68, TP=25 / 68, Total=9.0213 →, Semantic=0.3448 →, Centroid=2.9624 →, Offset=3.8518 →, Instance=1.8623 →]
[Val]:  50%|█████     | 1/2 [02:40<02:39, 159.77s/it, VRAM=7.29 GB, Matched=55 / 49, TP=25 / 49, Total=8.8173 ↓, Semantic=0.3880 ↑, Centroid=2.3153 ↓, Offset=4.6326 ↑, Instance=1.4814 ↓]
[Val]: 100%|██████████| 2/2 [03:24<00:00, 92