In [2]:
import os
import laspy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor
from get_file_matches import get_als_dop_matches, get_tif_file_matches


In [3]:
# Dataset root and the two sub-folders (given relative to root_dir) whose TIF files are paired.
root_dir = "data/Tschernitz"
folder1 = "dop"
folder2 = "ground_truth_masks/forest_masks"

# Match stacked TIF and ground truth mask.
# NOTE(review): contains2="merged" presumably restricts the folder2 candidates to
# file names containing "merged" -- confirm against get_file_matches.
dop_label_matches = get_tif_file_matches(root_dir, folder1, folder2, contains2="merged")

Folder1 Path: data/Tschernitz\dop
Folder2 Path: data/Tschernitz\ground_truth_masks/forest_masks


In [4]:
als_folder = "data/Tschernitz/als"
dop_folder = "data/Tschernitz/dop"
# Assemble the output directory from its components instead of a backslash
# literal: "\T", "\o" and "\d" are invalid escape sequences in a normal string
# (newer Python versions warn about them) and a backslash-separated literal is
# only a directory path on Windows. os.path.join yields the same value there
# and a valid path everywhere else.
output_base_dir = os.path.join("data", "Tschernitz", "output", "dem_chm_slope_aspect_bands")

# Pair the files of the ALS folder with those of the DOP folder.
# NOTE(review): the matching rule lives in get_file_matches -- not visible here.
als_dop_matches = get_als_dop_matches(als_folder, dop_folder)

In [None]:
def visualize_lidar_points(las_file, mode='3D', num_points=100000):
    """
    Visualize LiDAR point cloud data in 2D or 3D.

    At most ``num_points`` points are drawn. Larger clouds are randomly
    subsampled (without replacement) before plotting so the scatter plot
    stays manageable.

    Args:
        las_file (str): Path to the LAS/LAZ file.
        mode (str): '2D' for top-down view, '3D' for full point cloud visualization.
        num_points (int | None): Maximum number of points to visualize
            (default = 100,000). Pass None to plot every point.

    Raises:
        ValueError: If ``mode`` is neither '2D' nor '3D', or if ``num_points``
            is negative.
    """
    # Validate the cheap arguments first so a typo fails before the file is read.
    if mode not in ('2D', '3D'):
        raise ValueError("Invalid mode. Choose '2D' or '3D'.")
    if num_points is not None and num_points < 0:
        raise ValueError("num_points must be non-negative or None.")

    # Load LiDAR data
    las = laspy.read(las_file)
    points = np.vstack((las.x, las.y, las.z)).T

    # Honour the documented point budget: draw a random subset of large clouds.
    total_points = points.shape[0]
    if num_points is not None and total_points > num_points:
        keep = np.random.choice(total_points, int(num_points), replace=False)
        points = points[keep]

    x, y, z = points[:, 0], points[:, 1], points[:, 2]

    if mode == '2D':
        plt.figure(figsize=(20, 16))
        plt.scatter(x, y, c=z, cmap='terrain', s=0.5, alpha=0.5)
        plt.colorbar(label="Elevation (m)")
        plt.xlabel("X Coordinate")
        plt.ylabel("Y Coordinate")
        plt.title("LiDAR Point Cloud (Top-Down 2D View)")
        plt.axis("equal")
        plt.show()
    else:  # mode == '3D' (guaranteed by the validation above)
        fig = plt.figure(figsize=(20, 16))
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(x, y, z, c=z, cmap='terrain', s=0.5, alpha=0.5)

        # Labels and title
        ax.set_xlabel("X Coordinate")
        ax.set_ylabel("Y Coordinate")
        ax.set_zlabel("Elevation (m)")
        ax.set_title("LiDAR Point Cloud (3D Visualization)")

        plt.show()



In [None]:
# NOTE(review): `als_file` is not assigned in any cell visible here -- confirm it
# is defined (e.g. picked from the ALS/DOP matches) before this cell runs on a fresh kernel.
visualize_lidar_points(als_file, mode='2D')  # 2D visualization

In [None]:
# NOTE(review): `als_file` is not assigned in any cell visible here -- confirm it
# is defined before this cell runs on a fresh kernel.
visualize_lidar_points(als_file, mode='3D')  # 3D visualization

In [2]:
# Function to extract characteristics of LAS data
def extract_las_characteristics(file_path):
    """
    Extract characteristics such as point count, bounding box, intensity range, and classification counts.

    Args:
        file_path (str): Path to the LAS/LAZ file.

    Returns:
        dict: Keys, in order: "File" (base name of ``file_path``),
            "Point_Count", "X_min", "X_max", "Y_min", "Y_max", "Z_min",
            "Z_max", "Intensity_min", "Intensity_max" and "Classification"
            (a dict mapping each classification value to its point count).
            For a file without points every min/max entry is NaN and
            "Classification" is empty.
    """
    las = laspy.read(file_path)
    point_count = len(las.points)

    def _span(values):
        # np.min / np.max raise on zero-size input; report NaN for an empty
        # cloud so one empty tile cannot abort a whole batch run.
        if point_count == 0:
            return np.nan, np.nan
        return np.min(values), np.max(values)

    x_min, x_max = _span(las.x)
    y_min, y_max = _span(las.y)
    z_min, z_max = _span(las.z)
    intensity_min, intensity_max = _span(las.intensity)

    # np.unique returns (values, counts); zipping them gives value -> count.
    class_values, class_counts = np.unique(las.classification, return_counts=True)

    return {
        "File": os.path.basename(file_path),
        "Point_Count": point_count,
        "X_min": x_min,
        "X_max": x_max,
        "Y_min": y_min,
        "Y_max": y_max,
        "Z_min": z_min,
        "Z_max": z_max,
        "Intensity_min": intensity_min,
        "Intensity_max": intensity_max,
        "Classification": dict(zip(class_values, class_counts)),
    }



def check_invalid_coordinates(file_path):
    """
    Count points whose coordinates lie outside the bounds stored in the file header.

    A point is counted as invalid when its x, y or z value is below the
    corresponding ``header.mins`` entry or above the ``header.maxs`` entry.

    Args:
        file_path (str): Path to the LAS/LAZ file.

    Returns:
        tuple[int, int]: ``(invalid_point_count, total_point_count)``.
            ``(0, 0)`` is returned when the file cannot be read or checked;
            a ``RuntimeWarning`` naming the file and the error is emitted in
            that case so the failure is not mistaken for a clean file.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    try:
        las = laspy.read(file_path)
        mins, maxs = las.header.mins, las.header.maxs
        x_invalid = (mins[0] > las.x) | (maxs[0] < las.x)
        y_invalid = (mins[1] > las.y) | (maxs[1] < las.y)
        z_invalid = (mins[2] > las.z) | (maxs[2] < las.z)
        # A point is invalid as soon as any one axis is out of bounds.
        total_invalid = int(np.sum(x_invalid | y_invalid | z_invalid))
        total_points = len(las.points)
        return total_invalid, total_points
    except Exception as exc:
        # Stay best-effort (batch runs keep going) but make the failure visible.
        warnings.warn(
            f"Could not check coordinates of {file_path}: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
        return 0, 0


def process_all_laz_files(laz_data_path_list):
    """
    Build one summary row per LAZ file: its characteristics plus the coordinate check.

    Both per-file functions are run through a pool of four worker threads.
    Results are combined by position (``executor.map`` preserves input
    order) rather than by joining on the file name, so two paths that share
    a base name still produce exactly one row each.

    Args:
        laz_data_path_list (Iterable[str]): Paths of the LAS/LAZ files. Any
            iterable is accepted; it is materialized once up front.

    Returns:
        pandas.DataFrame: One row per input path, holding the columns
            returned by ``extract_las_characteristics`` followed by
            "Invalid_Points", "Total_Points" and "Percentage_Invalid".
            An empty input yields an empty DataFrame.
    """
    # Materialize once: the paths are iterated twice below, which would
    # silently exhaust a generator after the first pass.
    paths = list(laz_data_path_list)

    with ThreadPoolExecutor(max_workers=4) as executor:
        characteristics = list(executor.map(extract_las_characteristics, paths))
        coordinate_check_results = list(executor.map(check_invalid_coordinates, paths))

    rows = [
        {
            **file_characteristics,
            "Invalid_Points": invalid,
            "Total_Points": total,
            # Guard the division: total is 0 when the coordinate check failed.
            "Percentage_Invalid": (invalid / total * 100) if total > 0 else 0,
        }
        for file_characteristics, (invalid, total) in zip(characteristics, coordinate_check_results)
    ]
    return pd.DataFrame(rows)




In [3]:
def visualize_features_per_file(file_paths, subsample_ratio=0.1):
    """
    Load LAZ files, subsample, and visualize features for each file.

    For every file a random subset of points is drawn (without replacement)
    and three histograms are shown side by side: log1p-scaled intensity,
    height (z) and classification.

    Args:
        file_paths (Iterable[str]): Paths of the LAS/LAZ files to plot.
        subsample_ratio (float): Fraction of each file's points to sample
            (default 0.1). The resulting sample size is clamped to the range
            1..number of points, so tiny files and ratios above 1 still work.

    Returns:
        None. Files without any points are reported and skipped.
    """
    for file_path in file_paths:
        print(f"Processing: {file_path}")
        las = laspy.read(file_path)
        num_points = len(las.x)
        if num_points == 0:
            # Nothing to sample; min()/max() below would raise on empty arrays.
            print(f"Skipping {file_path}: file contains no points")
            continue

        # Clamp the sample size: int() can truncate to 0 for small files, and a
        # ratio above 1 would ask np.random.choice for more points than exist.
        sample_size = min(num_points, max(1, int(num_points * subsample_ratio)))
        indices = np.random.choice(num_points, sample_size, replace=False)

        # Extract features
        height = np.asarray(las.z[indices], dtype=float)
        intensity = las.intensity[indices]
        classification = las.classification[indices]

        # Visualize histograms
        fig, axes = plt.subplots(1, 3, figsize=(18, 5))

        # Intensity Histogram (Logarithmic)
        axes[0].hist(np.log1p(intensity), bins=50, color='blue', alpha=0.7)
        axes[0].set_title(f"Log-Scaled Intensity Histogram\n{file_path}")
        axes[0].set_xlabel("Log(Intensity)")
        axes[0].set_ylabel("Count")

        # Height Histogram
        axes[1].hist(height, bins=50, color='green', alpha=0.7)
        axes[1].set_title(f"Height Histogram\n{file_path}")
        axes[1].set_xlabel("Height")
        axes[1].set_ylabel("Count")

        # Classification Histogram: one bin centred on every integer class id.
        # Python ints keep the "+ 2" from overflowing should the class ids be
        # stored in a narrow integer dtype.
        class_lo = int(classification.min())
        class_hi = int(classification.max())
        axes[2].hist(classification, bins=np.arange(class_lo, class_hi + 2) - 0.5,
                     color='orange', alpha=0.7)
        axes[2].set_title(f"Classification Histogram\n{file_path}")
        axes[2].set_xlabel("Class ID")
        axes[2].set_ylabel("Count")

        plt.tight_layout()
        plt.show()
        # Release the figure so looping over many files does not accumulate them.
        plt.close(fig)

In [None]:
def visualize_classification_masks(data):
    """
    Draw one top-down scatter figure per classification value present in the data.

    Parameters:
        data (dict): Must provide 'points' (2-D array; column 0 is read as X
            and column 1 as Y) and 'classification' (one value per point).
    """
    xy = data['points']
    xs = xy[:, 0]
    ys = xy[:, 1]
    labels = data['classification']

    # np.unique yields every distinct class once, in sorted order.
    for class_id in np.unique(labels):
        selected = labels == class_id  # boolean mask picking this class only

        fig, ax = plt.subplots(figsize=(10, 8))
        ax.scatter(xs[selected], ys[selected], color='blue', s=1, label=f'Class {class_id}')
        ax.set_title(f"LiDAR Classification Mask for Class {class_id}")
        ax.set_xlabel("X Coordinate")
        ax.set_ylabel("Y Coordinate")
        ax.legend(loc='upper right')
        fig.tight_layout()
        plt.show()