In [None]:
# Core data analysis and plotting libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Geospatial data processing
import geopandas as gpd

# Spatial indexing, geometry, and statistical modelling
from scipy.spatial import KDTree
from shapely.geometry import Point, box
import statsmodels.api as sm


# Model evaluation metrics and neighbour search (scikit-learn)
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.neighbors import NearestNeighbors



from lidar.extraction import LASProcessor, CanopyHeightModel, TreeHeightCalculator, CrownRadiusCalculator
# Display settings
%matplotlib inline

In [None]:
# Create a LASProcessor for the LAS file under ./test_data and run its
# processing step. `processor` is reused by later cells (canopy height model,
# tree height calculation).
# NOTE(review): what process() computes lives in lidar.extraction and is not
# visible from this notebook.
processor = LASProcessor("./test_data/LIDAR.las")
processor.process()

In [None]:
# Build a CanopyHeightModel on top of the already-processed LAS data and run
# its processing step. `chm` is later queried for tree-top coordinates.
# NOTE(review): the details of process() are defined in lidar.extraction and
# are not visible from this notebook.
chm = CanopyHeightModel(processor)
chm.process()

In [None]:
# Read the reference shapefile and split it into point positions and heights.
# NOTE(review): 'HRef' is presumably the field-measured reference height per
# tree — confirm against the shapefile's attribute documentation.
reference_shapefile = './test_data/Reference.shp'
data = gpd.read_file(reference_shapefile)

# One (x, y) pair per geometry, in the order the features appear in the file.
reference_xy_pairs = [(tree_point.x, tree_point.y) for tree_point in data.geometry]
reference_points = np.array(reference_xy_pairs)

reference_heights = data['HRef'].to_numpy()

In [None]:
# Collect the tree tops found by the canopy height model and the height
# computed for each of them.
# NOTE(review): this cell originally carried a "replace with the actual method"
# placeholder next to get_tree_top_coordinates() — confirm this is the intended
# API before relying on the results.
detected_tree_tops = chm.get_tree_top_coordinates()
detected_coords = np.array(detected_tree_tops)

# Heights are looked up from the processed LAS data for each detected position.
height_calculator = TreeHeightCalculator(processor)
tree_heights = height_calculator.calculate_tree_heights(detected_coords)
detected_heights = np.array(tree_heights)

In [None]:
# Pair detected tree tops with reference trees by location
#
# Each reference tree is paired with its nearest detected tree top (KD-tree
# lookup, one-to-one), and the resulting pairs are ordered by reference height.
# The previous approach sorted both height arrays independently and paired them
# by rank, which (a) compares unrelated trees, (b) yields an artificially high
# correlation/R² because two sorted sequences always rise together, and
# (c) fails with a shape error whenever the two datasets differ in size.
#
# NOTE(review): assumes the first two columns of `detected_coords` are x/y in
# the same coordinate reference system as the reference shapefile — confirm
# against CanopyHeightModel.get_tree_top_coordinates().
MAX_MATCH_DISTANCE = 3.0  # search radius in CRS units (presumably metres — TODO confirm/tune)

if len(reference_heights) == 0 or len(detected_heights) == 0:
    raise ValueError("Cannot match trees: the reference or detected dataset is empty.")

detected_xy = np.asarray(detected_coords, dtype=float)[:, :2]

# For every reference tree, find the closest detection within the search radius.
# KDTree.query reports "no neighbour in range" as an infinite distance.
nn_distances, nn_indices = KDTree(detected_xy).query(
    reference_points, k=1, distance_upper_bound=MAX_MATCH_DISTANCE
)

# Greedy one-to-one assignment: closest pairs claim their detection first, so a
# single detected tree top is never counted against several reference trees.
matched_ref, matched_det = [], []
claimed_detections = set()
for ref_idx in np.argsort(nn_distances, kind="stable"):
    if not np.isfinite(nn_distances[ref_idx]):
        break  # infinite distances sort last: nothing further is within range
    det_idx = int(nn_indices[ref_idx])
    if det_idx in claimed_detections:
        continue
    claimed_detections.add(det_idx)
    matched_ref.append(int(ref_idx))
    matched_det.append(det_idx)

if not matched_ref:
    raise ValueError(
        f"No detected tree top lies within {MAX_MATCH_DISTANCE} units of any reference tree; "
        "check that both datasets use the same coordinate reference system."
    )

matched_ref = np.array(matched_ref, dtype=int)
matched_det = np.array(matched_det, dtype=int)

# Order the matched pairs by reference height so the "sorted_*" arrays used by
# later cells are still ascending in reference height. Index i in every array
# below refers to the same physical tree pair.
height_order = np.argsort(reference_heights[matched_ref])
sorted_ref_indices = matched_ref[height_order]
sorted_det_indices = matched_det[height_order]

sorted_reference_points = reference_points[sorted_ref_indices]
sorted_reference_heights = reference_heights[sorted_ref_indices]

sorted_detected_coords = detected_coords[sorted_det_indices]
sorted_detected_heights = detected_heights[sorted_det_indices]

# Matched detections are reported at the position of their reference tree.
adjusted_coords = sorted_reference_points
adjusted_heights = sorted_reference_heights

print(
    f"Matched {len(sorted_ref_indices)} of {len(reference_heights)} reference trees "
    f"to {len(detected_heights)} detections (max distance {MAX_MATCH_DISTANCE})."
)

# Create a GeoDataFrame for the matched detections. The column keeps the name
# 'HRef' for compatibility, but holds the *detected* height of each pair.
adjusted_detected_gdf = gpd.GeoDataFrame({
    'HRef': sorted_detected_heights,
    'geometry': [Point(coord) for coord in adjusted_coords]
}, geometry='geometry')

In [None]:
# Calculate errors and metrics
# Sign convention: a positive error means the detected height is larger than
# the reference height it is paired with.
errors = sorted_detected_heights - adjusted_heights
mae = np.mean(np.abs(errors))
rmse = np.sqrt(mean_squared_error(adjusted_heights, sorted_detected_heights))

# MAPE is undefined where the reference height is zero; those samples are left
# out instead of turning the whole metric into inf/nan.
nonzero_reference = adjusted_heights != 0
if np.any(nonzero_reference):
    mape = np.mean(np.abs(errors[nonzero_reference] / adjusted_heights[nonzero_reference])) * 100
else:
    mape = np.nan

r2 = r2_score(adjusted_heights, sorted_detected_heights)
bias = np.mean(errors)
# Scatter index: RMSE normalised by the mean reference height.
si = rmse / np.mean(adjusted_heights)
pearson_corr = np.corrcoef(adjusted_heights, sorted_detected_heights)[0, 1]

# Print performance metrics
print(f"Mean Absolute Error (MAE): {mae:.2f} meters")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f} meters")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
# "\u00b2" is the superscript two; written as an escape so the label cannot be
# mangled by a mis-decoded file (it previously printed as mojibake).
print(f"Coefficient of Determination (R\u00b2): {r2:.2f}")
print(f"Bias (Mean Error): {bias:.2f} meters")
print(f"Scatter Index (SI): {si:.2f}")
print(f"Pearson Correlation Coefficient: {pearson_corr:.2f}")

In [None]:
# Regression of detected on reference heights, after dropping outliers.
# Outliers are samples whose error lies more than 1.5 * IQR outside the
# interquartile range of the errors (bounds are inclusive).
Q1, Q3 = np.percentile(errors, [25, 75])
IQR = Q3 - Q1
threshold = 1.5 * IQR
lower_fence = Q1 - threshold
upper_fence = Q3 + threshold
filtered_indices = (errors >= lower_fence) & (errors <= upper_fence)

# Apply the same boolean mask to all three arrays so they stay aligned.
filtered_ref_heights, filtered_detected_heights, filtered_errors = (
    values[filtered_indices]
    for values in (adjusted_heights, sorted_detected_heights, errors)
)

# Ordinary least squares with an intercept: detected ~ const + reference.
X_filtered = sm.add_constant(filtered_ref_heights)
model_filtered = sm.OLS(filtered_detected_heights, X_filtered).fit()
print(model_filtered.summary())

# Scatter of the filtered pairs with the fitted regression line on top.
regression_fig, regression_ax = plt.subplots(figsize=(10, 6))
regression_ax.scatter(filtered_ref_heights, filtered_detected_heights, alpha=0.5)
fitted_heights = model_filtered.predict(X_filtered)
regression_ax.plot(filtered_ref_heights, fitted_heights, 'r--', label='Regression Line')
regression_ax.set_xlabel('Reference Heights (m)')
regression_ax.set_ylabel('Detected Heights (m)')
regression_ax.set_title('Regression Analysis (Filtered)')
regression_ax.legend()
plt.show()

# Histogram of the filtered errors.
histogram_fig, histogram_ax = plt.subplots(figsize=(10, 6))
histogram_ax.hist(filtered_errors, bins=20, color='blue', edgecolor='black')
histogram_ax.set_title('Error Distribution (Filtered)')
histogram_ax.set_xlabel('Error (m)')
histogram_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Error Analysis by Height Ranges
# Each bin is half-open, [low, high) in reference height, so a tree of exactly
# 5 m falls into '5-10m'.
height_ranges = [(0, 5), (5, 10), (10, 15), (15, 20), (20, 25), (25, 30)]
range_labels = ['0-5m', '5-10m', '10-15m', '15-20m', '20-25m', '25-30m']

# Vectorised binning: one boolean mask per range instead of a nested Python loop.
_ref_heights = np.asarray(filtered_ref_heights)
_errors = np.asarray(filtered_errors)
filtered_errors_by_range = {
    label: list(_errors[(_ref_heights >= low) & (_ref_heights < high)])
    for (low, high), label in zip(height_ranges, range_labels)
}

# Samples outside every bin (e.g. trees of 30 m or more) are not plotted; say
# so explicitly rather than dropping them silently.
unbinned_count = len(_errors) - sum(len(values) for values in filtered_errors_by_range.values())
if unbinned_count:
    print(
        f"Note: {unbinned_count} sample(s) have reference heights outside "
        f"{height_ranges[0][0]}-{height_ranges[-1][1]} m and are not shown in the box plot."
    )

fig, ax = plt.subplots(figsize=(12, 8))
ax.boxplot([filtered_errors_by_range[label] for label in range_labels])
# Tick labels are set on the axes instead of via boxplot(labels=...), which is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..N by default.
ax.set_xticks(range(1, len(range_labels) + 1))
ax.set_xticklabels(range_labels)
ax.set_title('Error Distribution by Height Range (Filtered)')
ax.set_xlabel('Height Range (m)')
ax.set_ylabel('Error (m)')
plt.show()


In [None]:
# Empirical CDF of the outlier-filtered errors: the i-th smallest error
# (1-based) is plotted at cumulative probability i / n.
sorted_errors = np.sort(filtered_errors)
n_errors = len(sorted_errors)
cdf = np.arange(1, n_errors + 1) / n_errors

cdf_fig, cdf_ax = plt.subplots(figsize=(10, 6))
cdf_ax.plot(sorted_errors, cdf, marker='.', linestyle='none')
cdf_ax.set_xlabel('Error (m)')
cdf_ax.set_ylabel('Cumulative Probability')
cdf_ax.set_title('Cumulative Distribution Function (CDF) of Errors')
cdf_ax.grid(True)
plt.show()