## Analyze results
This notebook takes the outcomes of `1. Tree filter.ipynb`, `3. Extract tree shapes.ipynb` and `4. Extract tree trunks.ipynb`. It compares these results against ground truth data and presents some statistics and visualizations.

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import fiona
from shapely import wkt

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import contextily as cx

In [None]:
# Root folder holding every input and output file referenced in this notebook
DATA_DIR = '/home/aiteam/projects/BOA/Tree_Detection_in_Aerial_Point_Clouds/datasets/'

# --- Input paths ---
# Management map and ground-truth measurements (the layers are read by name)
beheer_file = f'{DATA_DIR}measurements/Beheerkaart_Eigendomsrecht.gpkg'
measure_file = f'{DATA_DIR}measurements/Ground_Truth_Resultaten_Opnames_V2.gpkg'
area_layer = 'Daadwerkelijke opnamegrenzen'
measure_layer = 'Boompunten'

# Predicted trunk locations; the commented line is the alternative tile pair
ahn_results_file = f'{DATA_DIR}HGB/Trunks/Output_centroids_only_127_481_127_482.shp'
#ahn_results_file = f'{DATA_DIR}HGB/Trunks/Output_centroids_only_124_486_125_486.shp'

# Predicted tree shapes (hulls stored as WKT in a CSV file)
ahn_results_area_file = f'{DATA_DIR}HGB/Shapes/trees_alpha_1.75_20.csv'

# --- Output paths ---
output_image = f'{DATA_DIR}output_images/final_results_vooropname_ahn.png'
output_image_analysis = f'{DATA_DIR}output_images/final_results_vooropname_ahn_analysis.png'

In [None]:
# Coordinate reference system assigned to the layers built in this notebook
CRS = 'epsg:28992'

# Maximum distance to be identified as the same tree.
# The value is compared against the 'afstand' distance column, so it is expressed
# in CRS units (presumably metres for EPSG:28992 — TODO confirm).
max_dist = 1.3

### Import data

#### Areas

In [None]:
# Measurement area: read the layer named by `area_layer` from `measure_file`
df_area = gpd.read_file(measure_file, layer=area_layer)

In [None]:
# TODO remove
# Temporary restriction of the measurement areas to 'GebiedNummer' 1, 2 and 3;
# the commented line below is the alternative selection (A, B, C).
# NOTE(review): this toggle presumably has to be switched together with
# `ahn_results_file` and the point-cloud tiles further down — confirm.
df_area = df_area[df_area['GebiedNummer'].isin(['1', '2', '3'])]
#df_area = df_area[df_area['GebiedNummer'].isin(['A', 'B', 'C'])]

In [None]:
# Beheer area: read `beheer_file` (no layer is specified)
df_beheer = gpd.read_file(beheer_file)

#### Measurements

In [None]:
# Read the measured trees from the layer named by `measure_layer`
df = gpd.read_file(measure_file, layer=measure_layer)

# Rebuild the point geometry from the X_GNSS / Y_GNSS columns and tag it with the notebook CRS
gnss_points = gpd.points_from_xy(df['X_GNSS'], df['Y_GNSS'])
gdf = gpd.GeoDataFrame(df, geometry=gnss_points, crs=CRS)

#### AHN results

##### trunks

In [None]:
# Load the predicted trunk locations and force the notebook CRS onto them
# (allow_override replaces any CRS stored in the file without reprojecting)
df_ahn = gpd.read_file(ahn_results_file).set_crs(CRS, allow_override=True)

##### shapes

In [None]:
# Choose which hull represents a predicted tree: concave (True) or convex (False)
use_concave = True

df_ahn_areas = gpd.read_file(ahn_results_area_file, crs=CRS)

# The CSV stores geometries as WKT text; parse the columns that are needed.
# 'concave_hull' is only parsed when it is going to be used as the geometry.
wkt_columns = ['location', 'convex_hull']
if use_concave:
    wkt_columns.append('concave_hull')
for wkt_column in wkt_columns:
    df_ahn_areas[wkt_column] = df_ahn_areas[wkt_column].apply(wkt.loads)

hull_column = 'concave_hull' if use_concave else 'convex_hull'
df_ahn_areas = (
    df_ahn_areas
    .set_geometry(hull_column)      # make the chosen hull the active geometry
    .drop('geometry', axis=1)       # drop the 'geometry' column that came with the file read
    .set_crs(CRS)
    .drop_duplicates(['convex_hull', 'concave_hull'])
)

### Pre-process data

In [None]:
# Get area we are going to work with: the 'beheer' polygons clipped to the
# measurement areas.
df_beheer_sel = gpd.clip(df_beheer, df_area)
beheer_geom = df_beheer_sel[['geometry']]

# Select trees that are within the 'beheergrenzen' in the 'vooropname gebied'.
# The join returns one row per (point, polygon) pair, so a point lying inside
# overlapping polygons would appear several times and be counted more than once
# in the statistics; keep a single row per original point.
gdf_sel = gdf.sjoin(beheer_geom, predicate='within').drop(['index_right'], axis=1)
gdf_sel = gdf_sel[~gdf_sel.index.duplicated(keep='first')]
df_ahn_sel = df_ahn.sjoin(beheer_geom, predicate='within').drop(['index_right'], axis=1)
df_ahn_sel = df_ahn_sel[~df_ahn_sel.index.duplicated(keep='first')]

# Clip the predicted tree shapes to the same area and dissolve overlapping
# shapes into separate, non-overlapping parts (used for plotting).
df_ahn_areas_sel = gpd.clip(df_ahn_areas, df_beheer_sel)
crown_union = df_ahn_areas_sel.unary_union
# Only multi-part geometries expose `.geoms`; when everything merges into one
# polygon the union is a single geometry, which is wrapped in a list instead.
crown_parts = list(crown_union.geoms) if hasattr(crown_union, 'geoms') else [crown_union]
df_ahn_areas_sel_dis = gpd.GeoDataFrame(
    geometry=gpd.GeoSeries(crown_parts, crs=df_ahn_areas_sel.crs)
)

# Free the full management map; only the clipped selection is used from here on.
# Re-running this cell therefore requires re-running the cell that loads df_beheer.
del df_beheer

### Get results

#### Find matches

In [None]:
# Get nearest measured tree to predicted trees.
# Left join: every predicted trunk is kept and receives the 'objectid' of its
# nearest measured tree; the distance to it is stored in the 'afstand' column.
df_ahn_sjoin = df_ahn_sel.sjoin_nearest(gdf_sel[['objectid', 'geometry']], distance_col='afstand', how='left')

In [None]:
# Several predicted trunks can share the same nearest measured tree. Order the
# candidates per measured tree by distance and keep only the closest one.
df_ahn_sjoin = (
    df_ahn_sjoin
    .sort_values(['objectid', 'afstand'])
    .drop_duplicates(subset=['objectid'], keep='first')
)

In [None]:
# Drop cases where the distance between measured and predicted tree is too large
# (only pairs strictly closer than `max_dist` are kept as matches)
is_close_enough = df_ahn_sjoin['afstand'] < max_dist
df_ahn_sjoin = df_ahn_sjoin.loc[is_close_enough]

#### Calculate overall statistics

In [None]:
# Identifiers that ended up in an accepted match
matched_ids = df_ahn_sjoin['objectid']      # measured trees
matched_labels = df_ahn_sjoin['label']      # predicted trunks

# Measured trees with a match are true positives, the remaining ones false negatives
is_detected = gdf_sel['objectid'].isin(matched_ids)
true_positives = gdf_sel[is_detected].reset_index(drop=True)
false_negatives = gdf_sel[~is_detected].reset_index(drop=True)

# Predicted trunks that were not matched to any measured tree
is_unmatched = ~df_ahn_sel['label'].isin(matched_labels)
false_positives = df_ahn_sel[is_unmatched].reset_index(drop=True)

In [None]:
# Overall detection summary: counts, precision and recall.
n_tp = len(true_positives)
n_fn = len(false_negatives)
n_fp = len(false_positives)

# With no predictions (or no measured trees) in the selection the denominator
# is 0; report NaN instead of aborting the cell with a ZeroDivisionError.
precision = round(n_tp / (n_tp + n_fp), 2) if (n_tp + n_fp) else float('nan')
recall = round(n_tp / (n_tp + n_fn), 2) if (n_tp + n_fn) else float('nan')

print(f'measured: {len(gdf_sel)}')
print(f'predicted: {len(df_ahn_sel)}')
print(f'TP: {n_tp}')
print(f'FN: {n_fn}')
print(f'FP: {n_fp}')
print(f'precision: {precision}')
print(f'recall: {recall}')

#### Calculate statistics per cluster

In [None]:
def _rows_where(frame, column, value):
    """Return the rows of ``frame`` whose ``column`` equals ``value``."""
    return frame[frame[column] == value]


# The two cluster classifications present in the measurement data
_TECH_COL = 'Type_Cluster_Technisch'
_NEED_COL = 'Type_Cluster_Behoefte'

# Calculate measurement count per cluster
cw_count = len(_rows_where(gdf_sel, _TECH_COL, 'Cluster - Weinig'))
cv_count = len(_rows_where(gdf_sel, _TECH_COL, 'Cluster - Veel'))
s_count = len(_rows_where(gdf_sel, _TECH_COL, 'Solitair'))
weide_count = len(_rows_where(gdf_sel, _NEED_COL, 'Weide'))
wijk_count = len(_rows_where(gdf_sel, _NEED_COL, 'Wijk'))
bos_count = len(_rows_where(gdf_sel, _NEED_COL, 'Bosplantsoen'))

# Calculate true positives per cluster
tp_cw = _rows_where(true_positives, _TECH_COL, 'Cluster - Weinig')
tp_cv = _rows_where(true_positives, _TECH_COL, 'Cluster - Veel')
tp_s = _rows_where(true_positives, _TECH_COL, 'Solitair')
tp_weide = _rows_where(true_positives, _NEED_COL, 'Weide')
tp_wijk = _rows_where(true_positives, _NEED_COL, 'Wijk')
tp_bos = _rows_where(true_positives, _NEED_COL, 'Bosplantsoen')

# Calculate false negatives per cluster
fn_cw = _rows_where(false_negatives, _TECH_COL, 'Cluster - Weinig')
fn_cv = _rows_where(false_negatives, _TECH_COL, 'Cluster - Veel')
fn_s = _rows_where(false_negatives, _TECH_COL, 'Solitair')
fn_weide = _rows_where(false_negatives, _NEED_COL, 'Weide')
fn_wijk = _rows_where(false_negatives, _NEED_COL, 'Wijk')
fn_bos = _rows_where(false_negatives, _NEED_COL, 'Bosplantsoen')

In [None]:
# True positives and recall per technical cluster type.
# A cluster type without any measured tree in the selected areas has
# TP + FN == 0; its recall is reported as NaN instead of raising ZeroDivisionError.
for cluster_name, cluster_tp, cluster_fn, cluster_measured in [
    ('cluster veel', tp_cv, fn_cv, cv_count),
    ('cluster weinig', tp_cw, fn_cw, cw_count),
    ('solitair', tp_s, fn_s, s_count),
]:
    n_found = len(cluster_tp)
    n_total = n_found + len(cluster_fn)
    cluster_recall = round(n_found / n_total, 2) if n_total else float('nan')
    print(f'TP {cluster_name}: {n_found} (/{cluster_measured})')
    print(f'recall {cluster_name}: {cluster_recall}')

In [None]:
# True positives and recall per 'behoefte' cluster type.
# A cluster type without any measured tree in the selected areas has
# TP + FN == 0; its recall is reported as NaN instead of raising ZeroDivisionError.
for cluster_name, cluster_tp, cluster_fn, cluster_measured in [
    ('bosplantsoen', tp_bos, fn_bos, bos_count),
    ('weide', tp_weide, fn_weide, weide_count),
    ('wijk', tp_wijk, fn_wijk, wijk_count),
]:
    n_found = len(cluster_tp)
    n_total = n_found + len(cluster_fn)
    cluster_recall = round(n_found / n_total, 2) if n_total else float('nan')
    print(f'TP {cluster_name}: {n_found} (/{cluster_measured})')
    print(f'recall {cluster_name}: {cluster_recall}')

### Plot results

In [None]:
# Overview map: measurement area, predicted tree shapes, measured trees and
# predicted trunks on top of aerial imagery. Layers are drawn in call order.
fig, ax = plt.subplots(figsize=(12, 12), frameon=False, dpi=500)

# Area outlines
df_area.boundary.plot(ax=ax, color='blue')
df_beheer_sel.boundary.plot(ax=ax, color='blue', alpha=0.6)

# Predicted tree shapes (AHN)
df_ahn_areas_sel_dis.plot(ax=ax, color='purple', alpha=0.3)

# Measured trees
gdf_sel.plot(ax=ax, color='yellow', alpha=0.2, markersize=16)

# Predicted trunks (AHN)
df_ahn_sel.plot(ax=ax, color='purple', alpha=0.7, markersize=6)

# Aerial imagery underneath; added after the layers so the axes extent is already set
cx.add_basemap(ax=ax, source=cx.providers.Esri.WorldImagery, crs=CRS)

ax.axis('off')

# Legend built from proxy patches, one per plotted layer
legend_handles = [
    mpatches.Patch(facecolor='silver', edgecolor='blue', label='area'),
    mpatches.Patch(color='yellow', alpha=0.5, label='measurements'),
    mpatches.Patch(color='purple', label='predictions (AHN) - trunks'),
    mpatches.Patch(color='purple', alpha=0.3, label='predictions (AHN) - trees'),
]
ax.legend(handles=legend_handles, loc='lower right')

fig.savefig(output_image, bbox_inches='tight')
plt.show()

In [None]:
# Analysis map: true positives, false positives and false negatives on top of
# the predicted tree shapes and aerial imagery. Layers are drawn in call order.
fig, ax = plt.subplots(figsize=(12, 12), frameon=False, dpi=500)

# Area outlines
df_area.boundary.plot(ax=ax, color='blue')
df_beheer_sel.boundary.plot(ax=ax, color='blue', alpha=0.6)

# Predicted tree shapes (AHN)
df_ahn_areas_sel_dis.plot(ax=ax, color='purple', alpha=0.3)

# Match outcome per tree; true positives are drawn last so they stay on top
false_negatives.plot(ax=ax, color='red', alpha=0.6, markersize=26)
false_positives.plot(ax=ax, color='black', alpha=0.6, markersize=26)
true_positives.plot(ax=ax, color='green', alpha=0.8, markersize=26)

# Aerial imagery underneath; added after the layers so the axes extent is already set
cx.add_basemap(ax=ax, source=cx.providers.Esri.WorldImagery, crs=CRS)

ax.axis('off')

# Legend built from proxy patches, one per plotted layer
legend_handles = [
    mpatches.Patch(facecolor='silver', edgecolor='blue', label='area'),
    mpatches.Patch(color='green', label='true positive'),
    mpatches.Patch(color='black', alpha=0.6, label='false positive'),
    mpatches.Patch(color='red', alpha=0.6, label='false negative'),
    mpatches.Patch(color='purple', alpha=0.3, label='predictions (AHN) - trees'),
]
ax.legend(handles=legend_handles, loc='lower right')

fig.savefig(output_image_analysis, bbox_inches='tight')
plt.show()

## Inspect individual trees

In [None]:
import laspy as lp
import shapely.geometry as sg
import matplotlib.image as mpimg
from scipy import ndimage
import pathlib
import open3d as o3d

### Load data

In [None]:
# Point-cloud tiles to inspect. They have to be the tiles the active
# `ahn_results_file` (Output_centroids_only_127_481_127_482.shp) was computed
# for; otherwise the point cloud does not overlap the predictions and the false
# negatives plotted below. Switch this pair together with `ahn_results_file`.
# NOTE(review): the matching 'GebiedNummer' selection is assumed to be the one
# currently active — confirm.
my_tile_1 = '127_481'
my_tile_2 = '127_482'
#my_tile_1 = '124_486'
#my_tile_2 = '125_486'

# Input
input_las_1 = DATA_DIR + 'HGB/AMS_subtiles_1000_reduced/trees_' + my_tile_1 + '.laz'
input_las_2 = DATA_DIR + 'HGB/AMS_subtiles_1000_reduced/trees_' + my_tile_2 + '.laz'

In [None]:
# Load the point cloud data (one LAZ file per tile, read fully into memory)
las_file_1 = lp.read(input_las_1)
las_file_2 = lp.read(input_las_2)

### Prepare data

In [None]:
def _xy_hag(las):
    """Stack the x, y and `hag` dimensions of a LAS object column-wise.

    `hag` is an extra dimension of the input files (presumably height above
    ground — TODO confirm).
    """
    return np.c_[las.x, las.y, las.hag]


# Coordinates of all points of both tiles, before any label filtering
coord_all = np.concatenate([_xy_hag(las_file_1), _xy_hag(las_file_2)])

# Take only 'tree' points (label == 1); this replaces the points held by the LAS objects
las_file_1.points = las_file_1.points[las_file_1.label == 1]
las_file_2.points = las_file_2.points[las_file_2.label == 1]

# Same stacking again, now with only the 'tree' points left
coord = np.concatenate([_xy_hag(las_file_1), _xy_hag(las_file_2)])

In [None]:
# Downsample the data
# Wrap the tree-only coordinates in an Open3D point cloud so its voxel filter can be used
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(coord)
# voxel_size is the edge length of the voxel grid, in the units of the coordinates
pcd_down = pcd.voxel_down_sample(voxel_size=0.5)
coord_red = np.asarray(pcd_down.points)

# Release the Open3D objects; only the reduced coordinate array is used afterwards
del pcd
del pcd_down

In [None]:
# Prepare dataframes for selecting points of the point cloud.
# All points: columns 0 and 1 (x, y) become the point geometry, column 2 is kept as attribute.
gdf_coord_all = gpd.GeoDataFrame(
    pd.DataFrame(coord_all),
    geometry=gpd.points_from_xy(coord_all[:, 0], coord_all[:, 1]),
    crs=CRS,
)
# The raw array is no longer needed once the GeoDataFrame exists
del coord_all

In [None]:
# 'Tree' points only: columns 0 and 1 (x, y) become the point geometry, column 2 is kept as attribute
gdf_coord = gpd.GeoDataFrame(
    pd.DataFrame(coord),
    geometry=gpd.points_from_xy(coord[:, 0], coord[:, 1]),
    crs=CRS,
)
# The raw array is no longer needed once the GeoDataFrame exists
del coord

In [None]:
# Downsampled 'tree' points: columns 0 and 1 (x, y) become the point geometry, column 2 is kept as attribute
gdf_coord_red = gpd.GeoDataFrame(
    pd.DataFrame(coord_red),
    geometry=gpd.points_from_xy(coord_red[:, 0], coord_red[:, 1]),
    crs=CRS,
)
# The raw array is no longer needed once the GeoDataFrame exists
del coord_red

In [None]:
# Number of measured trees without a matching prediction
len(false_negatives)

In [None]:
# Number of measured trees with a matching prediction
len(true_positives)

In [None]:
# Restrict the inspection below to the last 10 false negatives.
# NOTE(review): this overwrites `false_negatives`; statistics or plot cells that
# are re-run after this cell will only see these 10 rows.
false_negatives = false_negatives.tail(10)

##### Loop

In [None]:
# Produce one diagnostic figure per tree: side views of the point cloud around
# the tree (all points in silver, 'tree' points in purple), the field photo,
# side views of the downsampled 'tree' points, and an aerial close-up with the
# measurements and predictions. Each figure is saved as PNG and shown inline.
for index, row in false_negatives.iterrows():
#for index, row in true_positives.iterrows():
    print(index)
    output_image_fn = DATA_DIR + 'output_images/fn_sampling/fn_plot_' + str(index) + '.png'
    #output_image_fn = DATA_DIR + 'output_images/tp/tp_plot_' + str(index) + '.png'

    # Get picture file
    picture_name = row['ID_foto1'] + '.jpg'
    picture_file = DATA_DIR + 'measurements/pictures/' + picture_name

    # Get tree location
    fn_geom = row['geometry']

    # Select area for aerial plot: bounding box reaching 10 CRS units to each side of the tree
    x_min, y_min, x_max, y_max = fn_geom.buffer(distance=10, cap_style=3).bounds

    # Select points for point cloud plot: everything within 4 CRS units of the tree
    fn_geom_buffer = fn_geom.buffer(distance=4, cap_style=3)
    fn_geom_buffer_df = gpd.GeoDataFrame({'geometry': [fn_geom_buffer]}, crs=CRS)
    # The 'all points' selection has to come from gdf_coord_all; taking it from
    # gdf_coord would make the silver layer identical to the purple one.
    gdf_coord_all_sel = gdf_coord_all.sjoin(fn_geom_buffer_df, predicate='within').drop(['index_right'], axis=1)
    gdf_coord_sel = gdf_coord.sjoin(fn_geom_buffer_df, predicate='within').drop(['index_right'], axis=1)
    gdf_coord_red_sel = gdf_coord_red.sjoin(fn_geom_buffer_df, predicate='within').drop(['index_right'], axis=1)

    # Plot
    fig = plt.figure(figsize=(12, 8))

    # Top row, left and middle: side views along x and y.
    # Column 2 of the coordinate frames is the height value; the measured
    # location is marked in yellow at height 0.
    for position, axis_name in ((231, 'x'), (232, 'y')):
        ax = fig.add_subplot(position)
        ax.scatter(getattr(gdf_coord_all_sel.geometry, axis_name), gdf_coord_all_sel[2], color='silver')
        ax.scatter(getattr(gdf_coord_sel.geometry, axis_name), gdf_coord_sel[2], color='purple')
        ax.scatter(getattr(fn_geom, axis_name), 0, color='yellow')

    # Top row, right: field photo, rotated by -90 degrees before display
    ax = fig.add_subplot(233)
    img = np.uint8(mpimg.imread(picture_file))
    rotated_img = ndimage.rotate(img, -90)
    ax.imshow(rotated_img)
    ax.axis('off')

    # Bottom row, left and middle: the same side views for the downsampled 'tree' points
    for position, axis_name in ((234, 'x'), (235, 'y')):
        ax = fig.add_subplot(position)
        ax.scatter(getattr(gdf_coord_red_sel.geometry, axis_name), gdf_coord_red_sel[2], color='purple')
        ax.scatter(getattr(fn_geom, axis_name), 0, color='yellow')

    # Bottom row, right: aerial close-up of the surroundings
    ax = fig.add_subplot(236)
    df_area.boundary.plot(ax=ax, color='blue')
    df_beheer_sel.boundary.plot(ax=ax, color='blue', alpha=0.6)
    df_ahn_areas_sel_dis.plot(ax=ax, color='purple', alpha=0.15)
    gdf_sel.plot(ax=ax, color='yellow', alpha=0.5, markersize=100)
    df_ahn_sel.plot(ax=ax, color='purple', alpha=0.7, markersize=40)
    cx.add_basemap(ax=ax, source=cx.providers.Esri.WorldImagery, crs=CRS)
    ax.axis('off')
    ar = mpatches.Patch(facecolor='silver', edgecolor='blue', label='Vooropnamegebied')
    me = mpatches.Patch(color='yellow', alpha=0.5, label='Metingen')
    pr = mpatches.Patch(color='purple', alpha=0.5, label='AHN voorspellingen')
    ax.legend(handles=[ar, me, pr], loc='upper center', bbox_to_anchor=(0.5, -0.05))
    # Zoom in on the window around the tree (set after the layers were drawn)
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])

    fig.savefig(output_image_fn, bbox_inches='tight')
    plt.show()
    # Release the figure so memory does not grow with every iteration
    plt.close(fig)