This notebook assesses FFH results against manually derived groundtruth FFH values.
Reading the groundtruth Excel workbook requires `openpyxl`; if it is not already installed, run `pip install openpyxl`.

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
import numpy as np

### Load input files

In [None]:
# Input locations used by the rest of the notebook:
#   - the Excel workbook holding the manually measured groundtruth values
#   - the building points GeoJSON that is later read for the FFH columns
groundtruth_file = r"/mnt/floorheightvolume/groundtruth/Wagga Groundtruth FFH Estimation.xlsx"
building_points_file_FFH = r'/home/ubuntu/lavender_floor_height/output/Final_Wagga_training_samples_pano_metadata_clipping_elevations_ffhs.geojson'

Building points file that contains both the UFI and the GSV pano id. It is only needed for Wagga, where GSV was used to measure the groundtruth data but the UFI was not recorded in the results:

In [None]:
# GeoJSON that is later read for the UFI -> pano_id lookup.
building_points_file_GSV = (
    r"/mnt/floorheightvolume/groundtruth/Final_Wagga_meta_FFH_GSV.geojson"
)

Keep residential buildings only:

In [None]:
# Read the building points and keep only the rows whose USAGE is "Residential".
# Done as one chained expression so re-running the cell always starts from the file.
gdf_building_points_FFH = (
    gpd.read_file(building_points_file_FFH)
    .loc[lambda points: points["USAGE"] == "Residential"]
    .reset_index(drop=True)
)
gdf_building_points_FFH

In [None]:
# Read the building points file that is used below for its UFI and pano_id columns.
gdf_building_points_GSV = gpd.read_file(building_points_file_GSV)
gdf_building_points_GSV

Join by UFI:

In [None]:
# Attach the pano_id to every residential building point via its UFI.
# A left join keeps buildings that have no match; their pano_id is left missing.
ufi_to_pano = gdf_building_points_GSV[["UFI", "pano_id"]]
gdf_building_points_FFH_joined = gdf_building_points_FFH.merge(
    ufi_to_pano,
    how="left",
    on="UFI",
)
gdf_building_points_FFH_joined

### Extract groundtruth measures for each building

In [None]:
# Read the pano id and groundtruth FFH columns from the GSV checking sheet.
# `names` replaces the spreadsheet's own column labels, so `dtype` must be keyed
# by those replacement names. Keys such as 'A'/'P' (the Excel letters used by
# `usecols`) match no column, so the requested dtypes would never be applied.
# NOTE(review): read_excel defaults to header=0, so after `skiprows=1` the next
# row is still consumed as the header row (two rows are dropped in total).
# Confirm that matches the sheet layout and that no data row is lost.
df_groundtruth = pd.read_excel(
    groundtruth_file,
    sheet_name='Combined Checking (GSV)',
    usecols='A,P',  # Excel column letters; update based on data
    names=['pano_id', 'groundtruth_FFH'],
    dtype={'pano_id': 'str', 'groundtruth_FFH': 'float64'},
    skiprows=1,  # Skip header row if needed
).reset_index(drop=True)
df_groundtruth

### Join measures by pano_id

In [None]:
# Attach the groundtruth FFH to each building through its pano_id.
# pandas matches missing join keys with each other, so groundtruth rows without a
# pano_id are dropped first; otherwise every building that lacks a pano_id would
# be paired with every such row, duplicating buildings in the result.
groundtruth_by_pano = (
    df_groundtruth[['pano_id', 'groundtruth_FFH']]
    .dropna(subset=['pano_id'])
)
gdf_building_points_FFH_final = gdf_building_points_FFH_joined.merge(
    groundtruth_by_pano,
    on='pano_id',
    how='left',
)
gdf_building_points_FFH_final

### Compare predictions and groundtruth measures

In [None]:
# Compare the FFH3 prediction with the manual groundtruth measure.
# Keep only the buildings where both the prediction and the groundtruth are present.
predicted_FFH = gdf_building_points_FFH_final['FFH3'].astype(float)
gt_FFH = gdf_building_points_FFH_final['groundtruth_FFH'].astype(float)
# .notna() works for any dtype, whereas np.isnan raises on non-float columns.
valid_indices = predicted_FFH.notna() & gt_FFH.notna()
predicted_FFH = predicted_FFH[valid_indices]
gt_FFH = gt_FFH[valid_indices]
if predicted_FFH.empty:
    raise ValueError('No buildings have both an FFH3 prediction and a groundtruth_FFH value.')

# Error metrics and Pearson correlation between groundtruth and prediction
rmse = root_mean_squared_error(gt_FFH, predicted_FFH)
mae = mean_absolute_error(gt_FFH, predicted_FFH)
correlation = np.corrcoef(gt_FFH, predicted_FFH)[0, 1]

# Scatter plot: groundtruth on x, prediction on y
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(gt_FFH, predicted_FFH, alpha=0.7, label=f'Points (n={len(predicted_FFH)})')
# The 1:1 reference line must cover the range of BOTH series; using only the
# prediction range truncates it whenever the groundtruth extends further.
line_min = min(gt_FFH.min(), predicted_FFH.min())
line_max = max(gt_FFH.max(), predicted_FFH.max())
ax.plot([line_min, line_max], [line_min, line_max], 'r--', label='Ideal Line')
# Annotate the metrics in the top-left corner (axes coordinates)
ax.text(0.05, 0.95, f'RMSE: {rmse:.2f}\nMAE:{mae:.2f}\nCorrelation: {correlation:.2f}',
        transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle="round", fc="wheat", ec="black", alpha=0.5))
# Labeling
ax.set_title('FFH - GSV prediction vs ground truth')
ax.set_xlabel('Ground truth')
ax.set_ylabel('GSV prediction')
ax.legend()
ax.grid(True)
# Show plot
plt.show()

### Compare predictions and local council data

In [None]:
# Compare the FFH2 prediction with the council measure (Floor_height column).
# NOTE(review): this cell evaluates FFH2 while the groundtruth comparison uses
# FFH3 - confirm the different column is intentional.
# Keep only the buildings where both the prediction and the council value are present.
predicted_FFH = gdf_building_points_FFH_final['FFH2'].astype(float)
gt_FFH = gdf_building_points_FFH_final['Floor_height'].astype(float)
# .notna() works for any dtype, whereas np.isnan raises on non-float columns.
valid_indices = predicted_FFH.notna() & gt_FFH.notna()
predicted_FFH = predicted_FFH[valid_indices]
gt_FFH = gt_FFH[valid_indices]
if predicted_FFH.empty:
    raise ValueError('No buildings have both an FFH2 prediction and a Floor_height value.')

# Error metrics and Pearson correlation between council measure and prediction
rmse = root_mean_squared_error(gt_FFH, predicted_FFH)
mae = mean_absolute_error(gt_FFH, predicted_FFH)
correlation = np.corrcoef(gt_FFH, predicted_FFH)[0, 1]

# Scatter plot: council measure on x, prediction on y
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(gt_FFH, predicted_FFH, alpha=0.7, label=f'Points (n={len(predicted_FFH)})')
# The 1:1 reference line must cover the range of BOTH series; using only the
# prediction range truncates it whenever the council values extend further.
line_min = min(gt_FFH.min(), predicted_FFH.min())
line_max = max(gt_FFH.max(), predicted_FFH.max())
ax.plot([line_min, line_max], [line_min, line_max], 'r--', label='Ideal Line')
# Annotate the metrics in the top-left corner (axes coordinates)
ax.text(0.05, 0.95, f'RMSE: {rmse:.2f}\nMAE:{mae:.2f}\nCorrelation: {correlation:.2f}',
        transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle="round", fc="wheat", ec="black", alpha=0.5))
# Labeling
ax.set_title('FFH - GSV prediction vs council measure')
ax.set_xlabel('Council measure')
ax.set_ylabel('GSV prediction')
ax.legend()
ax.grid(True)
# Show plot
plt.show()