# Join NEXIS and Council data for Launceston


In [2]:
import geopandas as gpd
import numpy as np
from matplotlib import pyplot as plt

### NEXIS data

In [None]:
# Location of the NEXIS building GeoPackage for Launceston. This is an
# absolute path on one user's machine and must be edited to run elsewhere.
# Previously used path, kept for reference:
# '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/nexis_building_data/launceston_nexis.gpkg'
nexis_filepath = (
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/'
    '127 Residential Dwelling Floor Height/4 Executing/'
    'GA_data_documentation/nexis_building_data/launceston_nexis.gpkg'
)

# Read the layer into a GeoDataFrame and preview the first rows.
df_nexis = gpd.read_file(nexis_filepath)
df_nexis.head()

### City council data

In [4]:
# Location of the council exposure dataset (a .zip archive). This is an
# absolute path on one user's machine and must be edited to run elsewhere.
# Previously used path, kept for reference:
# '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/Launceston Exposure Data/LC_Final.zip'
council_filepath = (
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/'
    '127 Residential Dwelling Floor Height/4 Executing/'
    'GA_data_documentation/Launceston Exposure Data/LC_Final.zip'
)

In [None]:
# Load the council exposure data from the .zip archive into a GeoDataFrame.
df = gpd.read_file(council_filepath)
# Preview the first rows to check the available columns.
df.head()

In [None]:
# Keep only the council records whose USE is 'Residential'. The explicit
# .copy() makes df_r an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning or depend on view-vs-copy behaviour.
df_r = df[df.USE == 'Residential'].copy()

# Floor height is the difference between the LCC_FLOOR and DEM columns
# (presumably surveyed floor level minus ground elevation -- confirm the
# units and vertical datum against the council metadata).
df_r['Floor_Height'] = df_r.LCC_FLOOR - df_r.DEM

### Join

In [21]:
# Reproject the NEXIS data into the council data's CRS so that the distance
# threshold used in the nearest-neighbour join is measured in one coordinate system.
df_nexis = df_nexis.to_crs(df_r.crs)

In [None]:
# Column holding the floor height in each source.
nexis_ffh_col = 'floor_height_(m)'
council_ffh_col = 'Floor_Height'

# Attach to every NEXIS record its nearest residential council record, keeping
# only pairs within max_distance=5 (in the units of the shared CRS --
# presumably metres, confirm). The separation is stored in "distance".
nearby_points = gpd.sjoin_nearest(
    df_nexis,
    df_r,
    how='inner',
    max_distance=5,
    distance_col="distance",
)
print("# of match", len(nearby_points))

# Correlation between the two floor-height columns over the matched pairs.
nexis_ffh = nearby_points[nexis_ffh_col]
council_ffh = nearby_points[council_ffh_col]
correlation = nexis_ffh.corr(council_ffh)
print(f"Correlation between {nexis_ffh_col} and {council_ffh_col}: {correlation}")

# Rows whose right-hand (council) index occurs exactly once among the matches.
appears_once = ~nearby_points['index_right'].duplicated(keep=False)
one_to_one_matches = nearby_points[appears_once]
print("# of one to one match", len(one_to_one_matches))

# Number of matched pairs whose two floor heights differ by more than 1e-3.
height_gap = (nexis_ffh - council_ffh).abs()
print("# of mismatched height", height_gap.gt(1e-3).sum())

In [None]:
# Histogram (log-scaled counts) of the council floor height for matched records.
# Bin edges are 0.1 wide and run from 0 up to the largest Floor_Height in the
# full residential council table (df_r), not just the matched subset.
floor_height_bins = np.arange(0, df_r.Floor_Height.max() + 0.1, 0.1)
nearby_points.Floor_Height.plot.hist(bins=floor_height_bins, logy=True)

In [None]:
# One panel per flood vulnerability function id, overlaying the floor-height
# distribution of the two sources for the matched records of that id.
unique_build_types = np.sort(nearby_points['flood_vulnerability_function_id'].unique())
num_build_types = len(unique_build_types)

# The grid is 4 panels wide with as many rows as needed (ceiling division).
n_cols = 4
n_rows = (num_build_types + n_cols - 1) // n_cols
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(20, 5 * n_rows))
axes = axes.flatten()

ffh_col_1 = 'floor_height_(m)'
ffh_col_2 = 'Floor_Height'

# Shared bin edges: np.arange(0, 2, 0.1) stops at 1.9, so larger values are
# left out of the plot; negative values are filtered out explicitly below.
ffh_bins = np.arange(0, 2, 0.1)

for ax, build_type in zip(axes, unique_build_types):
    group = nearby_points[nearby_points['flood_vulnerability_function_id'] == build_type]

    for ffh_col in (ffh_col_1, ffh_col_2):
        heights = group[ffh_col]
        ax.hist(heights[heights >= 0], bins=ffh_bins, alpha=0.7, label=ffh_col, density=False)

    ax.set_title(f'Histogram of FFH for {build_type}')
    ax.legend()

# The last row of the grid may have panels with no build type assigned; hide
# them so the figure does not show empty framed axes.
for unused_ax in axes[num_build_types:]:
    unused_ax.set_visible(False)

### Building footprint

In [11]:

# Location of the Overture building-footprint GeoPackage for Launceston. This
# is an absolute path on one user's machine and must be edited to run elsewhere.
# Previously used path, kept for reference:
# '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/Data Exploration/overture/launceston_building.gpkg'
overture_filepath = (
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/'
    '127 Residential Dwelling Floor Height/4 Executing/'
    'Data Exploration/overture/launceston_building.gpkg'
)

In [12]:
# Load the building footprints into a GeoDataFrame.
footprint = gpd.read_file(overture_filepath)

In [None]:
# Preview the first footprint rows.
footprint.head()

In [None]:
# Show the footprints' CRS; it is reprojected to match nearby_points before the spatial join.
footprint.crs

In [15]:
# Free up the 'index_right' column name before the footprint join below,
# which groups on an 'index_right' column of its own.
# NOTE(review): nearby_points was built with df_nexis as the left frame and
# df_r (council) as the right one, so 'index_right' appears to hold council
# row labels rather than NEXIS ones -- confirm that 'index_nexis' is the intended name.
nearby_points.rename(columns={'index_right':'index_nexis'}, inplace=True)

In [None]:
# List the columns of the joined table to check the rename.
nearby_points.columns

In [17]:
import os

# Output files written by this cell. The second one doubles as a cache: when
# it already exists the spatial join is skipped and the saved result is loaded.
points_out_path = 'launceston_FFH_with_footprint.geojson'
footprint_out_path = 'launceston_FFH_footprint_geometry.geojson'

if os.path.exists(footprint_out_path):
    df_footprint = gpd.read_file(footprint_out_path)
    # Also bind the cached result to one_to_one_matches: the following cells
    # read that name, and without this they would silently show the earlier
    # NEXIS/council one-to-one matches instead of the footprint join.
    one_to_one_matches = df_footprint
else:
    # Both layers must share a CRS for the intersection test.
    footprint = footprint.to_crs(nearby_points.crs)

    # Step 1: pair every matched record with each footprint it intersects.
    df_intersections = gpd.sjoin(nearby_points, footprint, how='inner', predicate='intersects')

    # Step 2: keep only footprints intersected by exactly one record. Only the
    # footprint side ('index_right') is checked; a record that intersects
    # several footprints is not filtered out by this step.
    # .copy() gives an independent frame so the geometry can be replaced below.
    one_to_one_matches = df_intersections.groupby('index_right').filter(lambda x: len(x) == 1).copy()

    # Save once with the records' own geometry ...
    one_to_one_matches.to_file(points_out_path)

    # ... then swap in the matched footprint geometry (looked up by footprint
    # row label) and save again under the cache filename.
    one_to_one_matches['geometry'] = footprint.loc[one_to_one_matches.index_right].geometry.values
    one_to_one_matches.to_file(footprint_out_path)

    # Keep both names bound whichever branch ran.
    df_footprint = one_to_one_matches

In [None]:
# Preview the first rows of one_to_one_matches.
one_to_one_matches.head()

In [None]:
# Number of rows in one_to_one_matches.
len(one_to_one_matches)