# Inspect Launceston data

Match survey records to building footprints and generate a cleaned dataset of unique (one-to-one) matches

Prepare DEM

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [3]:
# Floor height file
# The project folder sits at a different location on each contributor's
# machine. List the known locations and use the first one that exists here,
# instead of commenting paths in and out by hand.
floor_height_candidates = [
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/Launceston Exposure Data/LC_Final.zip',
    '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/Launceston Exposure Data/LC_Final.zip',
]
# Fall back to the first entry when none exists, so the reader fails with a
# concrete path in its error message rather than on an undefined variable.
filepath = next(
    (path for path in floor_height_candidates if os.path.exists(path)),
    floor_height_candidates[0],
)

In [None]:
# Load the floor height file into `df` with geopandas and preview the first rows.
df = gpd.read_file(filepath)
df.head()

In [None]:
# List the column names available in the loaded data.
df.columns

In [None]:
# Summary statistics for the columns that are not float/int typed.
non_numeric_cols = df.select_dtypes(exclude=[float, int])
non_numeric_cols.describe(include='all')

In [None]:
# Distinct values found in the LCC_TYPE column.
df['LCC_TYPE'].unique()

In [None]:
# Value counts, in order, for: roof type, wall type, commercial construction
# type, industrial construction type.
survey_type_cols = ['Survey_Roo', 'Survey_Wal', 'Survey_C_2', 'Survey_Ind']
tuple(df[col].value_counts() for col in survey_type_cols)

In [None]:
# Keep only the residential records.
# Take an explicit copy: a derived column (FFH) is added to df_r further down,
# and writing a column into a boolean-mask selection of df makes pandas emit
# SettingWithCopyWarning. With the copy, df_r is an independent frame.
df_r = df[df.USE == 'Residential'].copy()

# Value counts for roof, wall, commercial and industrial construction type,
# restricted to the residential records.
df_r.Survey_Roo.value_counts(), df_r.Survey_Wal.value_counts(), df_r.Survey_C_2.value_counts(), df_r.Survey_Ind.value_counts()

In [None]:
# Summary statistics for the residential columns that are not float/int typed.
residential_non_numeric = df_r.select_dtypes(exclude=[float, int])
residential_non_numeric.describe(include='all')

In [None]:
# Foundation type (column Survey_G_1), residential records only.
# Categories: Rubble Trench Footings (RTF), Slab-on-grade (SOG), Timber Frame
df_r['Survey_G_1'].value_counts()

In [None]:
# Summary statistics for the float/int typed residential columns.
residential_numeric = df_r.select_dtypes(include=[float, int])
residential_numeric.describe(include='all')

In [None]:
# Residential records whose LCC_FLOOR value is greater than zero.
df_r.loc[df_r.LCC_FLOOR > 0]

In [None]:
# Overlay three 50-bin histograms on the same axes: LCC_FLOOR, DEM, and
# their difference. Each entry pairs a series with its plot styling.
overlays = [
    (df_r.LCC_FLOOR, {'label': 'LCC_FLOOR'}),
    (df_r.DEM, {'label': 'DEM', 'alpha': 0.7}),
    (df_r.LCC_FLOOR - df_r.DEM, {'alpha': 0.7, 'label': 'LCC_FLOOR-DEM'}),
]
for series, style in overlays:
    series.plot.hist(bins=50, **style)
plt.legend()

In [None]:
# Histogram of LCC_FLOOR - DEM, restricted to records with LCC_FLOOR > 0.
positive_floor = df_r[df_r.LCC_FLOOR > 0]
(positive_floor.LCC_FLOOR - positive_floor.DEM).plot.hist(bins=50)

In [None]:
# Distribution of Floor Height per foundation type

# FFH = LCC_FLOOR - DEM for every residential record.
# df_r was produced by filtering df, so setting a column on it in place makes
# pandas emit SettingWithCopyWarning. assign() returns a new frame that carries
# the extra column, and df_r is rebound to it.
df_r = df_r.assign(FFH=df_r.LCC_FLOOR - df_r.DEM)

def plot_histograms_by_build_type(df_r, type_col='Survey_G_1', ffh_col='FFH', ncols=4, bins=None):
    """Plot one histogram of ``ffh_col`` per distinct value of ``type_col``.

    The panels are laid out on a grid ``ncols`` wide, and unused grid cells
    are removed. Only non-negative values of ``ffh_col`` are drawn.

    Args:
        df_r: DataFrame holding at least the ``type_col`` and ``ffh_col`` columns.
        type_col: Column whose distinct (non-missing) values define the panels.
        ffh_col: Numeric column to histogram.
        ncols: Number of panels per grid row (must be >= 1).
        bins: Bin specification forwarded to ``Axes.hist``. Defaults to
            ``np.arange(0, 2, 0.1)``.
            NOTE(review): the default edges stop at 1.9, so larger values are
            not drawn; pass ``bins`` explicitly to widen the range.

    Returns:
        None. The figure is shown with ``plt.show()``.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    if ncols < 1:
        raise ValueError(f'ncols must be >= 1, got {ncols}')
    if bins is None:
        bins = np.arange(0, 2, 0.1)

    # unique() would keep NaN as a category, but NaN never compares equal to
    # itself, so that panel would always be empty. Drop missing types up front.
    unique_build_types = df_r[type_col].dropna().unique()
    num_build_types = len(unique_build_types)
    if num_build_types == 0:
        # Nothing to plot, and plt.subplots() rejects a grid with zero rows.
        return

    nrows = (num_build_types + ncols - 1) // ncols  # ceiling division
    fig, axes = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        figsize=(5 * ncols, 5 * nrows),
        squeeze=False,  # always get a 2-D array, even for a 1x1 grid
    )
    axes = axes.flatten()

    for ax, build_type in zip(axes, unique_build_types):
        values = df_r.loc[df_r[type_col] == build_type, ffh_col]
        ax.hist(values[values >= 0], bins=bins, alpha=0.7)
        ax.set_title(f'Histogram of FFH for {build_type}')
        ax.set_xlabel(ffh_col)
        ax.set_ylabel('Frequency')

    # Hide any unused subplots
    for unused_ax in axes[num_build_types:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Example usage: FFH histograms for the residential records, one panel per
# Survey_G_1 value (the function's default columns).
plot_histograms_by_build_type(df_r)


## Matching with building footprint

In [None]:
# Coordinate reference system of the residential records; the footprints are
# reprojected to this CRS before the spatial join.
df_r.crs

In [None]:
# Number of residential records before matching to footprints.
len(df_r)

In [19]:
# Footprint
# The project folder sits at a different location on each contributor's
# machine. List the known locations and use the first one that exists here,
# instead of commenting paths in and out by hand.
footprint_path_candidates = [
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/127 Residential Dwelling Floor Height/4 Executing/Data Exploration/Footprints_080922/sql_statement.shp',
    '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/Data Exploration/Footprints_080922/sql_statement.shp',
]
# Fall back to the first entry when none exists, so the reader fails with a
# concrete path in its error message.
# NOTE: this rebinds `filepath`, which earlier pointed at the floor height file.
filepath = next(
    (path for path in footprint_path_candidates if os.path.exists(path)),
    footprint_path_candidates[0],
)

In [20]:

if os.path.exists('launceston_FFH_footprint_geometry.geojson'):
    # Re-use the matched dataset written by a previous run.
    df_footprint = gpd.read_file('launceston_FFH_footprint_geometry.geojson')
else:
    # NOTE: `filepath` must point at the footprint file here; the same variable
    # name is also used for the floor height file earlier in the notebook.
    footprint = gpd.read_file(filepath)
    footprint = footprint.to_crs(df_r.crs)

    # Perform spatial join and retain one-to-one matches

    # Step 1: Perform the spatial join
    df_intersections = gpd.sjoin(df_r, footprint, how='inner', predicate='intersects')

    # Step 2: Keep only pairs that are unique in BOTH directions:
    #   - the footprint (index_right) is intersected by exactly one record, and
    #   - the record (left index, which sjoin preserves) intersects exactly one
    #     footprint. This relies on df_r having a unique index.
    # Checking only index_right would still keep a record that touches several
    # footprints, each of which is touched by nothing else.
    footprint_is_shared = df_intersections['index_right'].duplicated(keep=False).to_numpy()
    record_is_shared = df_intersections.index.duplicated(keep=False)
    one_to_one_matches = df_intersections[~(footprint_is_shared | record_is_shared)].copy()

    # Save the matches with the original record geometry...
    one_to_one_matches.to_file('launceston_FFH_with_footprint.geojson')
    # ...then swap in the geometry of the matched footprint and save again.
    one_to_one_matches['geometry'] = footprint.loc[one_to_one_matches.index_right].geometry.values
    one_to_one_matches.to_file('launceston_FFH_footprint_geometry.geojson')

    # Read back from disk so both branches yield the same on-disk representation.
    df_footprint = gpd.read_file('launceston_FFH_footprint_geometry.geojson')

In [None]:
# Matched records as loaded from launceston_FFH_footprint_geometry.geojson.
df_footprint

## Prepare DEM

In [25]:
import os
import glob
from osgeo import gdal

# The DEM folder sits at a different location on each contributor's machine.
# Use the first known location that exists, falling back to the first entry.
dem_dir_candidates = [
    '/Users/madeleineseehaber/Library/CloudStorage/OneDrive-FrontierSI/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/Launceston DEM',
    '/Users/Fangyuan/FrontierSI/Projects - Documents/Projects - Data Analytics/127 Residential Dwelling Floor Height/4 Executing/GA_data_documentation/Launceston DEM',
]
dem_dir = next(
    (folder for folder in dem_dir_candidates if os.path.isdir(folder)),
    dem_dir_candidates[0],
)

# Define output path for the VRT
vrt_path = f'{dem_dir}/1m_DEM.vrt'

# Only build the VRT once; later runs re-use the existing file.
if not os.path.exists(vrt_path):
    # Define paths
    tiff_folder = f'{dem_dir}/Geoscience Australia/DEM/1 Metre'
    # Get a list of all TIFF files in the folder (sorted so the tile order in
    # the VRT does not depend on directory listing order)
    tiff_files = sorted(glob.glob(f"{tiff_folder}/*.tif"))
    if not tiff_files:
        # Fail loudly instead of asking GDAL to build a VRT from nothing.
        raise FileNotFoundError(f'No .tif tiles found in {tiff_folder}')

    # Build the VRT
    vrt = gdal.BuildVRT(vrt_path, tiff_files)
    if vrt is None:
        # Without gdal.UseExceptions(), GDAL signals failure by returning None.
        raise RuntimeError(f'gdal.BuildVRT could not create {vrt_path}')
    # Flush and release the dataset so the VRT file is written and closed.
    vrt.FlushCache()
    vrt = None
