In [None]:
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from geopandas import GeoDataFrame

In [None]:
# Input shapefiles, addressed relative to the notebook's working directory.
BUILDINGS_SHP = "../../_data/AusUrbHI HVI data unprocessed/Geoscape/temporary/buildings_in_study_area.shp"
MESHBLOCK_SHP = "../../_data/study area/meshblock_study_area_2021.shp"
STUDY_AREA_SHP = "../../_data/study area/ausurbhi_study_area_2021.shp"

# Load each layer into its own GeoDataFrame.
building_gdf = gpd.read_file(BUILDINGS_SHP)
meshblock_gdf = gpd.read_file(MESHBLOCK_SHP)
study_area_gdf = gpd.read_file(STUDY_AREA_SHP)

In [None]:
# Lookup table: SA1 code -> value of the AREASQKM21 column for that SA1.
sa1_area_series = study_area_gdf.set_index('SA1_CODE21')['AREASQKM21']
sa1_area_dict = sa1_area_series.to_dict()

# Bucket the building rows by the SA1 code they carry.
grouped_buildings = building_gdf.groupby(by='SA1_CODE21')

# Sanity check: number of SA1 codes that have buildings vs. number of rows
# in the study-area layer.
n_sa1_with_buildings = len(grouped_buildings.groups)
n_sa1_in_study_area = len(study_area_gdf)
print(n_sa1_with_buildings, n_sa1_in_study_area)

In [None]:
# Aggregate building attributes into one summary record per SA1.
#
# NOTE(review): the 'AREA' ratio below assumes building 'AREA' is in square
# metres and 'AREASQKM21' is in square kilometres -- confirm against the data
# dictionaries. Under that assumption the SA1 area must be converted to m^2
# *before* dividing; the previous form `sum / sa1_area * 1e6` multiplied the
# quotient instead and inflated the ratio by a factor of 1e12.
M2_PER_KM2 = 1e6

result = []
for sa1_code, group_data in tqdm(grouped_buildings, total=len(grouped_buildings),
                                 desc="processing buildings"):
    # Number of building rows in this SA1; denominator for every share below.
    count = len(group_data)

    # Rows whose SP_ADJ text contains "yes" (case-insensitive); missing values
    # are treated as non-matching.
    sp_adj_yes = group_data['SP_ADJ'].str.contains('Yes', case=False, na=False).sum()

    # Occurrences of each PR_RF_MAT label. Missing values are not counted
    # here but still contribute to `count`.
    pr_rf_ma_counts = group_data['PR_RF_MAT'].value_counts()

    # Raises KeyError if the SA1 code is absent from the study-area lookup.
    sa1_area_m2 = sa1_area_dict[sa1_code] * M2_PER_KM2

    # compute fields based on values of all buildings in the SA1
    sa1_dict = {'SA1_CODE21': sa1_code,
                'SP_ADJ': sp_adj_yes / count,
                'ROOF_HGT': group_data['ROOF_HGT'].mean(),
                'MAT_Tile': pr_rf_ma_counts.get('Tile', 0) / count,
                'MAT_Metal': pr_rf_ma_counts.get('Metal', 0) / count,
                'MAT_Concre': pr_rf_ma_counts.get('Flat Concrete', 0) / count,
                # Summed building AREA as a fraction of the SA1 area.
                'AREA': group_data['AREA'].sum() / sa1_area_m2,
                'EST_LEV': group_data['EST_LEV'].mean()}
    result.append(sa1_dict)

In [None]:
# Turn the per-SA1 records into a table, one row per SA1 code.
result_df = pd.DataFrame(result)

# Attach each SA1's geometry from the study-area layer; the left join keeps
# exactly the rows present in result_df.
sa1_geometry = study_area_gdf[['SA1_CODE21', 'geometry']]
merged_df = pd.merge(result_df, sa1_geometry, on='SA1_CODE21', how='left')

# Promote to a GeoDataFrame in the study-area CRS and write it out as a shapefile.
new_gdf = GeoDataFrame(merged_df, geometry='geometry', crs=study_area_gdf.crs)
new_gdf.to_file('../../_data/AusUrbHI HVI data processed/Geoscape/buildings.shp')