In [1]:
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

In [2]:
# Read the three input layers in a fixed order: building footprints first,
# then the meshblock layer, then the study-area layer.
building_gdf, meshblock_gdf, study_area_gdf = (
    gpd.read_file(layer_path)
    for layer_path in (
        "../_data/AusUrbHI HVI data unprocessed/Geoscape/temporary/buildings_in_study_area.shp",
        "../_data/study area/meshblock_study_area_2021.shp",
        "../_data/study area/ausurbhi_study_area_2021.shp",
    )
)
print("Data loaded.")

Data loaded.


In [19]:
class StudyAreaProcessor:
    """
    Aggregate building-level attributes into one summary record per SA1 area.

    Intended call order: ``preprocessing()`` -> ``compute_fields()`` ->
    ``save_output(path)``.
    """

    # (column suffix, luminance cut-off) pairs. Each pair produces one
    # LTRF_PCT_<suffix> (share of buildings) and one LRAE_PCT_<suffix>
    # (share of roof area) column for roofs at or above the cut-off.
    LUMINANCE_THRESHOLDS = ((3, 0.3), (4, 0.4), (5, 0.5))

    def __init__(self, meshblock_gdf, study_area_gdf, building_gdf):
        """
        Initialize the processor with meshblock, study area, and building dataframes.

        Parameters
        ----------
        meshblock_gdf : stored on the instance; not read by any method of this class.
        study_area_gdf : must provide 'SA1_CODE21', 'AREASQKM21' and 'geometry'.
        building_gdf : must provide 'SA1_CODE21', 'ROOF_CLR', 'SP_ADJ', 'ROOF_HGT',
            'PR_RF_MAT', 'AREA' and 'EST_LEV'.
        """
        self.meshblock_gdf = meshblock_gdf
        self.study_area_gdf = study_area_gdf
        self.building_gdf = building_gdf

        # Filled in by preprocessing() / compute_fields(); defined here so that
        # calling the steps out of order fails with a clear message.
        self.sa1_area_dict = {}
        self.grouped_buildings = None
        self.results = []

    def preprocessing(self):
        """
        Preprocess the data: compute study area dictionary and group buildings by SA1.

        Sets ``self.sa1_area_dict`` (SA1_CODE21 -> AREASQKM21) and
        ``self.grouped_buildings`` (buildings grouped by SA1_CODE21), then prints
        how many SA1 codes have buildings versus how many study areas exist.
        """
        # Create a dictionary of SA1 area
        self.sa1_area_dict = self.study_area_gdf.set_index('SA1_CODE21')['AREASQKM21'].to_dict()

        # Group buildings by SA1
        self.grouped_buildings = self.building_gdf.groupby('SA1_CODE21')
        print(f"{len(self.grouped_buildings.groups)} grouped, {len(self.study_area_gdf)} total study areas.")

    @staticmethod
    def compute_luminance(hex):
        """
        Compute luminance from hex code.

        The code is split into three equal-width channels, each channel is scaled
        to [0, 1] by its own maximum (so ``#fff`` and ``#ffffff`` agree), and the
        channels are combined as ``0.299*r + 0.587*g + 0.114*b``.

        Parameters
        ----------
        hex : str or None
            Colour such as ``'#a0b1c2'`` or ``'a0b1c2'``; the leading ``#`` is
            optional. The name shadows the built-in ``hex`` but is kept so that
            keyword callers keep working.

        Returns
        -------
        float or None
            Luminance in [0, 1]; ``None`` when the value is missing (``None``,
            NaN or any other non-string, or an empty string).

        Raises
        ------
        ValueError
            If the digits cannot be split into three equal channels or are not
            valid hexadecimal.
        """
        # Missing values may arrive as None or as float NaN; neither is a string.
        if not isinstance(hex, str):
            return None

        digits = hex.strip().lstrip('#')
        if not digits:
            return None

        width, remainder = divmod(len(digits), 3)
        if remainder:
            raise ValueError(f"Cannot split hex colour {hex!r} into three equal channels")

        # Scale by the largest value a channel of this width can hold
        # (15 for one digit, 255 for two) instead of a fixed 255.
        channel_max = 16 ** width - 1
        r, g, b = (int(digits[i:i + width], 16) / channel_max for i in range(0, len(digits), width))
        return 0.299 * r + 0.587 * g + 0.114 * b

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or NaN when the denominator is zero or NaN."""
        if not denominator or denominator != denominator:
            return float('nan')
        return numerator / denominator

    def _summarise_group(self, sa1_code, group_data):
        """
        Build the summary record for the buildings of a single SA1 area.

        Parameters
        ----------
        sa1_code : key of the group; must be a key of ``self.sa1_area_dict``.
        group_data : DataFrame of the buildings in that SA1. It is not modified.

        Returns
        -------
        dict
            One value per output column, every ratio/mean rounded to 2 decimals.

        Raises
        ------
        KeyError
            If ``sa1_code`` has no entry in ``self.sa1_area_dict``.
        """
        n_buildings = len(group_data)
        total_roof_area = group_data['AREA'].sum()
        material_counts = group_data['PR_RF_MAT'].value_counts()

        # Kept as a standalone Series (not written back into group_data) so the
        # groupby slice is never mutated; missing colours become NaN, and NaN
        # compares False against every threshold below.
        luminance = group_data['ROOF_CLR'].apply(self.compute_luminance).astype(float)

        record = {
            'SA1_CODE21': sa1_code,
            'SP_ADJ': round(group_data['SP_ADJ'].str.contains('Yes', case=False, na=False).sum() / n_buildings, 2),
            'ROOF_HGT': round(group_data['ROOF_HGT'].mean(), 2),
            'MAT_Tile': round(material_counts.get('Tile', 0) / n_buildings, 2),
            'MAT_Metal': round(material_counts.get('Metal', 0) / n_buildings, 2),
            'MAT_Concre': round(material_counts.get('Flat Concrete', 0) / n_buildings, 2),
            # AREASQKM21 is multiplied by 1e6 before dividing the summed AREA by it.
            'AREA_PCT': round(self._ratio(total_roof_area, self.sa1_area_dict[sa1_code] * 1e6), 2),
            'EST_LEV': round(group_data['EST_LEV'].mean(), 2),
        }

        light_masks = {suffix: luminance >= cutoff for suffix, cutoff in self.LUMINANCE_THRESHOLDS}

        # Two passes keep the original column order: all LTRF_* before all LRAE_*.
        for suffix, mask in light_masks.items():
            record[f'LTRF_PCT_{suffix}'] = round(int(mask.sum()) / n_buildings, 2)
        for suffix, mask in light_masks.items():
            light_area = group_data.loc[mask, 'AREA'].sum()
            record[f'LRAE_PCT_{suffix}'] = round(self._ratio(light_area, total_roof_area), 2)

        return record

    def compute_fields(self):
        """
        Compute fields for each group of buildings, including LTRF_PCT_* and LRAE_PCT_*.

        Replaces ``self.results`` with one dict per SA1 group (see
        ``_summarise_group`` for the columns).

        Raises
        ------
        RuntimeError
            If ``preprocessing()`` has not been called yet.
        """
        if self.grouped_buildings is None:
            raise RuntimeError("preprocessing() must be called before compute_fields().")

        self.results = []
        for sa1_code, group_data in tqdm(self.grouped_buildings, total=len(self.grouped_buildings), desc="processing buildings"):
            self.results.append(self._summarise_group(sa1_code, group_data))

    def save_output(self, output_path):
        """
        Save the results to a specified path.

        The per-SA1 records are left-joined to the study-area geometries on
        'SA1_CODE21', so only SA1 codes present in ``self.results`` are written.

        Raises
        ------
        RuntimeError
            If there are no results to save (``compute_fields()`` not run).
        """
        if not self.results:
            raise RuntimeError("No results to save; call compute_fields() first.")

        result_df = pd.DataFrame(self.results)
        merged_df = result_df.merge(self.study_area_gdf[['SA1_CODE21', 'geometry']], on='SA1_CODE21', how='left')
        new_gdf = gpd.GeoDataFrame(merged_df, geometry='geometry', crs=self.study_area_gdf.crs)
        new_gdf.to_file(output_path)


In [20]:
# Build the processor from the three loaded layers, then derive the SA1 area
# lookup and the per-SA1 building groups.
processor = StudyAreaProcessor(
    meshblock_gdf=meshblock_gdf,
    study_area_gdf=study_area_gdf,
    building_gdf=building_gdf,
)
processor.preprocessing()
print("Grouping done.")

13491 grouped, 13537 total study areas.
Grouping done.


In [21]:
# Compute the per-SA1 summary records; results are stored on processor.results.
# Progress is reported through the tqdm bar created inside compute_fields().
processor.compute_fields()
print("Computing done.")



processing buildings:   0%|          | 0/13491 [00:00<?, ?it/s][A[A

processing buildings:   0%|          | 1/13491 [00:18<67:49:34, 18.10s/it][A[A

processing buildings:   0%|          | 6/13491 [00:18<8:23:34,  2.24s/it] [A[A

processing buildings:   0%|          | 11/13491 [00:18<3:45:56,  1.01s/it][A[A

processing buildings:   0%|          | 18/13491 [00:18<1:48:49,  2.06it/s][A[A

processing buildings:   0%|          | 25/13491 [00:18<1:03:28,  3.54it/s][A[A

processing buildings:   0%|          | 32/13491 [00:18<40:34,  5.53it/s]  [A[A

processing buildings:   0%|          | 39/13491 [00:18<27:24,  8.18it/s][A[A

processing buildings:   0%|          | 46/13491 [00:18<19:20, 11.59it/s][A[A

processing buildings:   0%|          | 55/13491 [00:18<13:01, 17.20it/s][A[A

processing buildings:   0%|          | 63/13491 [00:19<09:48, 22.81it/s][A[A

processing buildings:   1%|          | 71/13491 [00:19<08:03, 27.75it/s][A[A

processing buildings:   1%|        

Computing done.





In [22]:
# Write the per-SA1 summary, joined to the study-area geometry, to disk.
processor.save_output(
    output_path='../_data/AusUrbHI HVI data processed/Geoscape/buildings.shp',
)