Data Source: 3D building, LoD2, German open source <br>
Building volume: calculated via FME <br>
Original citygml data: <../Test data/690_5336.gml> <br>
Data used: <../Test data/001test.geojson>

In [None]:
import random
from collections import Counter

import geopandas as gpd
import numpy as np

## The data and summary statistics

In [3]:
# Load the building dataset (GeoJSON) into a GeoDataFrame
gdf = gpd.read_file("../Test data/001test.geojson")
# Print a single randomly chosen row; shown as a Series, every column is visible
print(gdf.iloc[random.randint(0, gdf.shape[0] - 1)])

Skipping field citygml_level_of_detail: unsupported OGR type: 5


gml_id                                                             DEBY_LOD2_65259
gml_parent_id                         fme-gen-e77c37af-fc0c-4891-afdf-24a30e8baaa0
citygml_target_uri                     http://www.opengis.net/citygml/building/1.0
citygml_feature_role                                              cityObjectMember
gml_name                                                                      None
citygml_creationDate                                           2020-09-23 00:00:00
citygml_function                                                        31001_2000
citygml_roof_type                                                             3100
citygml_measured_height                                                       4.36
citygml_measured_height_units                                        urn:adv:uom:m
citygml_storeys_above_ground                                                  None
DatenquelleBodenhoehe                                                         1100
Date

In [5]:
# Summary statistics and info about the GeoDataFrame

# Column overview. DataFrame.info() prints its report itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None" line).
gdf.info()

# Number of objects (rows) in the dataset
print(f"The number of objects: {gdf.shape[0]}")

# The summary statistics for numerical columns
print(gdf.describe())

# Find the building with the lowest volume and print its full record
lowest_volume_idx = gdf['_volume'].idxmin()
print(gdf.loc[lowest_volume_idx])


<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 6574 entries, 0 to 6573
Data columns (total 23 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   gml_id                         6574 non-null   object        
 1   gml_parent_id                  6574 non-null   object        
 2   citygml_target_uri             6574 non-null   object        
 3   citygml_feature_role           6574 non-null   object        
 4   gml_name                       42 non-null     object        
 5   citygml_creationDate           6574 non-null   datetime64[ms]
 6   citygml_function               6574 non-null   object        
 7   citygml_roof_type              6574 non-null   object        
 8   citygml_measured_height        6574 non-null   float64       
 9   citygml_measured_height_units  6574 non-null   object        
 10  citygml_storeys_above_ground   4522 non-null   object        
 11  Datenquel

## Import functions and data class
functions_occupants: estimate the number of occupants in a building<br>
functions_extract_gdf: Functions to Convert GeoDataFrame Row to Building Object<br>

In [None]:
from functions_occupants import *
from functions_extract_gdf import *


### Test: calculate the number of occupants in a building

In [None]:
# ============================================================================
# Extract Building Data from GeoDataFrame
# ============================================================================

# Candidate pool: residential buildings (function code 31001_1000) with a
# volume between 3000 and 4000 m3
gdf_v_general = gdf[(gdf["_volume"] >= 3000) & (gdf["_volume"] <= 4000) & (gdf["citygml_function"] == "31001_1000")]
if gdf_v_general.empty:
    # Without this guard random.randint(0, -1) fails with an opaque
    # "empty range" ValueError that does not point at the real cause.
    raise ValueError("No residential building with a volume between 3000 and 4000 m3 found in gdf")

# Pick one candidate at random (no seed is set, so the choice differs per run)
building_test = gdf_v_general.iloc[random.randint(0, len(gdf_v_general)-1)]

# Convert the selected GeoDataFrame row into a Building object
building_test_obj = extract_building_from_geodataframe(building_test)

# Extracted building information
print("="*70)
print("EXTRACTED BUILDING INFORMATION")
print("="*70)
print(f"Building ID:          {building_test_obj.building_id}")
print(f"Measured Height:      {building_test_obj.measured_height} m")
print(f"Storeys Above Ground: {building_test_obj.storeys_above_ground}")
print(f"Volume:               {building_test_obj.volume:.2f} m³" if building_test_obj.volume else "Volume: N/A")
print(f"Valid:                {building_test_obj.validate()}")
print("="*70)

# Calculate number of occupants in the building
if building_test_obj.validate() and building_test_obj.volume:

    # Step 1: Calculate heated area
    h_g, A_h = av_storey_h_and_h_area_building(building_test_obj)
    print(f"\nAverage storey height: {h_g:.2f} m")
    print(f"Heated area: {A_h:.2f} m²")

    # Step 1.5: Calculate number of households
    building_type, n_households, adjusted_A_h = calculate_number_of_households(
        heated_area=A_h,
        storeys=building_test_obj.get_storeys(),
        measured_height=building_test_obj.get_height()
    )
    print(f"\nResidential building type: {building_type}")
    print(f"Number of households: {n_households}")
    print(f"Adjusted heated area: {adjusted_A_h:.2f} m²")

    # Step 2: Calculate household areas
    household_areas = heated_area_per_household(adjusted_A_h, n_households, area_pmf)
    print(f"\nHousehold areas: {[f'{a:.1f}' for a in household_areas]} m²")

    # Step 3 & 4: Calculate occupants in summary
    if 'occupants_of_area_pmf' in globals():
        summary = calculate_building_occupants_summary(household_areas, occupants_of_area_pmf)
        print_building_occupants_report(summary)
    else:
        # Make the skipped step visible instead of ending silently
        print("\noccupants_of_area_pmf is not defined - occupant summary skipped")
else:
    # Make the skipped calculation visible instead of ending silently
    print("\nBuilding is invalid or has no volume - occupant calculation skipped")


EXTRACTED BUILDING INFORMATION
Building ID:          DEBY_LOD2_4907848
Measured Height:      22.358 m
Storeys Above Ground: 5
Volume:               3717.90 m³
Valid:                True

Average storey height: 4.47 m
Heated area: 682.73 m²

Residential building type: HRB
Number of households: 7
Adjusted heated area: 402.81 m²

Household areas: ['25.0', '42.8', '45.0', '56.0', '57.0', '86.0', '91.0'] m²

BUILDING OCCUPANTS REPORT

Total Occupants:      12 persons
Total Households:     7
Avg Occupants/HH:     1.71 persons/household

----------------------------------------------------------------------
HOUSEHOLD DISTRIBUTION BY SIZE
----------------------------------------------------------------------
1-person households:    5 ( 71.4%)
2-person households:    0 (  0.0%)
3-person households:    1 ( 14.3%)
4-person households:    1 ( 14.3%)
5+ person households:   0 (  0.0%)

----------------------------------------------------------------------
HOUSEHOLD AREA STATISTICS
-----------------

## Estimate the number of occupants for all buildings

In [24]:
# ===========================================
# Extract all buildings from GeoDataFrame
# ===========================================

print("\n" + "="*70)
print("EXTRACTING ALL BUILDINGS")
print("="*70)

# all_buildings is a list of Building objects
all_buildings = extract_buildings_from_geodataframe(gdf, filter_invalid=True)
print(f"Total buildings in GeoDataFrame: {len(gdf)}")
print(f"Valid buildings extracted: {len(all_buildings)}")
print(f"Invalid buildings filtered: {len(gdf) - len(all_buildings)}")

# Statistics on extracted buildings
if all_buildings:
    # Missing/falsy attribute values are left out of the respective statistic
    volumes = [b.volume for b in all_buildings if b.volume]
    heights = [b.get_height() for b in all_buildings if b.get_height()]
    storeys = [b.get_storeys() for b in all_buildings]
    functions = [b.function for b in all_buildings if b.function]
    roof_types = [b.roof_type for b in all_buildings if b.roof_type]

    print("\n" + "="*70)
    print("BUILDING STATISTICS")
    print("="*70)
    print(f"Volume - Mean: {np.mean(volumes):.2f} m³, Min: {np.min(volumes):.2f} m³, Max: {np.max(volumes):.2f} m³")
    print(f"Height - Mean: {np.mean(heights):.2f} m, Min: {np.min(heights):.2f} m, Max: {np.max(heights):.2f} m")
    print(f"Storeys - Mean: {np.mean(storeys):.1f}, Min: {int(np.min(storeys))}, Max: {int(np.max(storeys))}")

    # Counter comes from the notebook's import cell. It used to be imported
    # inside the `if functions:` branch only, so the roof-type branch raised
    # NameError whenever no building had a function code.

    # Function distribution (percentages are relative to all extracted buildings)
    if functions:
        function_counts = Counter(functions)
        print(f"\nBuilding Functions:")
        for func, count in function_counts.most_common():
            print(f"  {func}: {count} ({count/len(all_buildings)*100:.1f}%)")

    # Roof type distribution (percentages are relative to all extracted buildings)
    if roof_types:
        roof_counts = Counter(roof_types)
        print(f"\nRoof Types:")
        for roof, count in roof_counts.most_common():
            print(f"  {roof}: {count} ({count/len(all_buildings)*100:.1f}%)")

    print("="*70)

# Show first 5 buildings
print("\n" + "="*70)
print("FIRST 5 BUILDINGS")
print("="*70)
for i, building in enumerate(all_buildings[:5], 1):
    # The volume is treated as optional above, so do not apply a float format to None
    volume_text = f"{building.volume:.2f}m³" if building.volume is not None else "N/A"
    print(f"\n{i}. ID: {building.building_id}")
    print(f"   Function: {building.function}, Roof: {building.roof_type}")
    print(f"   Height: {building.measured_height}m, Storeys: {building.storeys_above_ground}, Volume: {volume_text}")



EXTRACTING ALL BUILDINGS
Total buildings in GeoDataFrame: 6574
Valid buildings extracted: 6574
Invalid buildings filtered: 0

BUILDING STATISTICS
Volume - Mean: 3495.86 m³, Min: 1.43 m³, Max: 211137.09 m³
Height - Mean: 14.17 m, Min: 0.04 m, Max: 68.20 m
Storeys - Mean: 3.5, Min: 1, Max: 23

Building Functions:
  31001_1000: 2877 (43.8%)
  31001_2000: 1832 (27.9%)
  31001_9998: 1427 (21.7%)
  51009_1610: 224 (3.4%)
  31001_2463: 64 (1.0%)
  31001_3020: 38 (0.6%)
  31001_3000: 34 (0.5%)
  31001_2523: 25 (0.4%)
  31001_3071: 21 (0.3%)
  31001_3041: 8 (0.1%)
  31001_3065: 7 (0.1%)
  31001_3052: 6 (0.1%)
  51009_1700: 4 (0.1%)
  31001_3043: 2 (0.0%)
  31001_3072: 2 (0.0%)
  31001_3051: 1 (0.0%)
  31001_2513: 1 (0.0%)
  31001_2465: 1 (0.0%)

Roof Types:
  1000: 2808 (42.7%)
  3100: 2326 (35.4%)
  3200: 646 (9.8%)
  2100: 482 (7.3%)
  3400: 208 (3.2%)
  9999: 45 (0.7%)
  3500: 18 (0.3%)
  3900: 15 (0.2%)
  3300: 13 (0.2%)
  4000: 5 (0.1%)
  2200: 5 (0.1%)
  3600: 2 (0.0%)
  3700: 1 (0.0%)



In [26]:
# ===========================================
# Extract residential buildings for calculation
# ===========================================

print("="*70)
# Keep every building whose function code contains '31001_1000' (residential);
# buildings without a function code are dropped.
residential = list(
    filter(lambda bld: bld.function and '31001_1000' in bld.function, all_buildings)
)
print(f"Residential buildings: {len(residential)}")
print("="*70)

Residential buildings: 2877


In [27]:
# ============================================================================
# Function to process all buildings and calculate occupants
# ============================================================================

def calculate_occupants_for_all_buildings(
    all_buildings,
    area_pmf,
    occupants_of_area_pmf,
    verbose=True
):
    """Estimate the occupants of every building and store the result on it.

    Each building is updated in place: its ``total_occupants`` attribute is
    set to the estimated number of occupants, or to ``None`` when the building
    fails ``validate()``, has no positive volume, yields no household areas,
    or an exception is raised during the calculation.

    Args:
        all_buildings: Sized iterable of building objects providing
            ``validate()``, ``volume``, ``get_storeys()``, ``get_height()``
            and ``building_id``.
        area_pmf: Forwarded unchanged to ``heated_area_per_household``
            (presumably an np.ndarray - confirm against that function).
        occupants_of_area_pmf: Forwarded unchanged to
            ``calculate_building_occupants`` (presumably an np.ndarray -
            confirm against that function).
        verbose: When True, print a progress line every 100 buildings, the
            messages of the first 5 exceptions and a final summary.

    Returns:
        The same ``all_buildings`` object that was passed in.
    """
    max_errors_shown = 5

    total_buildings = len(all_buildings)
    successful = 0
    failed = 0
    # Exceptions are counted separately from `failed`: the latter also counts
    # validation failures, which previously used up the "first 5 errors"
    # budget and could hide every exception message.
    errors = 0

    if verbose:
        print(f"Processing {total_buildings} buildings...")
        print("=" * 70)

    for idx, building in enumerate(all_buildings):
        # Progress indicator every 100 buildings
        if verbose and (idx + 1) % 100 == 0:
            print(f"Progress: {idx + 1}/{total_buildings} buildings processed...")

        # Validation check: skip buildings without a usable, positive volume
        if not building.validate() or not building.volume or building.volume <= 0:
            building.total_occupants = None
            failed += 1
            continue

        try:
            # Step 1: Calculate heated area
            h_g, A_h = av_storey_h_and_h_area_building(building)

            # Step 1.5: Calculate number of households
            building_type, n_households, adjusted_A_h = calculate_number_of_households(
                heated_area=A_h,
                storeys=building.get_storeys(),
                measured_height=building.get_height()
            )

            # Step 2: Calculate household areas
            household_areas = heated_area_per_household(adjusted_A_h, n_households, area_pmf)

            if not household_areas:
                building.total_occupants = None
                failed += 1
                continue

            # Step 3 & 4: Calculate occupants
            total_occ, _, _ = calculate_building_occupants(household_areas, occupants_of_area_pmf)

            # Update building
            building.total_occupants = total_occ
            successful += 1

        except Exception as e:
            # Best effort: one broken building must not abort the whole run
            building.total_occupants = None
            failed += 1
            errors += 1
            if verbose and errors <= max_errors_shown:  # Show first 5 errors
                print(f"  Error in {building.building_id}: {str(e)}")

    if verbose:
        if errors > max_errors_shown:
            print(f"  ... {errors - max_errors_shown} further errors not shown")
        print("=" * 70)
        print(f"✓ Successful: {successful}")
        print(f"✗ Failed: {failed}")
        print(f"Total: {total_buildings}")

    return all_buildings

In [None]:
# Checkpoint: the following must already be defined before running this cell:
# - residential: list of Building objects (selected in the cell above)
# - area_pmf: np.ndarray
# - occupants_of_area_pmf: np.ndarray
# - All calculation functions

# Estimate the occupants of every residential building; the function updates
# the Building objects in place and hands back the same list.
residential = calculate_occupants_for_all_buildings(
    residential,
    area_pmf,
    occupants_of_area_pmf,
    verbose=True,
)

# View results
print("\n" + "=" * 70)
print("RESULTS SUMMARY")
print("=" * 70)

# Only buildings for which an occupant count could be calculated
valid_buildings = [bld for bld in residential if bld.total_occupants is not None]
occupant_counts = [bld.total_occupants for bld in valid_buildings]

print(f"Buildings with occupants calculated: {len(valid_buildings)}")
print(f"Total population: {sum(occupant_counts):,}")

if valid_buildings:
    print(f"Average occupants per building: {np.mean(occupant_counts):.2f}")
    print(f"Min occupants: {min(occupant_counts)}")
    print(f"Max occupants: {max(occupant_counts)}")

# The occupant count of each residential building is stored in the
# total_occupants attribute of the Building objects in the `residential` list


Processing 2877 buildings...
Progress: 100/2877 buildings processed...
Progress: 200/2877 buildings processed...
Progress: 300/2877 buildings processed...
Progress: 400/2877 buildings processed...
Progress: 500/2877 buildings processed...
Progress: 600/2877 buildings processed...
Progress: 700/2877 buildings processed...
Progress: 800/2877 buildings processed...
Progress: 900/2877 buildings processed...
Progress: 1000/2877 buildings processed...
Progress: 1100/2877 buildings processed...
Progress: 1200/2877 buildings processed...
Progress: 1300/2877 buildings processed...
Progress: 1400/2877 buildings processed...
Progress: 1500/2877 buildings processed...
Progress: 1600/2877 buildings processed...
Progress: 1700/2877 buildings processed...
Progress: 1800/2877 buildings processed...
Progress: 1900/2877 buildings processed...
Progress: 2000/2877 buildings processed...
Progress: 2100/2877 buildings processed...
Progress: 2200/2877 buildings processed...
Progress: 2300/2877 buildings proc

### Append the number of occupants to dataset (gdf)

In [None]:
# ===========================================
# Append the number of occupants to dataset (gdf)
# ===========================================

# Lookup table: building id -> occupant count (None where nothing was calculated)
occupants_dict = dict(
    (bld.building_id, bld.total_occupants) for bld in all_buildings
)
# Join onto the GeoDataFrame via the gml_id column
gdf['total_occupants'] = gdf['gml_id'].map(occupants_dict)

# Checkpoint: make sure the total_occupants column is added correctly
print(f"Total population: {gdf['total_occupants'].sum():.0f}")
print(f"Buildings with occupants: {gdf['total_occupants'].notna().sum()}")
print(gdf[['gml_id', 'total_occupants']].head(10))

# Export steps (commented out; enable to write the results to disk)
# # Export results as GeoJSON (with geometry)
# gdf.to_file("../Test data/buildings_with_occupants.geojson", driver='GeoJSON')
# print("✓ Saved to GeoJSON")

# # Export as CSV (without geometry)
# gdf.drop(columns='geometry').to_csv("../Test data/buildings_with_occupants.csv", index=False)
# print("✓ Saved to CSV")

# Next step: buildings_with_occupants.geojson is the input for the Bayesian
# network that models the risk of fire (code in BNs.ipynb)

Total population: 39488
Buildings with occupants: 2877
            gml_id  total_occupants
0  DEBY_LOD2_59962              NaN
1  DEBY_LOD2_59986              NaN
2  DEBY_LOD2_59987              NaN
3  DEBY_LOD2_59988              NaN
4  DEBY_LOD2_59989              NaN
5  DEBY_LOD2_59990             19.0
6  DEBY_LOD2_59991             17.0
7  DEBY_LOD2_59992              NaN
8  DEBY_LOD2_59993              NaN
9  DEBY_LOD2_59994              NaN
