In [1]:
import geopandas as gpd

# Load the building layer from the GeoPackage. A forward slash is used as the
# path separator so the relative path resolves on Windows, Linux and macOS.
gdf = gpd.read_file("Areas-of-interest-POIs/merged_building_volumes_filtered.gpkg")

# Show the CRS; later cells reproject the other layers to this one.
print(gdf.crs)

EPSG:25832


In [2]:
# Summarise the loaded layer: column names, non-null counts, dtypes and memory use.
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 655874 entries, 0 to 655873
Data columns (total 11 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   gml_id      655874 non-null  object  
 1   measHeight  655874 non-null  float64 
 2   function    655874 non-null  object  
 3   Stadt       655874 non-null  object  
 4   Strasse     242814 non-null  object  
 5   HausNr      242814 non-null  object  
 6   Name        4904 non-null    object  
 7   area_m2     655874 non-null  float64 
 8   volume_m3   655874 non-null  float64 
 9   _cluster    655874 non-null  int64   
 10  geometry    655874 non-null  geometry
dtypes: float64(3), geometry(1), int64(1), object(6)
memory usage: 55.0+ MB


In [3]:
import xml.etree.ElementTree as ET

# Parse the AdV building-function code list (forward slash keeps the relative
# path portable across operating systems).
# NOTE(review): no later cell visible in this notebook reads `tree` or `root`;
# read_adv_codelist() parses the same file itself - confirm whether this cell
# is still needed.
tree = ET.parse("Areas-of-interest-POIs/BuildingFunctionTypeAdV.xml")
root = tree.getroot()

In [4]:
import xml.etree.ElementTree as ET
import pandas as pd

def read_adv_codelist(xml_path, gml_ns="http://www.opengis.net/gml"):
    """Read a GML dictionary code list into a two-column DataFrame.

    Every ``gml:Definition`` element found below the document root is
    inspected. Among its direct ``gml:name`` children, one carrying a
    ``codeSpace`` attribute supplies the code and one without it supplies the
    label (if several of a kind are present, the last one wins). Definitions
    missing either a non-empty code or a non-empty label are skipped.

    Parameters
    ----------
    xml_path : str, path-like or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.
    gml_ns : str, optional
        Namespace URI bound to the ``gml`` prefix when searching the document.
        The default matches the previously hard-coded value; pass e.g.
        ``"http://www.opengis.net/gml/3.2"`` for documents using that namespace.

    Returns
    -------
    pandas.DataFrame
        Columns ``function`` (code) and ``label_de`` (label), one row per
        distinct code (first occurrence kept), sorted by ``function`` with a
        fresh 0..n-1 index. Empty if no complete definition is found.
    """
    ns = {"gml": gml_ns}
    root = ET.parse(xml_path).getroot()

    rows = []
    for definition in root.findall(".//gml:Definition", ns):
        code = None
        label_de = None
        for name_el in definition.findall("gml:name", ns):
            # Element text may be None for an empty tag; normalise to "".
            text = (name_el.text or "").strip()
            if "codeSpace" in name_el.attrib:
                code = text
            else:
                label_de = text
        # Empty strings are falsy, so blank codes/labels are rejected here too.
        if code and label_de:
            rows.append((code, label_de))

    return (pd.DataFrame(rows, columns=["function", "label_de"])
              .drop_duplicates("function")
              .sort_values("function")
              .reset_index(drop=True))

# Build the code -> German label lookup table (forward slash keeps the relative
# path portable across operating systems) and summarise it.
df_codes = read_adv_codelist("Areas-of-interest-POIs/BuildingFunctionTypeAdV.xml")
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   function  301 non-null    object
 1   label_de  301 non-null    object
dtypes: object(2)
memory usage: 4.8+ KB


In [5]:
# Number of distinct labels (missing values, if any, count as one label).
df_codes["label_de"].nunique(dropna=False)

293

In [6]:
# Frequency of each label; since df_codes has one row per code, a count above 1
# means several codes share the same label.
df_codes['label_de'].value_counts()

label_de
Sonstiges                                  6
Schöpfwerk                                 2
Umformer                                   2
Nach Quellenlage nicht zu spezifizieren    2
Wohngebäude                                1
                                          ..
Betriebsgebäude zur Seilbahn               1
Bootshaus                                  1
Betriebsgebäude zur Schleuse               1
Dock (Halle)                               1
Sperrwerk                                  1
Name: count, Length: 293, dtype: int64

In [7]:
# Preview the first ten code/label pairs.
df_codes.head(10)

Unnamed: 0,function,label_de
0,31001_1000,Wohngebäude
1,31001_1010,Wohnhaus
2,31001_1020,Wohnheim
3,31001_1021,Kinderheim
4,31001_1022,Seniorenheim
5,31001_1023,Schwesternwohnheim
6,31001_1024,"Studenten-, Schülerwohnheim"
7,31001_1025,Schullandheim
8,31001_1100,Gemischt genutztes Gebäude mit Wohnen
9,31001_1110,Wohngebäude mit Gemeinbedarf


In [8]:
# Persist the code list as UTF-8 CSV without the index column. A forward slash
# is used so the relative path also resolves outside Windows.
df_codes.to_csv(
    "Areas-of-interest-POIs/building_function_codelist.csv",
    index=False,
    encoding="utf-8"
)

In [9]:
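# Provenance note: the commented-out code below is a one-off step that read
# building_function_codelist.csv, added an English `label_en` column via
# googletrans, and wrote building_function_codelist_de_en.csv - the file the
# next cell loads. It is kept for reference only and is not executed.
# NOTE(review): the reason it is disabled is not recorded here; presumably to
# avoid calling the translation service on every run - confirm.

# from googletrans import Translator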

# df = pd.read_csv(
#     r"Areas-of-interest-POIs\building_function_codelist.csv",
#     encoding="utf-8-sig"
# )

# translator = Translator()

# def translate(text):
#     if pd.isna(text):
#         return text
#     return translator.translate(text, src="de", dest="en").text

# df["label_en"] = df["label_de"].apply(translate)

# df.to_csv(
#     r"Areas-of-interest-POIs\building_function_codelist_de_en.csv",
#     index=False,
#     encoding="utf-8-sig"
# )

# df.head(10)

In [10]:
# Load the German/English code list (forward slash keeps the relative path
# portable across operating systems).
df = pd.read_csv("Areas-of-interest-POIs/building_function_codelist_de_en.csv")

# Attach the labels to every building via its `function` code.
gdf = (
    gdf
    # Drop labels left over from a previous execution of this cell; otherwise a
    # re-run would produce label_de_x / label_de_y style duplicates.
    .drop(columns=["label_de", "label_en"], errors="ignore")
    .merge(
        df[["function", "label_de", "label_en"]],
        on="function",
        how="left",
        # Fail loudly if a code appears more than once in the lookup table,
        # instead of silently duplicating building rows.
        validate="many_to_one",
    )
)

In [11]:
# Preview the first rows, now including the label_de / label_en columns.
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,MULTIPOLYGON Z (((608736.257 5799617.417 95.25...,Mast,mast
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,MULTIPOLYGON Z (((608534.443 5799829.37 94.225...,Mast,mast
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,2,MULTIPOLYGON Z (((609554.181 5797264.172 78.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,3,MULTIPOLYGON Z (((608098.849 5796746.146 83.91...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,4,"MULTIPOLYGON Z (((608926.355 5797165.768 84.6,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce


In [12]:
# Load the OSM building layer (forward slash keeps the relative path portable
# across operating systems) and preview it.
osm_building_data = gpd.read_file("Areas-of-interest-POIs/Buildings-Area-of-study.gpkg")

osm_building_data.head()

Unnamed: 0,osm_id,code,fclass,name,type,geometry
0,17248750,1500,building,VW Parkhaus FE1,parking,"MULTIPOLYGON (((10.74321 52.43561, 10.74727 52..."
1,23621807,1500,building,VW Parkpalette,,"MULTIPOLYGON (((10.74755 52.43733, 10.74825 52..."
2,24030702,1500,building,Halle 73,industrial,"MULTIPOLYGON (((10.74496 52.44097, 10.74531 52..."
3,24030779,1500,building,,industrial,"MULTIPOLYGON (((10.74662 52.43858, 10.74673 52..."
4,24030795,1500,building,Halle 74,industrial,"MULTIPOLYGON (((10.74864 52.44009, 10.75003 52..."


In [13]:
# Reproject the OSM buildings to the CRS of `gdf` so both layers use the same
# coordinates in the spatial join. Reassigning (rather than inplace=True)
# avoids mutating the object in place.
osm_building_data = osm_building_data.to_crs(gdf.crs)
print(osm_building_data.crs)

EPSG:25832


In [14]:
name_col = "name"


def _clean_unique(values):
    """Return the sorted, de-duplicated, stripped, non-empty strings in `values`."""
    stripped = (str(v).strip() for v in values.dropna())
    return sorted({text for text in stripped if text})


# Restrict the OSM layer to features whose name is present and not blank.
has_name = osm_building_data[name_col].notna()
not_blank = osm_building_data[name_col].astype(str).str.strip() != ""
osm_named = osm_building_data[has_name & not_blank].copy()

# Left spatial join: one row per (gdf polygon, intersecting named OSM feature);
# polygons without a match are kept with a missing name.
j = gpd.sjoin(
    gdf[["geometry"]].reset_index(names="gdf_idx"),
    osm_named[[name_col, "geometry"]],
    how="left",
    predicate="intersects",
)

# Collapse the matches to one list of names per gdf polygon
# (an empty list when nothing intersected).
names = j.groupby("gdf_idx")[name_col].apply(_clean_unique).rename("osm_names")

# Write the lists back, aligned on the gdf index.
gdf["osm_names"] = gdf.index.to_series().map(names)

In [15]:
# Buildings whose osm_names list holds more than 10 entries.
many_names = gdf["osm_names"].notna() & gdf["osm_names"].str.len().gt(10)
gdf[many_names].head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names
293063,DENILD1513150232765_26468543,14.156,31001_2100,"Wolfsburg, Stadt",,,Halle 71,61973.011137,877289.9,308733,"MULTIPOLYGON Z (((618363.646 5811162.43 57.39,...",Gebäude für Gewerbe und Industrie,Commercial and industrial buildings,"[EP2, EP3, ER6, Halle 70, Halle 70A, Halle 71,..."
295086,DENILD1513150232765_26516937,16.716,31001_2100,"Wolfsburg, Stadt",,,"['Halle 14 13 12', 'Halle 8', 'Bürogebäude',...",706191.844289,11804700.0,311086,MULTIPOLYGON Z (((620215.53 5810328.026 57.487...,Gebäude für Gewerbe und Industrie,Commercial and industrial buildings,"[BU 2, Halle 1, Halle 1 A, Halle 10, Halle 11,..."
295101,DENILD1513150232765_26517172,5.565,31001_2010,"Wolfsburg, Stadt",,,,8823.693765,49103.86,311106,MULTIPOLYGON Z (((621853.226 5810199.053 63.89...,Gebäude für Handel und Dienstleistungen,Buildings for trade and services,"[Center-Information, Desigual, G. K. Mayer Sho..."
298349,DENILD1513150232765_26567047,13.804,31001_2010,"Wolfsburg, Stadt",,,,3859.985373,53283.24,315027,MULTIPOLYGON Z (((622088.359 5810285.319 58.67...,Gebäude für Handel und Dienstleistungen,Buildings for trade and services,"[Bugatti Shoes, CALIDA, Coach, Five Guys, Hunk..."
298431,DENILD1513150232765_26567710,13.742,31001_2010,"Wolfsburg, Stadt",,,,2996.846687,41182.67,315134,MULTIPOLYGON Z (((622087.449 5810260.476 72.51...,Gebäude für Handel und Dienstleistungen,Buildings for trade and services,"[Adidas, Jack Wolfskin, Möve, Pepe Jeans, Rave..."


In [16]:
# Load the OSM land-use layer (forward slash keeps the relative path portable
# across operating systems) and preview it.
osm_landuse_data = gpd.read_file("Areas-of-interest-POIs/Land-use_Area-of-study.gpkg")

osm_landuse_data.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,15036412,7201,forest,,"MULTIPOLYGON (((10.77893 52.45542, 10.77913 52..."
1,24031436,7207,allotments,,"MULTIPOLYGON (((10.73406 52.42662, 10.73491 52..."
2,24975881,7218,grass,,"MULTIPOLYGON (((10.79298 52.43343, 10.79513 52..."
3,24975929,7218,grass,,"MULTIPOLYGON (((10.79383 52.43305, 10.79451 52..."
4,25021140,7203,residential,,"MULTIPOLYGON (((10.74657 52.42885, 10.74674 52..."


In [17]:
# Number of land-use polygons per fclass value.
osm_landuse_data['fclass'].value_counts()

fclass
farmland             13779
meadow               13014
forest               10067
scrub                 8387
grass                 4723
residential           3106
farmyard              1092
industrial             927
park                   739
commercial             716
allotments             656
cemetery               473
retail                 203
heath                  199
quarry                 197
recreation_ground      147
orchard                133
nature_reserve         109
vineyard                 9
military                 4
Name: count, dtype: int64

In [18]:
# Display the land-use frame (the rendered output is truncated to its first and last rows).
osm_landuse_data

Unnamed: 0,osm_id,code,fclass,name,geometry
0,15036412,7201,forest,,"MULTIPOLYGON (((10.77893 52.45542, 10.77913 52..."
1,24031436,7207,allotments,,"MULTIPOLYGON (((10.73406 52.42662, 10.73491 52..."
2,24975881,7218,grass,,"MULTIPOLYGON (((10.79298 52.43343, 10.79513 52..."
3,24975929,7218,grass,,"MULTIPOLYGON (((10.79383 52.43305, 10.79451 52..."
4,25021140,7203,residential,,"MULTIPOLYGON (((10.74657 52.42885, 10.74674 52..."
...,...,...,...,...,...
58675,1452574739,7217,scrub,,"MULTIPOLYGON (((10.56889 52.15847, 10.56889 52..."
58676,1458500879,7218,grass,,"MULTIPOLYGON (((10.53833 52.16558, 10.53834 52..."
58677,1458500882,7218,grass,,"MULTIPOLYGON (((10.53776 52.16537, 10.53777 52..."
58678,19986979,7203,residential,,"MULTIPOLYGON (((10.57139 52.06734, 10.57143 52..."


In [19]:
# Frequency of each land-use name (value_counts ignores missing names by default).
osm_landuse_data['name'].value_counts()

name
Friedhof                            21
Pfingstanger                        15
Pferdekoppel                        15
Festplatz                           11
Steinkamp                           10
                                    ..
Rinderwiese                          1
Haferbergwiesen                      1
PHÖNIX-Seniorenzentrum Eichenhof     1
Gewerbegebiet Rohrwiesen             1
Seeliger Park                        1
Name: count, Length: 3388, dtype: int64

In [20]:
def _sorted_unique(values):
    """Return the sorted distinct non-missing entries of `values`."""
    return sorted(set(values.dropna()))


def _sorted_unique_text(values):
    """Return the sorted distinct stripped, non-empty strings in `values`."""
    stripped = (str(v).strip() for v in values.dropna())
    return sorted({text for text in stripped if text})


def _as_list(value):
    """Pass lists through unchanged; replace anything else by an empty list."""
    if isinstance(value, list):
        return value
    return []


# Bring the land-use layer into the CRS of gdf before joining.
landuse = osm_landuse_data.to_crs(gdf.crs)

# Left spatial join: one row per (gdf polygon, intersecting land-use polygon).
j = gpd.sjoin(
    gdf[["geometry"]].reset_index(names="gdf_idx"),
    landuse[["fclass", "name", "geometry"]],
    how="left",
    predicate="intersects",
)

# One list of land-use classes and one list of land-use names per gdf polygon.
class_lu = j.groupby("gdf_idx")["fclass"].apply(_sorted_unique)
name_lu = j.groupby("gdf_idx")["name"].apply(_sorted_unique_text)

# Attach to gdf; an empty list means no land use (or no named land use) intersected.
gdf["class_landuse"] = gdf.index.to_series().map(class_lu).apply(_as_list)
gdf["name_landuse"] = gdf.index.to_series().map(name_lu).apply(_as_list)



In [21]:
# Preview the first rows, now including the class_landuse / name_landuse columns.
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,class_landuse,name_landuse
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,MULTIPOLYGON Z (((608736.257 5799617.417 95.25...,Mast,mast,[],[farmland],[]
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,MULTIPOLYGON Z (((608534.443 5799829.37 94.225...,Mast,mast,[],[farmland],[]
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,2,MULTIPOLYGON Z (((609554.181 5797264.172 78.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[]
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,3,MULTIPOLYGON Z (((608098.849 5796746.146 83.91...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[]
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,4,"MULTIPOLYGON Z (((608926.355 5797165.768 84.6,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[]


In [22]:
# Buildings that intersect at least one named land-use polygon.
has_landuse_name = gdf["name_landuse"].notna() & gdf["name_landuse"].str.len().gt(0)
gdf[has_landuse_name].head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,class_landuse,name_landuse
14,DENILD01000002AI,2.567,31001_1000,"Braunschweig, Stadt",,,,20.005565,51.354285,14,MULTIPOLYGON Z (((609168.166 5797801.912 86.23...,Wohngebäude,residential buildings,[],[allotments],[KGV Auf dem Klei]
63,DENILD01000002BX,2.33,31001_1000,"Braunschweig, Stadt",,,,15.681107,36.536978,64,"MULTIPOLYGON Z (((609365.72 5797802.233 81.5, ...",Wohngebäude,residential buildings,[],[allotments],[KGV Auf dem Klei]
75,DENILD01000002Bt,2.175,31001_1000,"Braunschweig, Stadt",,,,20.867027,45.385784,77,MULTIPOLYGON Z (((609292.305 5797804.527 80.85...,Wohngebäude,residential buildings,[],[allotments],[KGV Auf dem Klei]
92,DENILD01000002CL,2.549,31001_1000,"Braunschweig, Stadt",,,,19.097345,48.679134,94,"MULTIPOLYGON Z (((609290.89 5797829.997 83.56,...",Wohngebäude,residential buildings,[],[allotments],[KGV Auf dem Klei]
100,DENILD01000002CZ,2.21,31001_1000,"Braunschweig, Stadt",,,,14.204898,31.392825,102,MULTIPOLYGON Z (((609242.529 5797831.682 84.42...,Wohngebäude,residential buildings,[],[allotments],[KGV Auf dem Klei]


In [23]:
# Load the function-code -> activity mapping (forward slash keeps the relative
# path portable across operating systems).
df_map = pd.read_excel(
    "Areas-of-interest-POIs/alkis_building_activity_map.xlsx"
)

# Attach the mapping columns to every building via its `function` code.
gdf = (
    gdf
    # Remove mapping columns left over from a previous execution of this cell,
    # so a re-run does not create *_x / *_y duplicates.
    .drop(columns=df_map.columns.difference(["gfk_code"]), errors="ignore")
    .merge(
        df_map,
        left_on="function",
        right_on="gfk_code",
        how="left",
        # Fail loudly if a gfk_code occurs more than once in the mapping,
        # instead of silently duplicating building rows.
        validate="many_to_one",
    )
    # The key is redundant with `function` after the merge.
    .drop(columns=["gfk_code"])
)

In [24]:
# Preview the first rows, now including the columns merged in from df_map.
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,class_landuse,name_landuse,gfk_class,gfk_name,activities
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,MULTIPOLYGON Z (((608736.257 5799617.417 95.25...,Mast,mast,[],[farmland],[],Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,MULTIPOLYGON Z (((608534.443 5799829.37 94.225...,Mast,mast,[],[farmland],[],Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,2,MULTIPOLYGON Z (((609554.181 5797264.172 78.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[],Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,3,MULTIPOLYGON Z (((608098.849 5796746.146 83.91...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[],Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,4,"MULTIPOLYGON Z (((608926.355 5797165.768 84.6,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,[],[residential],[],Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business
