In [1]:
import geopandas as gpd

# Load the building-volume layer that the rest of the notebook enriches.
# Forward slashes keep this relative path valid on Windows, Linux and macOS
# (a backslash is not a path separator outside Windows).
gdf = gpd.read_file("Areas-of-interest-POIs/merged_building_volumes_filtered.gpkg")

# Print the layer's CRS; later cells reproject other layers to gdf.crs.
print(gdf.crs)

EPSG:25832


In [2]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 655874 entries, 0 to 655873
Data columns (total 11 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   gml_id      655874 non-null  object  
 1   measHeight  655874 non-null  float64 
 2   function    655874 non-null  object  
 3   Stadt       655874 non-null  object  
 4   Strasse     242814 non-null  object  
 5   HausNr      242814 non-null  object  
 6   Name        4904 non-null    object  
 7   area_m2     655874 non-null  float64 
 8   volume_m3   655874 non-null  float64 
 9   _cluster    655874 non-null  int64   
 10  geometry    655874 non-null  geometry
dtypes: float64(3), geometry(1), int64(1), object(6)
memory usage: 55.0+ MB


In [3]:
import xml.etree.ElementTree as ET

# Parse the AdV building-function code list and keep its root element.
# Forward slashes keep the relative path portable across operating systems.
# NOTE(review): `tree` and `root` are not referenced again in the visible cells;
# read_adv_codelist() parses the same file on its own.
tree = ET.parse("Areas-of-interest-POIs/BuildingFunctionTypeAdV.xml")
root = tree.getroot()

In [4]:
import xml.etree.ElementTree as ET
import pandas as pd

def read_adv_codelist(xml_path):
    """Read a GML code-list dictionary into a two-column lookup table.

    Every ``gml:Definition`` element is scanned for its ``gml:name`` children:
    a name carrying a ``codeSpace`` attribute supplies the code, a name without
    it supplies the label. Definitions lacking either a non-empty code or a
    non-empty label are skipped.

    Parameters
    ----------
    xml_path : path or file object accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns ``function`` (code) and ``label_de`` (label), one row per
        distinct code (first occurrence wins), sorted by code, with a fresh
        0..n-1 index.
    """
    gml_ns = {"gml": "http://www.opengis.net/gml"}
    document_root = ET.parse(xml_path).getroot()

    records = []
    for definition in document_root.findall(".//gml:Definition", gml_ns):
        code, label_de = None, None
        for name_el in definition.findall("gml:name", gml_ns):
            text = (name_el.text or "").strip()
            # The codeSpace attribute distinguishes the code from the label.
            if "codeSpace" in name_el.attrib:
                code = text
            else:
                label_de = text
        if not (code and label_de):
            continue
        records.append((code, label_de))

    codelist = pd.DataFrame(records, columns=["function", "label_de"])
    codelist = codelist.drop_duplicates("function")
    codelist = codelist.sort_values("function")
    return codelist.reset_index(drop=True)

# Build the code -> German label lookup from the AdV code list.
# Forward slashes keep the relative path portable across operating systems.
df_codes = read_adv_codelist("Areas-of-interest-POIs/BuildingFunctionTypeAdV.xml")
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   function  301 non-null    object
 1   label_de  301 non-null    object
dtypes: object(2)
memory usage: 4.8+ KB


In [5]:
len(df_codes['label_de'].unique())

293

In [6]:
df_codes['label_de'].value_counts()

label_de
Sonstiges                                  6
Schöpfwerk                                 2
Umformer                                   2
Nach Quellenlage nicht zu spezifizieren    2
Wohngebäude                                1
                                          ..
Betriebsgebäude zur Seilbahn               1
Bootshaus                                  1
Betriebsgebäude zur Schleuse               1
Dock (Halle)                               1
Sperrwerk                                  1
Name: count, Length: 293, dtype: int64

In [7]:
df_codes.head()

Unnamed: 0,function,label_de
0,31001_1000,Wohngebäude
1,31001_1010,Wohnhaus
2,31001_1020,Wohnheim
3,31001_1021,Kinderheim
4,31001_1022,Seniorenheim


In [8]:
# Save the code list as UTF-8 CSV without the index column.
# Forward slashes keep the relative path portable across operating systems.
df_codes.to_csv(
    "Areas-of-interest-POIs/building_function_codelist.csv",
    index=False,
    encoding="utf-8"
)

In [9]:
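# NOTE: this cell is commented out. When run, it translates label_de to English
# with googletrans and writes building_function_codelist_de_en.csv, which the
# next cell reads; that file must already exist for a fresh run of the notebook.
# from googletrans import Translator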

# df = pd.read_csv(
#     r"Areas-of-interest-POIs\building_function_codelist.csv",
#     encoding="utf-8-sig"
# )

# translator = Translator()

# def translate(text):
#     if pd.isna(text):
#         return text
#     return translator.translate(text, src="de", dest="en").text

# df["label_en"] = df["label_de"].apply(translate)

# df.to_csv(
#     r"Areas-of-interest-POIs\building_function_codelist_de_en.csv",
#     index=False,
#     encoding="utf-8-sig"
# )

# df.head(10)

In [10]:
# Load the German/English code list.
# Forward slashes keep the relative path portable across operating systems.
df = pd.read_csv("Areas-of-interest-POIs/building_function_codelist_de_en.csv")

# Attach both labels to every building by its function code.
# validate="many_to_one" makes the merge raise if a code appears more than once
# in the code list, instead of silently duplicating building rows.
gdf = gdf.merge(
    df[["function", "label_de", "label_en"]],
    on="function",
    how="left",
    validate="many_to_one",
)

In [11]:
gdf.tail()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en
655869,UUID_ffb2e246-ee7f-40a8-98c5-a6b02672ace9,2.839,31001_2000,Grafhorst,,,,53.475836,151.817898,739752,MULTIPOLYGON Z (((632149.112 5811844.913 62.12...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
655870,UUID_ffd105e2-8d39-4d47-a5d9-3669010d40b5,2.139,31001_2000,"Peine, Stadt",,,,55.441332,118.589009,739771,MULTIPOLYGON Z (((580527.673 5799786.278 63.38...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
655871,UUID_ffde2e68-2cfb-4bd7-9286-15fe43ac31d6,2.519,31001_2000,"Goslar, Stadt",,,,40.276721,101.457059,739773,MULTIPOLYGON Z (((598386.446 5753239.569 262.5...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
655872,UUID_fff0d051-6f00-47f5-9b68-1f1ac8acdb2f,2.668,31001_2000,Wasbüttel,,,,68.497208,182.750552,739783,MULTIPOLYGON Z (((608175.333 5808227.048 70.29...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce
655873,UUID_fff34825-8b9c-467c-9d42-486069a7ffbe,8.089,31001_1000,"Salzgitter, Stadt",Im Winkel,10.0,,104.378961,844.321416,739784,MULTIPOLYGON Z (((596284.494 5784414.372 95.36...,Wohngebäude,residential buildings


In [12]:
# Load the OSM building footprints for the study area.
# Forward slashes keep the relative path portable across operating systems.
osm_building_data = gpd.read_file("Areas-of-interest-POIs/Buildings-Area-of-study.gpkg")

osm_building_data.head()

Unnamed: 0,osm_id,code,fclass,name,type,geometry
0,17248750,1500,building,VW Parkhaus FE1,parking,"MULTIPOLYGON (((10.74321 52.43561, 10.74727 52..."
1,23621807,1500,building,VW Parkpalette,,"MULTIPOLYGON (((10.74755 52.43733, 10.74825 52..."
2,24030702,1500,building,Halle 73,industrial,"MULTIPOLYGON (((10.74496 52.44097, 10.74531 52..."
3,24030779,1500,building,,industrial,"MULTIPOLYGON (((10.74662 52.43858, 10.74673 52..."
4,24030795,1500,building,Halle 74,industrial,"MULTIPOLYGON (((10.74864 52.44009, 10.75003 52..."


In [13]:
# Reproject the OSM buildings to the CRS of the building-volume layer.
# Plain assignment replaces inplace=True: same result, no hidden mutation of the
# frame an earlier cell displayed.
osm_building_data = osm_building_data.to_crs(gdf.crs)
print(osm_building_data.crs)

EPSG:25832


In [14]:
import numpy as np

name_col = "name"
type_col = "type"


def _join_unique(values):
    """Join the distinct, stripped, non-empty values of a Series with ';'.

    Nulls are dropped, each remaining value is cast to str and stripped, and the
    survivors are de-duplicated and sorted. Returns "" when nothing is left.
    """
    cleaned = {str(v).strip() for v in values.dropna()}
    cleaned.discard("")
    return ";".join(sorted(cleaned))


# keep only features that actually have a name
osm_named = osm_building_data[
    osm_building_data[name_col].notna()
    & (osm_building_data[name_col].astype(str).str.strip() != "")
].copy()

# spatial join: which named OSM buildings intersect each gdf polygon.
# An inner join is sufficient: buildings without a match are simply absent from
# `j` and come out as NaN from the .map() calls below, so the Python-level
# aggregation runs only over matched buildings instead of every row of gdf.
j = gpd.sjoin(
    gdf[["geometry"]].reset_index(names="gdf_idx"),
    osm_named[[name_col, type_col, "geometry"]],
    how="inner",
    predicate="intersects"
)

# aggregate names → semicolon-separated string ("" becomes NaN)
names = (
    j.groupby("gdf_idx")[name_col]
     .apply(_join_unique)
     .replace("", np.nan)
     .rename("osm_names")
)

# aggregate types → semicolon-separated string ("" becomes NaN)
types = (
    j.groupby("gdf_idx")[type_col]
     .apply(_join_unique)
     .replace("", np.nan)
     .rename("osm_building_type")
)

# attach back to gdf; object dtype keeps the columns string-capable even when
# nothing matched at all
gdf["osm_names"] = gdf.index.to_series().map(names).astype("object")
gdf["osm_building_type"] = gdf.index.to_series().map(types).astype("object")

In [15]:
# Preview buildings whose osm_names value holds a ';', i.e. more than one name.
# na=False turns rows without a name into False, which is what the explicit
# notna() check achieved.
gdf[gdf["osm_names"].str.contains(";", na=False)].head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,osm_building_type
2264,DENILD0100000SWu,6.15,31001_3024,"Braunschweig, Stadt",,,,2863.790831,17612.313614,2372,MULTIPOLYGON Z (((606380.787 5797190.672 84.26...,Forschungsinstitut,Research institute,118 - Institut für Faserverbundleichtbau und A...,
2916,DENILD0100000THO,8.984,31001_3000,"Braunschweig, Stadt",Lilienthalplatz,3.0,Straßenneubauamt,2001.947551,17985.496794,3088,MULTIPOLYGON Z (((606214.781 5797360.337 84.84...,Gebäude für öffentliche Zwecke,Buildings for public purposes,Flughafen Braunschweig-Wolfsburg;Terminal Gate...,
2979,DENILD0100000TK1,4.39,31001_2000,"Braunschweig, Stadt",,,,195.809084,859.601879,3163,MULTIPOLYGON Z (((606671.871 5797085.842 84.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,122 - Institut für Softwaretechnologie;Institu...,office
6519,DENILD0100000zD0,7.556,31001_1000,"Braunschweig, Stadt",Wendebrück,20.0,,7308.626285,55223.980211,6988,MULTIPOLYGON Z (((603859.978 5798557.671 68.97...,Wohngebäude,residential buildings,Kartcity;You Jump,
7304,DENILD0100001CDl,5.399,31001_2000,"Braunschweig, Stadt",,,,2233.965381,12061.179095,7821,MULTIPOLYGON Z (((600135.035 5795143.761 81.90...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,Chadwick-Bau Beschleuniger/Maschinenhaus;Chadw...,government


In [16]:
# Load the OSM land-use polygons for the study area.
# Forward slashes keep the relative path portable across operating systems.
osm_landuse_data = gpd.read_file("Areas-of-interest-POIs/Land-use_Area-of-study.gpkg")

osm_landuse_data.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,15036412,7201,forest,,"MULTIPOLYGON (((10.77893 52.45542, 10.77913 52..."
1,24031436,7207,allotments,,"MULTIPOLYGON (((10.73406 52.42662, 10.73491 52..."
2,24975881,7218,grass,,"MULTIPOLYGON (((10.79298 52.43343, 10.79513 52..."
3,24975929,7218,grass,,"MULTIPOLYGON (((10.79383 52.43305, 10.79451 52..."
4,25021140,7203,residential,,"MULTIPOLYGON (((10.74657 52.42885, 10.74674 52..."


In [17]:
osm_landuse_data['fclass'].value_counts()

fclass
farmland             13779
meadow               13014
forest               10067
scrub                 8387
grass                 4723
residential           3106
farmyard              1092
industrial             927
park                   739
commercial             716
allotments             656
cemetery               473
retail                 203
heath                  199
quarry                 197
recreation_ground      147
orchard                133
nature_reserve         109
vineyard                 9
military                 4
Name: count, dtype: int64

In [18]:
osm_landuse_data['name'].value_counts()

name
Friedhof                            21
Pfingstanger                        15
Pferdekoppel                        15
Festplatz                           11
Steinkamp                           10
                                    ..
Rinderwiese                          1
Haferbergwiesen                      1
PHÖNIX-Seniorenzentrum Eichenhof     1
Gewerbegebiet Rohrwiesen             1
Seeliger Park                        1
Name: count, Length: 3388, dtype: int64

In [19]:
# Land-use polygons reprojected to the buildings' CRS.
landuse = osm_landuse_data.to_crs(gdf.crs)

# Left spatial join: one row per (building, intersecting land-use polygon);
# buildings without any match are kept with null land-use attributes. The
# building's row label travels along as the "gdf_idx" column.
j = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=landuse[["fclass", "name", "geometry"]],
    how="left",
    predicate="intersects",
)

# Per building: distinct land-use classes, sorted and joined with ';'.
# An empty result ("") is turned into NaN.
class_lu = j.groupby("gdf_idx")["fclass"].apply(
    lambda col: ";".join(sorted({value for value in col.dropna()}))
).replace("", np.nan)

# Per building: distinct, stripped, non-empty land-use names, sorted and joined
# with ';'. An empty result ("") is turned into NaN.
name_lu = j.groupby("gdf_idx")["name"].apply(
    lambda col: ";".join(
        sorted({text for text in (str(v).strip() for v in col.dropna()) if text})
    )
).replace("", np.nan)

# Write both aggregates back onto gdf by row label (NaN = no land use matched).
gdf["osm_landuse_class"] = gdf.index.to_series().map(class_lu)
gdf["osm_landuse_name"] = gdf.index.to_series().map(name_lu)

In [20]:
# Preview buildings that have a land-use class and whose land-use name holds a
# ';' (several names). The null check is on a different column than the string
# test, so na=False is needed to make rows without a name evaluate to False
# instead of leaking NaN into the boolean mask.
gdf[
    gdf["osm_landuse_class"].notna()
    & gdf["osm_landuse_name"].str.contains(";", na=False)
].head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,osm_building_type,osm_landuse_class,osm_landuse_name
8462,DENILD0100001T9S,2.724,31001_2000,"Braunschweig, Stadt",,,,46.105128,125.590369,9043,MULTIPOLYGON Z (((598300.795 5794266.086 83.56...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,commercial;forest,"Johann Heinrich von Thünen-Institut, Julius Kü..."
8623,DENILD0100001TEn,2.847,31001_2000,"Braunschweig, Stadt",,,,28.204322,80.297706,9204,MULTIPOLYGON Z (((597944.746 5794187.166 83.45...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,commercial;forest,"Johann Heinrich von Thünen-Institut, Julius Kü..."
8752,DENILD0100001TJD,2.582,31001_2500,"Braunschweig, Stadt",,,,22.348812,57.704633,9336,MULTIPOLYGON Z (((597486.217 5795395.603 80.86...,Gebäude zur Versorgung,Buildings for supply,,,commercial;forest,"Johann Heinrich von Thünen-Institut, Julius Kü..."
9211,DENILD0100001TZe,3.108,31001_2000,"Braunschweig, Stadt",,,,39.320126,122.206952,9810,MULTIPOLYGON Z (((597282.718 5794724.883 78.31...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,commercial;forest,"Johann Heinrich von Thünen-Institut, Julius Kü..."
9342,DENILD0100001TeN,2.754,31001_2000,"Braunschweig, Stadt",,,,34.49238,94.992015,9949,"MULTIPOLYGON Z (((597378.85 5795392.655 81.11,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,commercial;forest,"Johann Heinrich von Thünen-Institut, Julius Kü..."


In [21]:
# Load the function-code mapping table.
# Forward slashes keep the relative path portable across operating systems.
df_map = pd.read_excel(
    "Areas-of-interest-POIs/alkis_building_activity_map.xlsx"
)

# Join the mapping onto the buildings by function code, then drop the duplicate
# key column. validate="many_to_one" makes the merge raise if gfk_code is not
# unique in the mapping, instead of silently duplicating building rows.
gdf = gdf.merge(
    df_map,
    left_on="function",
    right_on="gfk_code",
    how="left",
    validate="many_to_one",
).drop(columns=["gfk_code"])

In [22]:
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,geometry,label_de,label_en,osm_names,osm_building_type,osm_landuse_class,osm_landuse_name,gfk_class,gfk_name,activities
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,MULTIPOLYGON Z (((608736.257 5799617.417 95.25...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,MULTIPOLYGON Z (((608534.443 5799829.37 94.225...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,2,MULTIPOLYGON Z (((609554.181 5797264.172 78.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,3,MULTIPOLYGON Z (((608098.849 5796746.146 83.91...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,4,"MULTIPOLYGON Z (((608926.355 5797165.768 84.6,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business


In [23]:
# Read the residential land-use layer.
residencial_ALKIS = gpd.read_file(
    filename='Areas-of-interest-POIs/Residencial-Landuse_ALKIS.gpkg'
)
# Print its CRS so it can be compared with the building layer's.
print(residencial_ALKIS.crs)
# First five rows as the cell output.
residencial_ALKIS.head(n=5)

EPSG:25832


Unnamed: 0,uuid,beginnt,anlass,name,zeitlichkeit,zustand,datumderletztenueberpruefung,istweiterenutzung,ergebnisderueberpruefung,mappingannahme,quellobjektid,geometry
0,DENIN00100003YDF,2023-08-30T08:41:55Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1000,,2023-11-16T08:25:16Z,,2000,1.0,DENIAL0100003YDF,"MULTIPOLYGON (((602885.02 5784212.258, 602886...."
1,DENIN00100003YDG,2021-05-10T10:19:25Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1000,,2023-11-16T08:25:16Z,,2000,1.0,DENIAL0100003YDG,"MULTIPOLYGON (((602954.327 5784791.11, 602956...."
2,DENIN00100003YDH,2011-03-17T18:07:36Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1000,,2023-11-16T08:25:16Z,,2000,1.0,DENIAL0100003YDH,"MULTIPOLYGON (((603169.893 5784970.22, 603170...."
3,DENIN00100004a5f,2011-03-17T18:16:07Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1000,,2023-11-16T08:25:16Z,,2000,1.0,DENIAL0100004a5f,"MULTIPOLYGON (((604680.913 5784691.206, 604681..."
4,DENIN0010000d82P,2023-08-21T12:10:40Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1000,,2023-11-16T08:25:16Z,,2000,1.0,DENIAL010000d82P,"MULTIPOLYGON (((603700.035 5784780.643, 603701..."


In [24]:
residencial_ALKIS.columns

Index(['uuid', 'beginnt', 'anlass', 'name', 'zeitlichkeit', 'zustand',
       'datumderletztenueberpruefung', 'istweiterenutzung',
       'ergebnisderueberpruefung', 'mappingannahme', 'quellobjektid',
       'geometry'],
      dtype='object')

In [25]:
# Residential polygons, geometry only, in the buildings' CRS.
residential = residencial_ALKIS[["geometry"]].to_crs(gdf.crs)

# Inner spatial join: only buildings intersecting at least one residential
# polygon are returned; "gdf_idx" carries each building's row label.
j = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=residential,
    how="inner",
    predicate="intersects",
)

# Row labels of the matched buildings, each listed once.
res_idx = j["gdf_idx"].unique()

# Make sure the tag column exists with object dtype: cast it when present,
# otherwise create it filled with NaN.
if "ALKIS_Landuse_info" in gdf.columns:
    gdf["ALKIS_Landuse_info"] = gdf["ALKIS_Landuse_info"].astype("object")
else:
    gdf["ALKIS_Landuse_info"] = pd.Series(index=gdf.index, dtype="object")

# Matched buildings get the value "residence"; all others are left untouched.
gdf.loc[res_idx, "ALKIS_Landuse_info"] = "residence"

In [26]:
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,...,label_de,label_en,osm_names,osm_building_type,osm_landuse_class,osm_landuse_name,gfk_class,gfk_name,activities,ALKIS_Landuse_info
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,2,...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,3,...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,4,...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,residence


In [27]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 655874 entries, 0 to 655873
Data columns (total 21 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   gml_id              655874 non-null  object  
 1   measHeight          655874 non-null  float64 
 2   function            655874 non-null  object  
 3   Stadt               655874 non-null  object  
 4   Strasse             242814 non-null  object  
 5   HausNr              242814 non-null  object  
 6   Name                4904 non-null    object  
 7   area_m2             655874 non-null  float64 
 8   volume_m3           655874 non-null  float64 
 9   _cluster            655874 non-null  int64   
 10  geometry            655874 non-null  geometry
 11  label_de            655874 non-null  object  
 12  label_en            655874 non-null  object  
 13  osm_names           15231 non-null   object  
 14  osm_building_type   7242 non-null    object  
 15  osm_landu

In [28]:
gdf['ALKIS_Landuse_info'].value_counts(dropna=False)

ALKIS_Landuse_info
residence    567307
NaN           88567
Name: count, dtype: int64

In [29]:
# Read the commercial land-use layer.
commercial_ALKIS = gpd.read_file(
    filename='Areas-of-interest-POIs/Commercial_Landuse_ALKIS.gpkg'
)
# Print its CRS so it can be compared with the building layer's.
print(commercial_ALKIS.crs)
# First five rows as the cell output.
commercial_ALKIS.head(n=5)

EPSG:25832


Unnamed: 0,uuid,beginnt,anlass,name,art,zustand,datumderletztenueberpruefung,istweiterenutzung,ergebnisderueberpruefung,mappingannahme,quellobjektid,geometry
0,DENIN00100003XZK,2021-05-10T10:19:25Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XZK,"MULTIPOLYGON (((602948.607 5784530.281, 602949..."
1,DENIN00100003XZM,2019-10-01T10:24:48Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XZM,"MULTIPOLYGON (((603075.842 5784864.969, 603076..."
2,DENIN00100003XZO,2017-04-24T12:53:16Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XZO,"MULTIPOLYGON (((603019.189 5784870.042, 603019..."
3,DENIN00100003XZR,2011-03-17T18:07:36Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XZR,"MULTIPOLYGON (((602877.899 5784936.868, 602893..."
4,DENIN00100003XZS,2022-12-16T11:41:10Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XZS,"MULTIPOLYGON (((601930.05 5784504.429, 601944...."


In [30]:
import pandas as pd
import geopandas as gpd

# Keep the tag column as object dtype so ';'-joined strings can be stored in it.
gdf["ALKIS_Landuse_info"] = gdf["ALKIS_Landuse_info"].astype("object")

# Commercial polygons, geometry only, in the buildings' CRS.
commercial = commercial_ALKIS[["geometry"]].to_crs(gdf.crs)

# Inner spatial join: one row per (building, intersecting commercial polygon);
# "gdf_idx" carries each building's row label.
j_com = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=commercial,
    how="inner",
    predicate="intersects",
)

# Row labels of buildings with at least one commercial match, each listed once.
com_idx = j_com["gdf_idx"].unique()

# Boolean mask over gdf rows: True where the building matched.
m = gdf.index.isin(com_idx)

# helper: add tag to a ';' separated string (no duplicates)
def add_tag(existing, tag="commercial"):
    """Return `existing` with `tag` added to its ';'-separated tag list.

    A missing value (per ``pd.isna``) yields `tag` alone. Otherwise the value is
    converted to str, split on ';', each piece stripped and empty pieces
    dropped; `tag` is appended only when it is not already among the pieces.
    """
    if pd.isna(existing):
        return tag
    current = list(filter(None, (piece.strip() for piece in str(existing).split(";"))))
    return ";".join(current if tag in current else current + [tag])

# apply ONLY to commercial buildings
gdf.loc[m, "ALKIS_Landuse_info"] = gdf.loc[m, "ALKIS_Landuse_info"].apply(add_tag)

In [31]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 655874 entries, 0 to 655873
Data columns (total 21 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   gml_id              655874 non-null  object  
 1   measHeight          655874 non-null  float64 
 2   function            655874 non-null  object  
 3   Stadt               655874 non-null  object  
 4   Strasse             242814 non-null  object  
 5   HausNr              242814 non-null  object  
 6   Name                4904 non-null    object  
 7   area_m2             655874 non-null  float64 
 8   volume_m3           655874 non-null  float64 
 9   _cluster            655874 non-null  int64   
 10  geometry            655874 non-null  geometry
 11  label_de            655874 non-null  object  
 12  label_en            655874 non-null  object  
 13  osm_names           15231 non-null   object  
 14  osm_building_type   7242 non-null    object  
 15  osm_landu

In [32]:
gdf['ALKIS_Landuse_info'].value_counts(dropna=False)

ALKIS_Landuse_info
residence               557431
NaN                      65648
commercial               22919
residence;commercial      9876
Name: count, dtype: int64

In [33]:
# Read the industrial land-use layer.
industries_ALKIS = gpd.read_file(
    filename='Areas-of-interest-POIs/Industries_Landuse_ALKIS.gpkg'
)
# Print its CRS so it can be compared with the building layer's.
print(industries_ALKIS.crs)
# First five rows as the cell output.
industries_ALKIS.head(n=5)

EPSG:25832


Unnamed: 0,uuid,beginnt,anlass,name,art,zustand,datumderletztenueberpruefung,istweiterenutzung,ergebnisderueberpruefung,mappingannahme,quellobjektid,geometry
0,DENIN001000032GL,2011-03-17T17:27:38Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL01000032GL,"MULTIPOLYGON (((599196.486 5788350.533, 599198..."
1,DENIN0010000b7Br,2013-01-30T10:01:47Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL010000b7Br,"MULTIPOLYGON (((599324.138 5788529.055, 599333..."
2,DENIN00100002bNR,2015-10-29T13:32:52Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100002bNR,"MULTIPOLYGON (((598573.553 5791958.433, 598591..."
3,DENIN00100002bO3,2011-03-17T16:54:43Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100002bO3,"MULTIPOLYGON (((598178.35 5791936.246, 598197...."
4,DENIN00100002bOm,2011-03-17T16:54:43Z,https://registry.gdi-de.org/codelist/de.adv-on...,,,,2023-11-16T08:25:16Z,,2000,,DENIAL0100002bOm,"MULTIPOLYGON (((598476.522 5791959.534, 598478..."


In [34]:
# Keep the tag column as object dtype so ';'-joined strings can be stored in it.
gdf["ALKIS_Landuse_info"] = gdf["ALKIS_Landuse_info"].astype("object")

# Industrial polygons, geometry only, in the buildings' CRS.
industries = industries_ALKIS[["geometry"]].to_crs(gdf.crs)

# Inner spatial join: one row per (building, intersecting industrial polygon);
# "gdf_idx" carries each building's row label.
j_ind = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=industries,
    how="inner",
    predicate="intersects",
)

# Row labels of buildings with at least one industrial match, each listed once.
ind_idx = j_ind["gdf_idx"].unique()

# Boolean mask over gdf rows: True where the building matched.
m = gdf.index.isin(ind_idx)

# helper: add tag to a ';' separated string (no duplicates)
def add_tag(existing, tag="industrial"):
    """Return `existing` with `tag` added to its ';'-separated tag list.

    A missing value (per ``pd.isna``) yields `tag` alone. Otherwise the value is
    converted to str and split on ';'; stripped, non-empty pieces are kept in
    order and `tag` is appended unless it is already one of them.
    """
    if not pd.isna(existing):
        kept = []
        for raw in str(existing).split(";"):
            cleaned = raw.strip()
            if cleaned:
                kept.append(cleaned)
        if tag not in kept:
            kept.append(tag)
        return ";".join(kept)
    return tag

# apply ONLY to industrial buildings
gdf.loc[m, "ALKIS_Landuse_info"] = gdf.loc[m, "ALKIS_Landuse_info"].apply(add_tag)

In [35]:
gdf['ALKIS_Landuse_info'].value_counts(dropna=False)

ALKIS_Landuse_info
residence                          554257
NaN                                 47703
commercial                          22323
industrial                          17945
residence;commercial                 9588
residence;industrial                 3174
commercial;industrial                 596
residence;commercial;industrial       288
Name: count, dtype: int64

In [36]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 655874 entries, 0 to 655873
Data columns (total 21 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   gml_id              655874 non-null  object  
 1   measHeight          655874 non-null  float64 
 2   function            655874 non-null  object  
 3   Stadt               655874 non-null  object  
 4   Strasse             242814 non-null  object  
 5   HausNr              242814 non-null  object  
 6   Name                4904 non-null    object  
 7   area_m2             655874 non-null  float64 
 8   volume_m3           655874 non-null  float64 
 9   _cluster            655874 non-null  int64   
 10  geometry            655874 non-null  geometry
 11  label_de            655874 non-null  object  
 12  label_en            655874 non-null  object  
 13  osm_names           15231 non-null   object  
 14  osm_building_type   7242 non-null    object  
 15  osm_landu

In [37]:
# Read the public-office land-use layer.
public_office_ALKIS = gpd.read_file(
    filename='Areas-of-interest-POIs/Public-office_Landuse_ALKIS.gpkg'
)
# Print its CRS so it can be compared with the building layer's.
print(public_office_ALKIS.crs)
# First five rows as the cell output.
public_office_ALKIS.head(n=5)

EPSG:25832


Unnamed: 0,uuid,beginnt,anlass,name,funktion,zustand,datumderletztenueberpruefung,istweiterenutzung,ergebnisderueberpruefung,mappingannahme,quellobjektid,geometry
0,DENIN00100003XPH,2015-10-12T13:27:25Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1140,,2023-11-16T08:25:16Z,,2000,,DENIAL0100003XPH,"MULTIPOLYGON (((603103.462 5784859.466, 603103..."
1,DENIN0010000e64Z,2016-06-30T07:16:09Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1170,,2023-11-16T08:25:16Z,,2000,,DENIAL010000e64Z,"MULTIPOLYGON (((602986.954 5784872.434, 602988..."
2,DENIN0010000hVIV,2021-05-10T11:50:38Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1120,,2023-11-16T08:25:16Z,,2000,,DENIAL010000hVIV,"MULTIPOLYGON (((602847.447 5784759.944, 602847..."
3,DENIN0010000hVIW,2021-05-10T10:19:25Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1160,,2023-11-16T08:25:16Z,,2000,,DENIAL010000hVIW,"MULTIPOLYGON (((602783.376 5784755.872, 602786..."
4,DENIN0010000hVnk,2020-03-05T11:26:10Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1170,,2023-11-16T08:25:16Z,,2000,,DENIAL010000hVnk,"MULTIPOLYGON (((603100.846 5784933.422, 603102..."


In [38]:
# Keep the tag column as object dtype so ';'-joined strings can be stored in it.
gdf["ALKIS_Landuse_info"] = gdf["ALKIS_Landuse_info"].astype("object")

# Public-office polygons, geometry only, in the buildings' CRS.
public_office = public_office_ALKIS[["geometry"]].to_crs(gdf.crs)

# Inner spatial join: one row per (building, intersecting public-office
# polygon); "gdf_idx" carries each building's row label.
j_pub = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=public_office,
    how="inner",
    predicate="intersects",
)

# Row labels of buildings with at least one match, each listed once.
pub_idx = j_pub["gdf_idx"].unique()

# Boolean mask over gdf rows: True where the building matched.
m = gdf.index.isin(pub_idx)

# helper: add tag safely to ';'-separated string
def add_tag(existing, tag="public_office"):
    """Return `existing` with `tag` added to its ';'-separated tag list.

    A missing value (per ``pd.isna``) yields `tag` alone. Otherwise the value is
    converted to str, split on ';', pieces are stripped and empty ones dropped;
    the list is returned re-joined, with `tag` appended if it was absent.
    """
    if pd.isna(existing):
        return tag
    parts = [part for part in map(str.strip, str(existing).split(";")) if part]
    if tag in parts:
        return ";".join(parts)
    return ";".join(parts + [tag])

# apply ONLY to public-office buildings
gdf.loc[m, "ALKIS_Landuse_info"] = gdf.loc[m, "ALKIS_Landuse_info"].apply(add_tag)

In [39]:
gdf['ALKIS_Landuse_info'].value_counts(dropna=False)

ALKIS_Landuse_info
residence                                        552274
NaN                                               36207
commercial                                        21944
industrial                                        17856
public_office                                     11496
residence;commercial                               9402
residence;industrial                               3147
residence;public_office                            1983
commercial;industrial                               587
commercial;public_office                            379
residence;commercial;industrial                     283
residence;commercial;public_office                  186
industrial;public_office                             89
residence;industrial;public_office                   27
commercial;industrial;public_office                   9
residence;commercial;industrial;public_office         5
Name: count, dtype: int64

In [40]:
# Read the sports-area land-use layer.
sport_ALKIS = gpd.read_file(
    filename='Areas-of-interest-POIs/Sports-area_Landuse_ALKIS.gpkg'
)
# Print its CRS so it can be compared with the building layer's.
print(sport_ALKIS.crs)
# First five rows as the cell output.
sport_ALKIS.head(n=5)

EPSG:25832


Unnamed: 0,uuid,beginnt,anlass,name,sportart,datumderletztenueberpruefung,istweiterenutzung,ergebnisderueberpruefung,mappingannahme,quellobjektid,geometry
0,DENIN1030000p0H0,2023-07-10T10:25:54Z,https://registry.gdi-de.org/codelist/de.adv-on...,Bogen-Schießanlage,1130,,1000,2000,,DENIAL030000p0H0,"MULTIPOLYGON (((615036.216 5726440.935, 615100..."
1,DENIN1030000nId9,2018-07-23T14:00:32Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1130,,1000,2000,,DENIAL030000nId9,"MULTIPOLYGON (((614767.233 5725379.495, 614771..."
2,DENIN1030000pPkI,2017-06-15T14:13:49Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1130,,1000,2000,,DENIAL030000pPkI,"MULTIPOLYGON (((604703.136 5730351.154, 604705..."
3,DENIN1030000mSCG,2014-09-26T06:54:07Z,https://registry.gdi-de.org/codelist/de.adv-on...,Biathlon,1130,,1000,2000,,DENIAL030000mSCG,"MULTIPOLYGON (((604080.892 5735481.424, 604111..."
4,DENIN1030000lKHF,2014-01-16T09:09:51Z,https://registry.gdi-de.org/codelist/de.adv-on...,,1130,,1000,2000,,DENIAL030000lKHF,"MULTIPOLYGON (((611086.773 5730754.505, 611095..."


In [41]:
# Keep the tag column as object dtype so ';'-joined strings can be stored in it.
gdf["ALKIS_Landuse_info"] = gdf["ALKIS_Landuse_info"].astype("object")

# Sports-area polygons, geometry only, in the buildings' CRS.
sport = sport_ALKIS[["geometry"]].to_crs(gdf.crs)

# Inner spatial join: one row per (building, intersecting sports polygon);
# "gdf_idx" carries each building's row label.
j_sport = gpd.sjoin(
    left_df=gdf[["geometry"]].reset_index(names="gdf_idx"),
    right_df=sport,
    how="inner",
    predicate="intersects",
)

# Row labels of buildings with at least one match, each listed once.
sport_idx = j_sport["gdf_idx"].unique()

# Boolean mask over gdf rows: True where the building matched.
m = gdf.index.isin(sport_idx)

# helper: add tag safely to ';'-separated string
def add_tag(existing, tag="sport"):
    """Return `existing` with `tag` added to its ';'-separated tag list.

    A missing value (per ``pd.isna``) yields `tag` alone. Otherwise the value is
    converted to str and split on ';'; stripped, non-empty pieces are kept and
    `tag` is appended only when it is not already among them.
    """
    if pd.isna(existing):
        return tag
    pieces = (chunk.strip() for chunk in str(existing).split(";"))
    tags = [chunk for chunk in pieces if chunk]
    tags += [] if tag in tags else [tag]
    return ";".join(tags)

# apply ONLY to sport buildings
gdf.loc[m, "ALKIS_Landuse_info"] = gdf.loc[m, "ALKIS_Landuse_info"].apply(add_tag)

In [42]:
gdf['ALKIS_Landuse_info'].value_counts(dropna=False)

ALKIS_Landuse_info
residence                                        552132
NaN                                               34977
commercial                                        21917
industrial                                        17849
public_office                                     11447
residence;commercial                               9396
residence;industrial                               3146
residence;public_office                            1977
sport                                              1230
commercial;industrial                               587
commercial;public_office                            379
residence;commercial;industrial                     283
residence;commercial;public_office                  186
residence;sport                                     142
industrial;public_office                             89
public_office;sport                                  49
residence;industrial;public_office                   27
commercial;sport             

In [43]:
gdf[gdf['ALKIS_Landuse_info'].isna()].head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,_cluster,...,label_de,label_en,osm_names,osm_building_type,osm_landuse_class,osm_landuse_name,gfk_class,gfk_name,activities,ALKIS_Landuse_info
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,0,...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,1,...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
14,DENILD01000002AI,2.567,31001_1000,"Braunschweig, Stadt",,,,20.005565,51.354285,14,...,Wohngebäude,residential buildings,,,allotments,KGV Auf dem Klei,Gebäude,Wohngebäude,home;meetup,
63,DENILD01000002BX,2.33,31001_1000,"Braunschweig, Stadt",,,,15.681107,36.536978,64,...,Wohngebäude,residential buildings,,,allotments,KGV Auf dem Klei,Gebäude,Wohngebäude,home;meetup,
75,DENILD01000002Bt,2.175,31001_1000,"Braunschweig, Stadt",,,,20.867027,45.385784,77,...,Wohngebäude,residential buildings,,,allotments,KGV Auf dem Klei,Gebäude,Wohngebäude,home;meetup,


In [44]:
# gdf[gdf['ALKIS_Landuse_info'].isna()].to_file('Buildings-with-no-ALKIS-tags-intersect.gpkg')

In [46]:
# Remove the `_cluster` column before export.
# errors="ignore" keeps the cell re-runnable: a second execution, when the
# column is already gone, no longer raises KeyError.
gdf = gdf.drop(columns=["_cluster"], errors="ignore")
gdf.columns

Index(['gml_id', 'measHeight', 'function', 'Stadt', 'Strasse', 'HausNr',
       'Name', 'area_m2', 'volume_m3', 'geometry', 'label_de', 'label_en',
       'osm_names', 'osm_building_type', 'osm_landuse_class',
       'osm_landuse_name', 'gfk_class', 'gfk_name', 'activities',
       'ALKIS_Landuse_info'],
      dtype='object')

In [47]:
gdf.head()

Unnamed: 0,gml_id,measHeight,function,Stadt,Strasse,HausNr,Name,area_m2,volume_m3,geometry,label_de,label_en,osm_names,osm_building_type,osm_landuse_class,osm_landuse_name,gfk_class,gfk_name,activities,ALKIS_Landuse_info
0,DENILD01000000Fg,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997289,13.990511,MULTIPOLYGON Z (((608736.257 5799617.417 95.25...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
1,DENILD01000000Fh,3.5,51002_1250,"Braunschweig, Stadt",,,,3.997593,13.991576,MULTIPOLYGON Z (((608534.443 5799829.37 94.225...,Mast,mast,,,farmland,,Bauwerk oder Anlage für Industrie und Gewerbe,Mast,work,
2,DENILD01000002A1,4.377,31001_2000,"Braunschweig, Stadt",Ackerweg,2.0,,212.799509,931.423451,MULTIPOLYGON Z (((609554.181 5797264.172 78.94...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,commercial
3,DENILD01000002A3,5.155,31001_2000,"Braunschweig, Stadt",Stieglitzweg,3.0,,247.435021,1275.527533,MULTIPOLYGON Z (((608098.849 5796746.146 83.91...,Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,commercial
4,DENILD01000002A4,2.746,31001_2000,"Braunschweig, Stadt",,,,83.722687,229.902499,"MULTIPOLYGON Z (((608926.355 5797165.768 84.6,...",Gebäude für Wirtschaft oder Gewerbe,Buildings for business or commerce,,,residential,,Gebäude,Gebäude für Wirtschaft oder Gewerbe,work;business,residence


In [49]:
# Write the enriched building layer to disk.
gdf.to_file(
    filename='Areas-of-interest-POIs/Enriched-building-volume-data.gpkg'
)