In [96]:
import osmium
import shapely.wkb as wkblib
import pandas as pd
import geopandas
import osmnx as ox

In [None]:
#In this notebook we use osmium to load building data from an .osm file, convert it into
#a geopandas GeoDataFrame, assign the correct projection to the GeoDataFrame, and calculate
#the area of the buildings.

In [97]:
#This is an example class that shows how an osmium data loader, or "handler",
#works. All kinds of processing can be done inside the default callback methods:
#node (called for every node in the data), way (called for every way in the data),
#area and so on; see more in the following cells.
class BuildingCounterHandler(osmium.SimpleHandler):
    """Count the OSM objects that carry the tag ``building=yes``.

    The node, way and relation callbacks all feed the same counter, so
    ``num_nodes`` is the total over all three object types, not only nodes.
    """

    def __init__(self):
        super().__init__()
        # Running total of objects tagged building=yes seen so far.
        self.num_nodes = 0

    def count_building(self, tags):
        """Increment ``num_nodes`` when ``tags`` maps ``building`` to ``'yes'``."""
        is_building = tags.get('building') == 'yes'
        if not is_building:
            return
        self.num_nodes = self.num_nodes + 1

    def node(self, n):
        """Node callback: count the node if it is tagged as a building."""
        node_tags = n.tags
        self.count_building(node_tags)

    def way(self, w):
        """Way callback: count the way if it is tagged as a building."""
        way_tags = w.tags
        self.count_building(way_tags)

    def relation(self, r):
        """Relation callback: count the relation if it is tagged as a building."""
        relation_tags = r.tags
        self.count_building(relation_tags)

In [98]:
#Demonstrates how to use a data loader in osmium: after the inherited method
#"apply_file" has been called, h contains the end result.
h = BuildingCounterHandler()

#the file name is a relative path, so it is looked up in the current working directory
h.apply_file("gibraltar.osm")

#num_nodes is incremented by the node, way and relation callbacks alike, so the number
#printed here covers building=yes objects of any of those types, not only nodes
print("Number of nodes: %d" % h.num_nodes)

Number of nodes: 2082


In [102]:
#Here we define a more useful class for handling the data: it creates
#a multipolygon for every building. It uses the "area" callback, and I don't know
#whether e.g. "way" or "relation" also could/should be used.
#Inspired by https://max-coding.medium.com/extracting-open-street-map-osm-street-data-from-data-files-using-pyosmium-afca6eaa5d00
class BuildingHandler(osmium.SimpleHandler):
    """Collect every area tagged ``building=yes`` as a dict of geometry and tags.

    After ``apply_file`` has run, ``buildings`` holds one dict per building and
    ``building_count`` the number of dicts that were collected.
    """

    # Keys the handler writes into every row itself. An OSM tag that happens to
    # use one of these names must not replace the handler's own value.
    _RESERVED_KEYS = ("w_id", "geometry")

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        # Number of buildings collected so far (kept in step with len(self.buildings)).
        self.building_count = 0
        # One dict per building: "w_id", "geometry" and one entry per OSM tag.
        self.buildings = []
        # Per-instance factory that creates WKB from an osmium geometry.
        self.wkbfab = osmium.geom.WKBFactory()

    def area(self, w):
        """Area callback: store the multipolygon and tags of a ``building=yes`` area.

        Areas whose geometry cannot be built or parsed are reported with
        ``print`` and skipped, so one broken geometry does not abort the load.

        Args:
            w: the osmium area object passed to the callback.
        """
        if w.tags.get("building") != 'yes':
            return

        try:
            wkb = self.wkbfab.create_multipolygon(w)
            # The factory output is parsed as hex-encoded WKB.
            geo = wkblib.loads(wkb, hex=True)
        except Exception as e:
            # Deliberately best-effort: report the problem and move on.
            print(e)
            return

        # NOTE(review): w is an area, so w.id is presumably the area id that
        # osmium derives from the source way/relation id, not that id itself
        # (see Area.orig_id() in pyosmium) - confirm before joining on w_id.
        row = {"w_id": w.id, "geometry": geo}

        for key, value in w.tags:
            # A tag literally named "w_id" or "geometry" would overwrite the
            # handler's own entry (and a string in "geometry" breaks the later
            # GeoDataFrame construction), so store it under a prefixed key.
            if key in self._RESERVED_KEYS:
                key = "tag:" + key
            row[key] = value

        self.buildings.append(row)
        self.building_count += 1


In [103]:
#run the handler over the file; afterwards buildinghandler.buildings holds one dict
#per building=yes area and buildinghandler.building_count the number of them
#NOTE(review): locations=True presumably makes osmium keep node locations so that
#geometries can be assembled - confirm in the pyosmium documentation
buildinghandler = BuildingHandler()
buildinghandler.apply_file("gibraltar.osm", locations=True)

In [104]:
#first a pandas DataFrame from the list of row dicts collected by the handler
#NOTE(review): could this step be skipped? geopandas.GeoDataFrame can probably be built
#from the list directly - confirm before removing the intermediate df
df = pd.DataFrame(buildinghandler.buildings)
#and then a GeoDataFrame from the DataFrame; geopandas needs to be told which column
#contains the geometries
gdf = geopandas.GeoDataFrame(df, geometry='geometry')

Index(['w_id', 'geometry', 'building', 'historic', 'name', 'name:ja',
       'name:nl', 'name:zh', 'tourism', 'wheelchair',
       ...
       'unisex', 'museum', 'url', 'defensive_works', 'club', 'ele', 'phone',
       'wikimedia_commons', 'fee', 'opening_hours'],
      dtype='object', length=106)

In [109]:
#Then set the projection, in two steps that do different things:
# 1. set_crs only *declares* which CRS the existing coordinates are in (the data
#    is treated as lon/lat, EPSG:4326). It moves no coordinates, but without it the
#    GeoDataFrame has no CRS at all and cannot be reprojected.
# 2. project_gdf from osmnx then *reprojects* the geometries to a projected CRS
#    chosen by osmnx, which is what makes the area calculation below meaningful.
#So step 1 cannot be replaced by declaring the target CRS directly.
#The "crs is None" guard keeps the cell re-runnable: once gdf has been projected,
#calling set_crs("EPSG:4326") on it again would raise because the CRS differs.
#project_gdf is addressed through ox.projection because newer osmnx releases no
#longer expose it as ox.project_gdf.
if gdf.crs is None:
    gdf = gdf.set_crs("EPSG:4326")
gdf = ox.projection.project_gdf(gdf)

Unnamed: 0,w_id,geometry,building,historic,name,name:ja,name:nl,name:zh,tourism,wheelchair,...,unisex,museum,url,defensive_works,club,ele,phone,wikimedia_commons,fee,opening_hours
0,85650974,"MULTIPOLYGON (((288740.000 4002556.736, 288746...",yes,fort,Princess Caroline's Battery,プリンセス・キャロライン砲台,Prinses Carolina Batterij,卡罗琳公主炮台,attraction,no,...,,,,,,,,,,
1,85793312,"MULTIPOLYGON (((288557.115 4004055.636, 288557...",yes,,Estación de Autobuses de La Línea de La Concep...,,,,,,...,,,,,,,,,,
2,106822078,"MULTIPOLYGON (((288767.127 4003813.289, 288766...",yes,,Gibraltar Info,,,,information,,...,,,,,,,,,,
3,144975340,"MULTIPOLYGON (((288096.144 4001098.502, 288114...",yes,,,,,,,,...,,,,,,,,,,
4,144975466,"MULTIPOLYGON (((288060.728 4001086.717, 288079...",yes,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2074,2247411782,"MULTIPOLYGON (((288653.560 4002114.484, 288657...",yes,,,,,,,,...,,,,,,,,,,
2075,2247411784,"MULTIPOLYGON (((288727.551 4002126.493, 288735...",yes,,,,,,,,...,,,,,,,,,,
2076,2247411786,"MULTIPOLYGON (((288724.223 4002127.028, 288726...",yes,,,,,,,,...,,,,,,,,,,
2077,2247411788,"MULTIPOLYGON (((288726.251 4002122.639, 288729...",yes,,,,,,,,...,,,,,,,,,,


In [114]:
#sanity check: the per-building area can be computed from the geometry column
gdf.geometry.area

25       534.728374
28       529.083542
29       520.208704
30      1216.718644
31      1200.601062
           ...     
1890     334.039261
1891     334.117994
1892     333.999247
2069     510.353280
2070      78.343119
Length: 73, dtype: float64

In [115]:
#keep only the buildings whose "building:levels" tag is present
gdf = gdf.loc[gdf['building:levels'].notna()]
gdf.shape

(73, 106)