In [2]:
import osmium
import shapely.wkb as wkblib
import pandas as pd
import geopandas
import osmnx as ox
import matplotlib.pyplot as plt
import psutil

In [18]:
#uncomment and download data if you need it
!wget http://download.geofabrik.de/europe/great-britain-latest.osm.pbf

--2023-03-22 07:45:53--  http://download.geofabrik.de/europe/great-britain-latest.osm.pbf
Resolving download.geofabrik.de (download.geofabrik.de)... 65.109.50.43, 65.109.48.72
Connecting to download.geofabrik.de (download.geofabrik.de)|65.109.50.43|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1597102569 (1.5G) [application/octet-stream]
Saving to: ‘great-britain-latest.osm.pbf’


2023-03-22 07:46:47 (28.7 MB/s) - ‘great-britain-latest.osm.pbf’ saved [1597102569/1597102569]



In [4]:
#https://max-coding.medium.com/extracting-open-street-map-osm-street-data-from-data-files-using-pyosmium-afca6eaa5d00

In [5]:
#this class creates
#multipolygons for all buildings. Uses "area", and I don't know
#if e.g. "way" or "relation" also could/should be used.
#Inspiration from here https://max-coding.medium.com/extracting-open-street-map-osm-street-data-from-data-files-using-pyosmium-afca6eaa5d00
class BuildingHandler(osmium.SimpleHandler):
    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.building_count = 0
        self.buildings = []
        # A global factory that creates WKB from a osmium geometry
        self.wkbfab = osmium.geom.WKBFactory()

    def area(self, w):
        if w.tags.get("building") == 'yes':
            try:
                wkb = self.wkbfab.create_multipolygon(w)
                geo = wkblib.loads(wkb, hex=True)
            except Exception as e:
                print(e)
                return
            row = { "w_id": w.id, "geometry": geo }

            for key, value in w.tags:
                row[key] = value

            self.buildings.append(row)
            self.building_count += 1

In [19]:
#this runs the actual loading from the file, might take a while
buildinghandler = BuildingHandler()
buildinghandler.apply_file("great-britain-latest.osm.pbf", locations=True)

invalid area (area_id=807768990)


In [20]:
len(buildinghandler.buildings)

4642128

In [21]:
psutil.virtual_memory().percent

21.1

In [78]:
#if one wants to store a gdf, this is how you do it
#gdf.to_file('datafram.shp')

  gdf.to_file('datafram.shp')


In [None]:
#in this loop buildings with and without level-data are separated into two different dataframes
#this loop is needed, because trying to create a geopandas dataframe from the whole array of
#buildings at once requires more memory than 32GB. By looping through them, this is fixed.
i = 200000
while i < len(buildinghandler.buildings):
    dfx = pd.DataFrame(buildinghandler.buildings[(i-200000):min([i, len(buildinghandler.buildings)-1])])
    gdfx = geopandas.GeoDataFrame(dfx, geometry='geometry')
    gdfx = gdfx.set_crs("EPSG:4326")
    gdfx = ox.project_gdf(gdfx)
    gdfx = gdfx[['w_id', 'geometry', 'building:levels']]
    gdfx = gdfx.rename(columns={'building:levels':'levels'})
    levels = gdfx.dropna(subset=['levels'])
    nolevel = gdfx.loc[gdfx.index.difference(gdfx.dropna(subset=['levels']).index)]
    if i < 200001:
        levelsall = levels
        nolevelall = nolevel
    else:
        levelsall = pd.concat([levelsall, levels])
        nolevelall = pd.concat([nolevelall, nolevel])
    print(levelsall.shape)
    print(nolevelall.shape)
    i += 200000

In [69]:
levelsall = levelsall[levelsall['levels'].str.contains('[^0-9]') == False]
levelsall.shape

(106443, 3)

In [70]:
levelsall['levels'].astype('float').mean()

2.1611190966056952

In [71]:
nolevelall['levels'] = levelsall['levels'].astype('float').mean()
nolevelall

Unnamed: 0,w_id,geometry,levels
0,5912374,"MULTIPOLYGON (((706952.165 5703517.791, 706955...",2.161119
1,5912376,"MULTIPOLYGON (((707029.568 5703570.508, 707043...",2.161119
2,7160774,"MULTIPOLYGON (((634025.739 5626025.557, 634028...",2.161119
3,7160776,"MULTIPOLYGON (((635952.709 5623808.527, 635957...",2.161119
4,7160778,"MULTIPOLYGON (((634386.662 5622649.720, 634388...",2.161119
...,...,...,...
199995,2294078994,"MULTIPOLYGON (((425135.293 6190748.603, 425144...",2.161119
199996,2294078996,"MULTIPOLYGON (((425136.804 6190759.598, 425147...",2.161119
199997,2294078998,"MULTIPOLYGON (((425131.250 6190738.888, 425140...",2.161119
199998,2294079000,"MULTIPOLYGON (((425126.395 6190733.339, 425128...",2.161119


In [72]:
all = pd.concat([levelsall, nolevelall])
all.shape

(4592710, 3)

In [73]:
all['floorarea'] = all.area * all['levels'].astype(float)

In [74]:
#calculating kwh/a using climate zone 4
all['kWh/a'] = all['floorarea'] * 133.280

In [75]:
#calculating co2-emissions using this statistic:
#https://www.statista.com/statistics/426988/united-kingdom-uk-heating-methods/#:~:text=During%20the%20winter%20months%2C%20most,used%20this%20method%20of%20heating.
#using an average of the different heating form emissions for the category "other"
co2 = 0.199*0.79 + 0.104 * 0.11 + 0.255 * 0.05 + 0.23 * 0.05

In [76]:
all['co2/a'] = all['kWh/a'] * co2

In [78]:
all.to_file('britain_buildings.shp')