<div class="alert alert-block alert-warning">
    Purpose: Get building data from OSM, clip it to the administrative boundary (<b>halle.geojson</b>) to reduce the number of buildings, and explore the availability of building height and/or number of levels.
</div>

# Exploring OSM building height data

In [None]:
import ast
import folium
import numpy as np
import osmnx as ox 
import utm
from typing import List
import geopandas as gpd

## Settings

In [None]:
# Bounding box of the study area, one edge per variable.
# NOTE(review): values look like latitude/longitude in decimal degrees — confirm.
south = 51.3435
east = 12.5023
north = 51.6020
west = 11.4419

In [None]:
# Load the administrative boundary; it is used further down as the clipping mask.
border = gpd.read_file('halle.geojson')

In [None]:
# Display the boundary's CRS so it can be compared with the buildings' CRS before clipping.
border.crs

## Obtaining the buildings from OSM

In [None]:
# Download every OSM feature inside the bounding box that carries a `building` tag (any value).
# NOTE(review): the positional (north, south, east, west) order follows the osmnx 1.x signature;
# newer osmnx releases replace this function with `features_from_bbox` — confirm the pinned version.
buildings = ox.geometries_from_bbox(north, south, east, west, tags = {'building': True})

In [None]:
# Keep only the geometry and the attributes analysed in this notebook.
cols = ['geometry', 'building', 'building:levels', 'height']

# A plain `buildings[cols]` raises KeyError if one of the tag columns is absent
# from the downloaded data (presumably when no feature in the area carries that
# tag). Add any absent column as all-NaN so the missing-value statistics below
# still work.
for col in cols:
    if col not in buildings.columns:
        buildings[col] = np.nan

buildings = buildings[cols]

In [None]:
# Display the buildings' CRS; it should match the boundary's CRS for the clip that follows.
buildings.crs

### Data subset

In [None]:
# gpd.clip expects both layers to share one CRS. Reproject the boundary to the
# buildings' CRS when both CRSs are defined and differ; otherwise pass the
# boundary through unchanged.
needs_reprojection = (
    border.crs is not None
    and buildings.crs is not None
    and border.crs != buildings.crs
)

# Restrict the buildings to the administrative boundary.
buildings = gpd.clip(
    buildings,
    border.to_crs(buildings.crs) if needs_reprojection else border,
)

In [None]:
# Drop rows whose geometry is either empty or missing.
# The GeoPandas user guide recommends combining both checks to keep only actual geometries:
# https://geopandas.org/en/stable/docs/user_guide/missing_empty.html
unusable_geometry = buildings.geometry.is_empty | buildings.geometry.isna()
buildings = buildings[~unusable_geometry]

In [None]:
# Preview the first rows after clipping and removing empty/missing geometries.
buildings.head()

In [None]:
# Retain only rows whose geometry type is exactly 'Polygon'
# (any other type, e.g. 'MultiPolygon' or 'Point', is dropped).
is_polygon = buildings.geometry.geom_type.eq('Polygon')
buildings = buildings[is_polygon]

## Buildings data overview

In [None]:
# Non-null value count per column, i.e. how many buildings carry each attribute.
buildings.count()

In [None]:
# Persist the filtered buildings as GeoJSON text for later use.
with open('building_osm.geojson', mode='w') as geojson_out:
    geojson_text = buildings.to_json()
    geojson_out.write(geojson_text)

## Total buildings

In [None]:
# Number of rows (buildings) remaining after all filters; used as the denominator in the summary.
total_buildings = buildings.shape[0]

In [None]:
# Report the number of buildings kept for the analysis.
print(f'Total OSM buildings: {total_buildings}')

## Total buildings missing heights

In [None]:
# Subset of buildings with no value in the `height` column.
missing_heights = buildings[buildings['height'].isna()]

In [None]:
# Number of buildings without a `height` value
len(missing_heights)

## Buildings missing levels

In [None]:
# Subset of buildings with no value in the `building:levels` column.
missing_levels = buildings[buildings['building:levels'].isna()]

In [None]:
# Number of buildings without a `building:levels` value
len(missing_levels)

## Buildings missing levels and missing heights

In [None]:
# Buildings for which neither `height` nor `building:levels` is available.
no_height = buildings['height'].isna()
no_levels = buildings['building:levels'].isna()
missing_both = buildings[no_height & no_levels]

In [None]:
# Number of buildings lacking both `height` and `building:levels`
len(missing_both)

In [None]:
# Number of buildings that have at least one of `height` or `building:levels`
len(buildings) - len(missing_both)

## Buildings with levels and height data

In [None]:
# Buildings for which both `height` and `building:levels` are present.
has_height = buildings['height'].notna()
has_levels = buildings['building:levels'].notna()
having_both = buildings[has_height & has_levels]

In [None]:
# Preview the first buildings that carry both attributes.
having_both.head()

In [None]:
# Number of buildings with both `height` and `building:levels`
len(having_both.index)

In [None]:
# Distinct values found in the `height` column among buildings that have both attributes.
having_both.height.unique()

## Kind of building

In [None]:
# Distinct values of the `building` column, i.e. the kinds of building present in the data.
buildings.building.unique()

In [None]:
# Per-column non-null counts restricted to buildings whose `building` value is 'yes'.
buildings[buildings['building'].eq('yes')].count()

## Summary

In [None]:
from matplotlib import pyplot as plt

# Bar chart of the share (in percent) of buildings lacking height, levels, or both.

# Fail with an explicit message instead of a bare ZeroDivisionError when the
# earlier clip/filter steps left no buildings.
if total_buildings == 0:
    raise ValueError('No buildings available: cannot compute missing-data percentages.')

plt.rcParams["figure.figsize"] = [7.50, 5.50]
plt.rcParams["figure.autolayout"] = True

x = ['missing heights', 'missing levels', 'missing both']
# Percentage of all buildings in each subset, in the same order as the labels in `x`.
y = [len(subset) / total_buildings * 100.0
     for subset in (missing_heights, missing_levels, missing_both)]

width = 0.35
fig, ax = plt.subplots()

pps = ax.bar(x, y, width, align='center')
ax.set_ylabel('Share of buildings (%)')

# Annotate each bar with its value, centred horizontally and placed slightly above the bar top.
for p in pps:
    bar_height = p.get_height()
    ax.text(x=p.get_x() + p.get_width() / 2, y=bar_height + .50,
            s=f"{bar_height:.2f}%",
            ha='center')

plt.show()