<h1>3. Extract Ways and Geotag</h1>
<blockquote>
    This script takes a .osm file containing <u>all</u> OSM data (nodes, ways, and relations) in a region.
    The output is a CSV file of geotagged addresses gathered from the ways in the input file whose <code>building</code> tag matches one of the values provided in a .json file.
</blockquote>

In [1]:
import csv
import json
import osmium as o
from pyproj import Proj, itransform
from shapely import wkt
from shapely.geometry import LineString
import sys

<h4>Input Filenames and Geocoordinates Projection</h4>
<blockquote>
    The geocoordinates of the nodes in a way are projected onto a plane to compute a centroid. The centroid is then converted back to geocoordinates and saved as the location of the way.
</blockquote>

In [2]:
# Input and output files
input_osm_data = 'map.osm'
input_keep_tags = 'keep_tags.json'
output_addresses = 'geotagged_addresses.csv'

# Load the tag filter. The values of the JSON object are iterated and flattened
# into one list of accepted `building` tag values.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding.
with open(input_keep_tags, encoding='utf-8') as f:
    map_features = json.load(f)
accepted_building_types = [
    building_type
    for building_types in map_features.values()
    for building_type in building_types
]
print('Looking for tags: ', accepted_building_types)

# Geocoordinates projection
# p1 is the geographic (EPSG:4326) system of the input node coordinates;
# p2 is the planar system in which way centroids are computed.
# NOTE(review): the '+init=' syntax is reported as deprecated by newer pyproj
# releases — confirm axis-order behaviour before migrating to Transformer/CRS.
p1 = Proj('+init=epsg:4326')
p2 = Proj('+proj=lcc +lon_0=-456.6796875 +lat_1=48.4823944 +lat_2=81.6964789 +lat_0=65.0894367 +datum=WGS84 +units=m +no_defs')  # conformal projection

Looking for tags:  ['apartments', 'bungalow', 'cabin', 'detached', 'farm', 'hotel', 'house', 'semidetached_house', 'static_caravan', 'terrace', 'cathedral', 'chapel', 'church', 'monastery', 'mosque', 'temple']


<h4>Geotagged Ways to CSV</h4>

In [3]:
class LocationHandler(o.SimpleHandler):
    """Osmium handler that writes one row per way with an accepted building tag.

    For every way whose ``building`` tag value is in the module-level
    ``accepted_building_types``, all of the way's tags plus a computed
    ``latitude``/``longitude`` are passed to ``writer.writerow``.

    Relies on the module-level projections ``p1`` and ``p2``.
    """

    def __init__(self, writer):
        """Store the row writer and create the WKT factory for way geometry.

        Args:
            writer: Object exposing ``writerow(dict)`` (the notebook passes a
                ``csv.DictWriter``); it receives one dict per matching way.
        """
        super().__init__()
        self.writer = writer
        # Owned by the handler so geometry creation does not depend on a
        # module-level ``wkt_factory`` having been defined beforehand.
        self.wkt_factory = o.geom.WKTFactory()

    def way_centroid(self, w):
        """Return the centroid of way ``w`` converted back through ``p2``.

        The way's nodes are turned into a WKT linestring, parsed with shapely,
        transformed from ``p1`` to ``p2``, and the centroid of the resulting
        ``LineString`` (the line geometry, not a filled polygon) is taken.
        The centroid is rounded to 7 decimals and passed through the inverse
        of ``p2``.

        Args:
            w: The osmium way whose ``nodes`` carry locations.

        Returns:
            The tuple returned by the inverse projection. ``way`` stores
            element 0 as latitude and element 1 as longitude.
        """
        nodes_lnglat = list(wkt.loads(self.wkt_factory.create_linestring(w.nodes)).coords)
        # NOTE(review): switch=True swaps the coordinate pair order for the
        # transform, and the inverse call below swaps the arguments again —
        # keep the two in sync if either is changed.
        nodes_xy = list(itransform(p1, p2, nodes_lnglat, switch=True))
        centroid_xy = LineString(nodes_xy).centroid
        centroid_xy = (round(centroid_xy.x, 7), round(centroid_xy.y, 7))
        centroid_latlng = p2(centroid_xy[1], centroid_xy[0], inverse=True)
        return centroid_latlng

    def way(self, w):
        """Handle one way: write a row if its ``building`` tag is accepted.

        Ways without a ``building`` tag, or with a value not listed in
        ``accepted_building_types``, are skipped.
        """
        if w.tags.get('building') not in accepted_building_types:
            return
        location_dict = {tag.k: tag.v for tag in w.tags}
        way_centroid = self.way_centroid(w)
        location_dict['latitude'] = round(way_centroid[0], 7)
        location_dict['longitude'] = round(way_centroid[1], 7)
        # Write through the writer given to the constructor rather than a
        # same-named global, so the handler works with any writer passed in.
        self.writer.writerow(location_dict)

In [4]:
# Stream the OSM file through LocationHandler and write one CSV row per
# matching way.
# encoding='utf-8' keeps non-ASCII tag values (e.g. street names) writable
# regardless of the platform's default encoding; newline='' lets the csv
# module control line endings itself.
with open(output_addresses, 'w', newline='', encoding='utf-8') as csvfile:
    # Module-level WKT factory; kept under this name for code that looks it
    # up as a global.
    wkt_factory = o.geom.WKTFactory()
    # Only these columns are written; any other tag on a way is dropped
    # because of extrasaction='ignore'.
    fieldnames = ['latitude', 'longitude', 'addr:housenumber', 'addr:street', 'building']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
    writer.writeheader()
    handler = LocationHandler(writer)
    # locations=True asks osmium to attach node locations to ways, which the
    # handler needs in order to build each way's geometry.
    handler.apply_file(input_osm_data, locations=True)