In [1]:
import sys
sys.path.insert(0, "..")

# Buildings / Addresses

I want to understand how buildings and addresses are represented in OSM data.

Required reading: http://wiki.openstreetmap.org/wiki/Addresses



In [2]:
import osmdigest.digest as digest

# Nodes tagged as buildings / with addresses

For the Isle of Wight dataset, there are rather few nodes tagged as buildings.  There are quite a few more nodes which have address information, however.

In [3]:
# Gather every node that has a "building" tag, and every node that has at
# least one "addr:*" tag.  NOTE: despite the "_ids" suffix, both lists hold
# the parsed element objects themselves rather than bare ids.
building_node_ids, addr_node_ids = [], []

for element in digest.parse("isle-of-wight-latest.osm"):
    if not isinstance(element, digest.Node):
        continue
    tags = element.tags
    if "building" in tags:
        building_node_ids.append(element)
    # A node may land in both lists: the two checks are independent.
    if any(tag.startswith("addr:") for tag in tags):
        addr_node_ids.append(element)

In [4]:
# How many building-tagged nodes were found, plus the first five as a sample.
(len(building_node_ids), building_node_ids[:5])

(39,
 [Node(38921672 @ [50.6405824,-1.360434] {'name': 'Wolverton Manor', 'building': 'manor_house', 'historic': 'manor'}),
  Node(691207722 @ [50.6773251,-1.495855] {'name': 'Tollgate House', 'building': 'house', 'addr:city': 'Afton', 'addr:street': 'Wilmington Lane', 'addr:country': 'GB', 'addr:housename': 'Tollgate House'}),
  Node(718782298 @ [50.6829817,-1.418304] {'name': 'Freedom Farm', 'building': 'yes'}),
  Node(765755120 @ [50.6921583,-1.5315236] {'building': 'farmhouse', 'addr:housename': 'Bramble Farm'}),
  Node(829972519 @ [50.5856384,-1.2469082] {'name': 'St. Rhadagunds', 'email': 'info@strhads.co.uk', 'phone': '01983852160', 'tourism': 'hotel', 'website': 'strhads.co.uk', 'building': 'yes', 'internet_access': 'wlan'})])

In [5]:
# How many nodes carry address tags, plus the first five as a sample.
(len(addr_node_ids), addr_node_ids[:5])

(1822,
 [Node(2383584 @ [50.7065062,-1.5006427] {'name': 'Yarmouth Quay', 'amenity': 'ferry_terminal', 'network': 'Car Ferry', 'operator': 'Wightlink', 'addr:city': 'Yarmouth', 'addr:street': 'Quay Street', 'addr:postcode': 'PO41 0PB'}),
  Node(2387351 @ [50.6833789,-1.0956959] {'name': 'Bembridge Windmill', 'tourism': 'attraction', 'website': 'https://www.nationaltrust.org.uk/bembridge-windmill', 'man_made': 'windmill', 'operator': 'National Trust', 'addr:city': 'Bembridge', 'addr:street': 'Mill Road', 'addr:postcode': 'PO35 5SQ', 'addr:housename': 'Bembridge Windmill'}),
  Node(2433320 @ [50.6717547,-1.5427212] {'name': 'High Down Inn', 'amenity': 'pub', 'tourism': 'bed_and_breakfast', 'website': 'http://www.highdowninn.com/', 'alt_name': 'Highdown Inn', 'addr:city': 'Totland', 'addr:street': 'Highdown Lane', 'addr:country': 'GB', 'addr:postcode': 'PO39 0HY'}),
  Node(7227760 @ [50.7054346,-1.2146592] {'name': 'Havenstreet', 'usage': 'tourism', 'railway': 'station', 'addr:city': 'Hav

# Same for ways

We expected buildings to mostly have a polygonal outline, and so be "ways".

The situation here is reversed: lots of buildings, and fewer addresses.  From eyeballing a few ways which have address information, but which are not buildings, we find that the "way" gives the total outline of, say, a school, which may contain a number of buildings, playing fields etc.

In [6]:
# Gather every way that has a "building" tag, and every way that has at
# least one "addr:*" tag.  NOTE: despite the "_ids" suffix, both lists hold
# the parsed element objects themselves rather than bare ids.
building_way_ids, addr_way_ids = [], []

for element in digest.parse("isle-of-wight-latest.osm"):
    if not isinstance(element, digest.Way):
        continue
    tags = element.tags
    if "building" in tags:
        building_way_ids.append(element)
    # A way may land in both lists: the two checks are independent.
    if any(tag.startswith("addr:") for tag in tags):
        addr_way_ids.append(element)

In [7]:
# How many building-tagged ways were found, plus the first five as a sample.
(len(building_way_ids), building_way_ids[:5])

(43644,
 [Way(3580387 ->  [17731291, 1812813868, 826100758, 17731290, 1812813848, 17731289, 826100762, 17731295, 1812813842, 17731294, 826100769, 17731293, 1812813861, 197678030, 826100761, 17731292, 1812813845, 17731291] {'alt_name': 'Spitsand Fort', 'alt_name:1': 'Spit Sand Fort', 'alt_name:2': 'Spit Fort', 'building': 'yes', 'castle_type': 'fortress', 'historic': 'castle', 'name': 'Spitbank Fort', 'natural': 'coastline', 'source': 'OS OpenData StreetView', 'start_date': '1867', 'tourism': 'hotel', 'wikidata': 'Q2311360', 'wikipedia': 'en:Spitbank Fort'}),
  Way(3580388 ->  [17731299, 17731297, 1812813839, 825993194, 1812813856, 17731305, 17731304, 825993180, 1812813854, 17731303, 17731302, 1812813864, 825993155, 17731301, 17731300, 1812813852, 825993118, 1812813850, 17731299] {'building': 'yes', 'historic': 'fort', 'man_made': 'pier', 'name': 'Horse Sand Fort', 'natural': 'coastline', 'source': 'OS OpenData StreetView', 'start_date': '1865', 'wikidata': 'Q1485267', 'wikipedia': 'en:

In [8]:
# How many ways carry address tags, plus the first five as a sample.
(len(addr_way_ids), addr_way_ids[:5])

(6963,
 [Way(5026852 ->  [1636613142, 1756154625, 2852765828, 1756154623, 4594616645, 4594616647, 1756154590, 1756154593, 1756154627, 2852765827, 1756154630, 1756154631, 33556615, 28837469, 2852765838, 28837470, 33556618, 33556619, 33556620, 1636613166, 2852765836, 2852774936, 1636613142] {'name': 'Christ The King College - Upper College', 'amenity': 'school', 'addr:city': 'Newport', 'addr:street': 'Wellington Road', 'addr:postcode': 'PO30 5QT'}),
  Way(5026862 ->  [4684759642, 4684759641, 4684827409, 4684827406, 4684827399, 4684827389, 4684759640, 4684759639, 4684824588, 4684759635, 33556692, 4684759633, 4684759634, 4684824584, 4684759636, 4684824587, 1382864717, 33556694, 33556695, 2852776705, 33556696, 33556697, 33556698, 33556699, 33556700, 33556701, 33556702, 33556703, 1662247222, 2852776708, 2852776721, 1662247194, 1662247197, 1662247191, 1662247189, 1662247187, 1662247185, 4684759642] {'addr:city': 'Newport', 'addr:postcode': 'PO30 5QU', 'addr:street': 'Mountbatten Drive', 'amen

# Finally for relations

Rather few.  These tend to be special cases: usually when the building's footprint has a hole in it (say a stately home with an inner courtyard), so a single closed way cannot describe it and a multipolygon relation is required to specify the "inner" and "outer" ways.

In [9]:
# Gather every relation that has a "building" tag, and every relation that
# has at least one "addr:*" tag.  NOTE: despite the "_ids" suffix, both lists
# hold the parsed element objects themselves rather than bare ids.
building_rel_ids, addr_rel_ids = [], []

for element in digest.parse("isle-of-wight-latest.osm"):
    if not isinstance(element, digest.Relation):
        continue
    tags = element.tags
    if "building" in tags:
        building_rel_ids.append(element)
    # A relation may land in both lists: the two checks are independent.
    if any(tag.startswith("addr:") for tag in tags):
        addr_rel_ids.append(element)

In [10]:
# How many building-tagged relations were found, plus the first five as a sample.
(len(building_rel_ids), building_rel_ids[:5])

(33,
 [Relation(70978 ->  [Member(type='way', ref=30533570, role='outer'), Member(type='way', ref=30533571, role='inner')] {'building': 'monastery', 'community': 'OSB', 'community:en': 'benedictine', 'denomination': 'catholic', 'name': 'Quarr Abbey', 'religion': 'christian', 'type': 'multipolygon', 'website': 'http://www.quarrabbey.co.uk/'}),
  Relation(1369783 ->  [Member(type='way', ref=94481636, role='inner'), Member(type='way', ref=35821252, role='outer'), Member(type='way', ref=94481643, role='inner'), Member(type='way', ref=94481639, role='inner'), Member(type='way', ref=94481641, role='inner'), Member(type='way', ref=94481638, role='inner')] {'type': 'multipolygon', 'building': 'yes'}),
  Relation(1578648 ->  [Member(type='way', ref=112449476, role='inner'), Member(type='way', ref=30665632, role='outer')] {'type': 'multipolygon', 'building': 'yes'}),
  Relation(1584149 ->  [Member(type='way', ref=19215850, role='outer'), Member(type='way', ref=113058329, role='inner')] {'buildin

In [11]:
# How many relations carry address tags, plus the first five as a sample.
(len(addr_rel_ids), addr_rel_ids[:5])

(6,
 [Relation(1593581 ->  [Member(type='way', ref=114135843, role='outer'), Member(type='way', ref=114135841, role='inner')] {'addr:city': 'Bembridge', 'addr:housename': 'Solent Landing', 'addr:postcode': 'PO35 5NZ', 'addr:street': 'Beach Road', 'building': 'residential', 'type': 'multipolygon'}),
  Relation(1844051 ->  [Member(type='way', ref=228750548, role='outer'), Member(type='way', ref=136843896, role='inner')] {'name': 'Bugle Hotel', 'type': 'multipolygon', 'wifi': 'free', 'phone': '+44 1983 760272', 'source': 'Bing', 'amenity': 'pub', 'tourism': 'hotel', 'website': 'http://www.thebugleyarmouth.co.uk/', 'alt_name': 'The Bugle Coaching Inn', 'building': 'yes', 'addr:city': 'Yarmouth', 'addr:street': 'The Square', 'addr:postcode': 'PO41 0NS', 'addr:housename': 'Bugle Coaching Inn'}),
  Relation(5808141 ->  [Member(type='way', ref=168129028, role='outer'), Member(type='way', ref=378172008, role='inner')] {'addr:postcode': 'PO36 0HL', 'addr:street': 'Lower Road', 'capacity:pitches'

# Process to a `pandas` dataframe

In [12]:
import numpy as np
import pandas as pd

In [13]:
gen = digest.parse("isle-of-wight-latest.osm")
# Print the first two items of the stream, one per line, before scanning the
# remaining elements for tags.
print(next(gen), next(gen), sep="\n")

# Every distinct "addr:*" tag key seen on any remaining element.
possible_address_tags = {
    tag
    for element in gen
    for tag in element.tags
    if tag.startswith("addr:")
}

possible_address_tags

OSM(version=0.6, generator=osmconvert 0.8.5, timestamp=2017-04-25 20:43:28)
Bounds(latitude:[50.50555,50.80102], longitude:[-1.659074,-1.0313699]


{'addr:city',
 'addr:country',
 'addr:county',
 'addr:flats',
 'addr:floor',
 'addr:hamlet',
 'addr:housename',
 'addr:housenumber',
 'addr:inclusion',
 'addr:interpolation',
 'addr:name',
 'addr:place',
 'addr:postcode',
 'addr:province',
 'addr:street',
 'addr:unit'}

In [14]:
gen = digest.parse("isle-of-wight-latest.osm")
# Consume the two leading items of the stream so that the loop below only
# sees the elements that follow them.
osm = next(gen)
bounds = next(gen)

# `possible_address_tags` is a set, so iterating it directly gives an order
# that depends on string hashing and need not be stable between runs.  Sort
# once so the dict (and any DataFrame built from it) has deterministic,
# alphabetical columns.
address_columns = sorted(possible_address_tags)

# Column-oriented table: one list per address tag plus an "osm_id" column.
address_data = {key: [] for key in address_columns}
address_data["osm_id"] = []

for x in gen:
    addr = {key: x.tags[key] for key in x.tags if key.startswith("addr:")}
    if not addr:
        # Elements without any address tag contribute no row.
        continue
    # Row identifier is built as "<x.name>/<x.osm_id>".
    address_data["osm_id"].append(x.name + "/" + str(x.osm_id))
    for key in address_columns:
        # Pad absent tags with NaN so that every column keeps the same length.
        address_data[key].append(addr.get(key, np.nan))

In [15]:
# Build the frame from the column lists and index it by the "osm_id" column
# in a single chained expression, then preview the first five rows.
data = pd.DataFrame(address_data).set_index("osm_id")
data.head()

Unnamed: 0_level_0,addr:city,addr:country,addr:county,addr:flats,addr:floor,addr:hamlet,addr:housename,addr:housenumber,addr:inclusion,addr:interpolation,addr:name,addr:place,addr:postcode,addr:province,addr:street,addr:unit
osm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
node/2383584,Yarmouth,,,,,,,,,,,,PO41 0PB,,Quay Street,
node/2387351,Bembridge,,,,,,Bembridge Windmill,,,,,,PO35 5SQ,,Mill Road,
node/2433320,Totland,GB,,,,,,,,,,,PO39 0HY,,Highdown Lane,
node/7227760,"Havenstreet, near Ryde",,,,,,,,,,,,,,Main Road,
node/8093989,Ryde,,,,,,,41.0,,,,,PO33 2LF,,Union Street,
