In [4]:
import copy
import urllib.request
import random
import folium
import xml.sax
from math import radians, cos, sin, asin, sqrt
from pathlib import Path
import networkx as nx
from pprint import pprint


from vars import DATASET
print(DATASET)

cambridge


In [5]:
def haversine_distance(lon1, lat1, lon2, lat2, unit_m=True):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    default unit : km
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of the Earth in kilometers. Use 3956 for miles
    if unit_m:
        r *= 1000
    return c * r


class Node(object):
    def __init__(self, id, lon, lat):
        self.id = id
        self.lon = lon
        self.lat = lat
        self.tags = {}

    def __str__(self):
        return "Node (id : %s) lon : %s, lat : %s "%(self.id, self.lon, self.lat)


class Way(object):
    def __init__(self, id, osm):
        self.osm = osm
        self.id = id
        self.nds = []
        self.tags = {}

    def split(self, dividers):
        # slice the node-array using this nifty recursive function
        def slice_array(ar, dividers):
            for i in range(1,len(ar)-1):
                if dividers[ar[i]]>1:
                    left = ar[:i+1]
                    right = ar[i:]

                    rightsliced = slice_array(right, dividers)

                    return [left]+rightsliced
            return [ar]

        slices = slice_array(self.nds, dividers)

        # create a way object for each node-array slice
        ret = []
        i = 0
        for slice in slices:
            littleway = copy.copy(self)
            littleway.id += "-%d" % i
            littleway.nds = slice
            ret.append(littleway)
            i += 1

        return ret



In [6]:
class OSM(object):
    def __init__(self, osm_xml_data, is_xml_string=True):
        """ File can be either a filename or stream/file object.

        set `is_xml_string=False` if osm_xml_data is a filename or a file stream.
        """
        nodes = {}
        ways = {}

        superself = self

        class OSMHandler(xml.sax.ContentHandler):
            @classmethod
            def setDocumentLocator(self, loc):
                pass

            @classmethod
            def startDocument(self):
                pass

            @classmethod
            def endDocument(self):
                pass

            @classmethod
            def startElement(self, name, attrs):
                if name == 'node':
                    self.currElem = Node(attrs['id'], float(attrs['lon']), float(attrs['lat']))
                elif name == 'way':
                    self.currElem = Way(attrs['id'], superself)
                elif name == 'tag':
                    self.currElem.tags[attrs['k']] = attrs['v']
                elif name == 'nd':
                    self.currElem.nds.append(attrs['ref'])

            @classmethod
            def endElement(self, name):
                if name == 'node':
                    nodes[self.currElem.id] = self.currElem
                elif name == 'way':
                    ways[self.currElem.id] = self.currElem

            @classmethod
            def characters(self, chars):
                pass

        if is_xml_string:
            xml.sax.parseString(osm_xml_data, OSMHandler)
        else:
            with open(osm_xml_data, mode='r') as f:
                xml.sax.parse(f, OSMHandler)

        self.nodes = nodes
        self.ways = ways

        # count times each node is used
        node_histogram = dict.fromkeys(self.nodes.keys(), 0)
        for way in self.ways.values():
            if len(way.nds) < 2:  # if a way has only one node, delete it out of the osm collection
                del self.ways[way.id]
            else:
                for node in way.nds:
                    node_histogram[node] += 1

        # use that histogram to split all ways, replacing the member set of ways
        new_ways = {}
        for id, way in self.ways.items():
            split_ways = way.split(node_histogram)
            for split_way in split_ways:
                new_ways[split_way.id] = split_way
        self.ways = new_ways


In [13]:
def read_osm(osm_xml_data, is_xml_string=True, only_roads=True):
    """Read graph in OSM format from file specified by name or by stream object.
    Parameters
    ----------
    filename_or_stream : filename or stream object

    Returns
    -------
    G : Graph

    Examples
    --------
    >>> G=nx.read_osm(nx.download_osm(-122.33,47.60,-122.31,47.61))
    >>> import matplotlib.pyplot as plt
    >>> plt.plot([G.node[n]['lat']for n in G], [G.node[n]['lon'] for n in G], 'o', color='k')
    >>> plt.show()
    """
    osm = OSM(osm_xml_data, is_xml_string=is_xml_string)
    G = nx.DiGraph()

    ## Add ways
    for w in osm.ways.values():
        if only_roads and 'highway' not in w.tags:
            continue
          
        if ('oneway' in w.tags):
            if (w.tags['oneway'] == 'yes'):
                # ONLY ONE DIRECTION
                nx.add_path(G, w.nds, id=w.id, tags=w.tags)
            else:
                # BOTH DIRECTION
                nx.add_path(G, w.nds, id=w.id, tags=w.tags)
                nx.add_path(G, w.nds[::-1], id=w.id, tags=w.tags)        
                
        else:
            # BOTH DIRECTION
            nx.add_path(G, w.nds, id=w.id, tags=w.tags)
            nx.add_path(G, w.nds[::-1], id=w.id, tags=w.tags)
#             nx.add_path(G, w.nds, id=w.id)
#             nx.add_path(G, w.nds[::-1], id=w.id)

    # Complete the used nodes' information
    coordinates_map = {}
    for n_id in G.nodes():
        n = osm.nodes[n_id]
        G.nodes[n_id]['lat'] = n.lat
        G.nodes[n_id]['lon'] = n.lon
        G.nodes[n_id]['id'] = n.id
        G.nodes[n_id]['tags'] = n.tags
        coordinates_map[n_id] = (n.lon, n.lat)

    # Estimate the length of each way
    for u, v, d in G.edges(data=True):
        # Give a realistic distance estimation (neither EPSG nor projection nor reference system are specified)
        distance = haversine_distance(G.nodes[u]['lon'], G.nodes[u]['lat'], G.nodes[v]['lon'], G.nodes[v]['lat'], unit_m=True)

        G.add_weighted_edges_from([(u, v, distance)], weight='havlen')

#     G = nx.relabel_nodes(G, coordinates_map)
    return G


In [14]:
g = read_osm("./" + DATASET + ".osm", is_xml_string=False)
# make it undirected, because directed just isn't worth the trouble yet
# g = g.to_undirected()
len(g)

21438

In [15]:
# Export for Sylvia
## nodes
f = open("01_" + DATASET + "_unpruned_nodes_for_sylvia.csv", "a")
f.write("id,lat,lon\n")
for node in g.nodes():
    data = g.nodes()[node]
    nid, lat, lon = data['id'], data['lat'], data['lon']
    f.write(str(nid) + ',' + str(lat) + ',' + str(lon) + '\n')
f.close()

## edges
f = open("01_" + DATASET + "_unpruned_edges_for_sylvia.csv", "a")
f.write("id1,id2\n")

for edge in g.edges():
    f.write(edge[0] + ',' + edge[1] + '\n')
f.close()

# Delete valence two (in-between) nodes

### One-Way Streets

In [17]:
# deal with valence 2 nodes (intermediaries on one way streets)

# nodes with degree two. i.e. they are in between going from one place to another.
valence_2_nodes = set([k for k, v in g.degree() if v == 2])
print('original len valence_2_nodes: ' + str(len(valence_2_nodes)))

def right_node(node):
    print('right-node:' + node)
    out_edges = [x for x in g.out_edges(node)]
    
    if len(out_edges) > 0:
        return [x for x in g.out_edges(node)][0][1]
        
    else:
        return ''

def left_node(node):
    print('left-node:' + node)
    in_edges = [x for x in g.in_edges(node)]
    if len(in_edges) > 0:
        return [x for x in g.in_edges(node)][0][0]
    else:
        return ''


newly_linked = {}

while len(valence_2_nodes) > 0:
    node = list(valence_2_nodes)[0]
    
    old_right, new_right = node, right_node(node)
    old_left, new_left = node, left_node(node)
    
    # edge case with two cyclic nodes.
    # just choose one to win and call it a day.
    if new_left == new_right:
        g.remove_nodes_from([node])
        valence_2_nodes -= set([node, new_left])
        continue
    print(node)
    print(new_left)
    print(new_right)
    print('---')
        

    nodes_for_deletion = [node]
    distance = 0

    # find leftmost vertex with valence 2
    while (new_left in valence_2_nodes) and (new_left not in nodes_for_deletion) and (new_left in g.nodes()):
        distance += g.get_edge_data(new_left, old_left).get('havlen', 0) # add distance
        old_left = new_left # re-assign the rightmost valence 2 node.
        nodes_for_deletion.insert(0, old_left) # mark the rightmost v2 node for deletion
        new_left = left_node(old_left) # reassign the rightmost node of questionable valence

    # add distance to new leftmost valence != 2 node
    if new_left != '':
        distance += g.get_edge_data(new_left, old_left).get('havlen', 0)

    # find rightmost vertex with valence 2
    while new_right in valence_2_nodes and (new_right not in nodes_for_deletion) and (new_right in g.nodes()):
        distance += g.get_edge_data(old_right, new_right).get('havlen', 0)
        old_right = new_right
        nodes_for_deletion.append(old_right)
        new_right = right_node(old_right)

    # add distance to new rightmost valence != 2 node, if a more rightmost node exists
    if new_right != '':
        distance += g.get_edge_data(old_right, new_right).get('havlen', 0)
       
    # remove old useless nodes
    g.remove_nodes_from(nodes_for_deletion)
    valence_2_nodes -= set(nodes_for_deletion)
    
    # add new edge between end nodes
    g.add_weighted_edges_from([(new_left, new_right, distance)], weight='havlen')
    
    newly_linked[(new_left, new_right)] = [nodes_for_deletion]


print('num_valence_2_nodes remaining: ' + str(len(valence_2_nodes)) + "(should be 0)")
# pprint(newly_linked)

original len valence_2_nodes: 4665
right-node:7105607419
left-node:7105607419
7105607419
7105607425
2688941732
---
left-node:7105607425
left-node:61382794
left-node:61382790
left-node:2688941728
left-node:61382802
left-node:2688941738
left-node:7105660981
left-node:277607781
left-node:2688952215
left-node:7105660952
left-node:7105660980
left-node:7105660975
right-node:2688941732
right-node:7105607420
right-node:2688941730
right-node:61382785
right-node:7105607422
right-node:61382787
right-node:2688939556
right-node:2688941726
right-node:61382774
right-node:7105607415
right-node:61382780
right-node:61382700
right-node:2688941734
right-node:7105607421
right-node:2688939564
right-node:2688941736
right-node:61382696
right-node:61382691
right-node:7105607424
right-node:2688939562
right-node:7105607418
right-node:61382687
right-node:7105607426
right-node:61382682
right-node:7105607423
right-node:2688939555
right-node:7105607416
right-node:61382680
right-node:61382675
right-node:7105607417
ri

right-node:2688986123
right-node:1708499316
left-node:1708499316
1708499316
1708499310
1708499329
---
left-node:1708499310
left-node:1708499295
right-node:1708499329
right-node:1708499366
right-node:1708499373
right-node:1708499380
right-node:1708499388
right-node:1708499384
right-node:1708499377
right-node:1708499370
right-node:7560377006
left-node:7560377006
7560377006
7560376996
7560376997
---
right-node:7560376997
right-node:7560376998
right-node:7560376999
right-node:7560377008
right-node:7560377000
right-node:7560377001
right-node:7560377007
right-node:7560377002
right-node:7560377003
right-node:7560377004
right-node:7560377005
right-node:2679576497
left-node:2679576497
2679576497
2679576498
61323866
---
left-node:2679576498
left-node:2679576496
right-node:61326693
left-node:61326693
61326693
278950328
61326695
---
right-node:278949808
left-node:278949808
278949808
1525366591
61326695
---
left-node:1525366591
left-node:278949806
left-node:278949805
left-node:61331800
right-node:7

right-node:1708499256
right-node:7096589021
left-node:7096589021
7096589021
7096588990
7096482483
---
left-node:7096588990
left-node:7096588992
left-node:7096589006
right-node:7096482483
right-node:7096505101
right-node:7096505094
right-node:7096505107
right-node:7626165551
left-node:7626165551
right-node:2671747037
left-node:2671747037
2671747037
2671747035
61446552
---
left-node:2671747035
left-node:2671747034
left-node:2671747038
left-node:61449323
left-node:7746066184
left-node:7746066183
left-node:61449350
left-node:2671859001
left-node:2672155736
left-node:61449342
left-node:61449285
left-node:61449289
left-node:7552625792
left-node:2672155737
left-node:2672155733
left-node:61449280
right-node:61446552
right-node:61449304
right-node:2670395873
right-node:5458318645
left-node:5458318645
right-node:61332424
left-node:61332424
61332424
61332426
438265112
---
left-node:61332426
left-node:2693621424
left-node:61332386
left-node:2693621377
left-node:61332384
left-node:2693621465
left-n

61330815
61330797
61328117
---
right-node:7802240713
left-node:7802240713
right-node:5244281085
left-node:5244281085
5244281085
61329294
61328552
---
right-node:7628615270
left-node:7628615270
7628615270
61324781
61324775
---
left-node:61324781
left-node:6755512948
left-node:61324779
left-node:61329165
left-node:61329167
left-node:61329128
right-node:566050758
left-node:566050758
566050758
566057463
7746087677
---
left-node:566057463
right-node:7746087677
right-node:6450188301
left-node:6450188301
right-node:6114648229
left-node:6114648229
right-node:597845244
left-node:597845244
right-node:7139286845
left-node:7139286845
right-node:542892913
left-node:542892913
right-node:1041959398
left-node:1041959398
1041959398
71921123
1041959456
---
left-node:71921123
left-node:71939930
left-node:71918316
left-node:71929373
left-node:1887734674
right-node:1041959456
right-node:71917340
right-node:7717608738
left-node:7717608738
7717608738
7717608737
7640833611
---
left-node:7717608737
left-node:7

left-node:1230178128
right-node:5264316737
left-node:5264316737
5264316737
7564373051
61358345
---
left-node:7564373051
right-node:365950920
left-node:365950920
365950920
314502508
365950921
---
right-node:7130577060
left-node:7130577060
right-node:2049832610
left-node:2049832610
2049832610
7631495404
2049832619
---
right-node:61170997
left-node:61170997
right-node:4259341204
left-node:4259341204
4259341204
6868617305
6868617292
---
right-node:61325561
left-node:61325561
61325561
61325559
61325563
---
right-node:1887734372
left-node:1887734372
right-node:61321589
left-node:61321589
61321589
61321594
1705790264
---
left-node:61321594
right-node:3729509437
left-node:3729509437
right-node:61327543
left-node:61327543
61327543
1032773628
61327700
---
left-node:1032773628
left-node:1032773667
right-node:1368152374
left-node:1368152374
1368152374
1041959442
5493338394
---
right-node:2693475531
left-node:2693475531
2693475531
61330765
61318072
---
right-node:61318072
right-node:5516910313
left

right-node:6154479701
left-node:6154479701
right-node:7560377034
left-node:7560377034
right-node:71945913
left-node:71945913
71945913
71921094
71913928
---
right-node:1722449465
left-node:1722449465
right-node:688404503
left-node:688404503
right-node:61317415
left-node:61317415
right-node:1705821892
left-node:1705821892
right-node:6299094720
left-node:6299094720
right-node:7729022858
left-node:7729022858
right-node:4329252672
left-node:4329252672
right-node:589523396
left-node:589523396
right-node:7324284078
left-node:7324284078
right-node:3211700982
left-node:3211700982
right-node:61180006
left-node:61180006
right-node:61318177
left-node:61318177
61318177
5843638165
7632047812
---
right-node:7802240715
left-node:7802240715
right-node:5244314185
left-node:5244314185
5244314185
1956030437
61170994
---
left-node:1956030437
right-node:7713962846
left-node:7713962846
right-node:1629873701
left-node:1629873701
right-node:2292063723
left-node:2292063723
right-node:71926901
left-node:71926901

## bi-directional streets

In [18]:
# valence_4_nodes = set([k for k, v in g.degree() if v == 4])
# special_node = '61331813'
# for node in [special_node]:
#     print(node)
#     print(g.nodes[node])
#     print(g.out_edges(node))
#     print(g.in_edges(node))
    
# #     [item for sublist in l for item in sublist]
#     out_edges = set([item for sublist in g.out_edges(node) for item in sublist]) - set([node])
#     in_edges = set([item for sublist in g.in_edges(node) for item in sublist]) - set([node])
#     print(out_edges)
#     print(in_edges)
    
    
#     print('----')


In [19]:
    
def intermediary_two_way_node(node):
    '''returns true if the given node is intermediary on a two way street.
       returns false if given node is not intermediary on a two way street.
       errors if node does not exist.'''
    out_edges = set([item for sublist in g.out_edges(node) for item in sublist]) - set([node])
    in_edges = set([item for sublist in g.in_edges(node) for item in sublist]) - set([node])

    if len(out_edges) == 2 and len(in_edges) == 2:
        return in_edges == out_edges
    else:
        return False

def right_two_way_node(node):
    if intermediary_two_way_node(node):
        return [x for x in g.out_edges(node)][1][1]
    else:
        return False

def left_two_way_node(node):
    if intermediary_two_way_node(node):
        return [x for x in g.out_edges(node)][0][1]
    else:
        return False
    
def compute_distances_of_walk(walk_list):
    '''given a set of walking nodes, Compute the distance between them.'''
    distance = 0
    start_node = walk_list[0]
    
    for next_node in walk_list[1:]:
        sprint = g.get_edge_data(start_node, next_node).get('havlen', 0)
        distance += sprint
        start_node = next_node
    return distance


def walk_right(node):
    if not intermediary_two_way_node(node):
        return {'walked': [node],
                'distance': 0}
    
    # compute node sequence
    walked = [node]
    next_node = right_two_way_node(node)
    
    while intermediary_two_way_node(next_node) and (next_node not in walked):
        walked.append(next_node)
        next_node = right_two_way_node(next_node)
      
    # append non-intermediary node
    walked.append(next_node)
    
    # compute distances    
    return {'walked': walked,
            'distance': compute_distances_of_walk(walked)}

def walk_left(node):
    return {'walked': [node],
            'distance': 0}
    
    walked = [node]
    next_node = left_two_way_node(node)
    
    while intermediary_two_way_node(next_node) and (next_node not in walked):
        walked.append(next_node)
        next_node = left_two_way_node(next_node)
    
    # append non-intermediary node
    walked.append(next_node)
    
    return {'walked': walked,
            'distance': compute_distances_of_walk(walked)}

    

In [21]:


# get the intermediate nodes
intermediary_2w = set([x for x in g.nodes() if intermediary_two_way_node(x)])

# walk the grah, and remove intermediary nodes
while len(intermediary_2w) > 0:        
    node = list(intermediary_2w)[0]
#     print(node)

        
    right = walk_right(node)
    left = walk_left(node)
    
    # all walked nodes (use a set to eliminate duplicate start node)
    all_walked = set(left['walked'] + right['walked'])
    
    # the total distance walked
    all_distance = left['distance'] + right['distance']
    
    # modify graph
    # Add new edge between non-intermediary nodes
    g.add_weighted_edges_from([(left['walked'][-1], right['walked'][-1], all_distance)], weight='havlen')
    # delete intermediary nodes
    nodes_for_deletion = set(left['walked'][:-1] + right['walked'][:-1])
    g.remove_nodes_from(nodes_for_deletion)
    
    # modify intermediary list
    print(len(intermediary_2w))
    intermediary_2w -= nodes_for_deletion
    
    # edge case with single node cycle
    if len(nodes_for_deletion) == 0:
        intermediary_2w -= set([node])
        
    print(len(intermediary_2w)) 
    
    
#     print(node)
#     print(nodes_for_deletion)
#     print(all_walked)
#     print(all_distance)
    print('---')
    
    

10950
10949
---
10949
10948
---
10948
10945
---
10945
10943
---
10943
10941
---
10941
10938
---
10938
10936
---
10936
10935
---
10935
10933
---
10933
10931
---
10931
10929
---
10929
10928
---
10928
10918
---
10918
10914
---
10914
10912
---
10912
10911
---
10911
10906
---
10906
10905
---
10905
10902
---
10902
10895
---
10895
10893
---
10893
10892
---
10892
10890
---
10890
10888
---
10888
10879
---
10879
10871
---
10871
10870
---
10870
10869
---
10869
10868
---
10868
10865
---
10865
10858
---
10858
10853
---
10853
10851
---
10851
10849
---
10849
10843
---
10843
10839
---
10839
10833
---
10833
10830
---
10830
10829
---
10829
10818
---
10818
10815
---
10815
10814
---
10814
10812
---
10812
10808
---
10808
10806
---
10806
10805
---
10805
10793
---
10793
10790
---
10790
10788
---
10788
10786
---
10786
10783
---
10783
10782
---
10782
10780
---
10780
10779
---
10779
10767
---
10767
10765
---
10765
10764
---
10764
10762
---
10762
10761
---
10761
10731
---
10731
10730
---
10730
10728
---
10728
10

8368
---
8368
8365
---
8365
8361
---
8361
8360
---
8360
8359
---
8359
8357
---
8357
8354
---
8354
8352
---
8352
8345
---
8345
8337
---
8337
8334
---
8334
8332
---
8332
8314
---
8314
8313
---
8313
8311
---
8311
8306
---
8306
8305
---
8305
8303
---
8303
8297
---
8297
8293
---
8293
8292
---
8292
8289
---
8289
8288
---
8288
8287
---
8287
8279
---
8279
8277
---
8277
8276
---
8276
8273
---
8273
8268
---
8268
8265
---
8265
8263
---
8263
8261
---
8261
8260
---
8260
8258
---
8258
8256
---
8256
8248
---
8248
8244
---
8244
8243
---
8243
8241
---
8241
8240
---
8240
8239
---
8239
8237
---
8237
8232
---
8232
8229
---
8229
8226
---
8226
8224
---
8224
8222
---
8222
8221
---
8221
8220
---
8220
8218
---
8218
8216
---
8216
8215
---
8215
8213
---
8213
8207
---
8207
8204
---
8204
8200
---
8200
8199
---
8199
8198
---
8198
8190
---
8190
8185
---
8185
8183
---
8183
8182
---
8182
8176
---
8176
8175
---
8175
8174
---
8174
8162
---
8162
8160
---
8160
8159
---
8159
8158
---
8158
8150
---
8150
8149
---
8149
8148
-

6755
6754
---
6754
6750
---
6750
6747
---
6747
6746
---
6746
6745
---
6745
6743
---
6743
6739
---
6739
6738
---
6738
6737
---
6737
6736
---
6736
6734
---
6734
6731
---
6731
6730
---
6730
6728
---
6728
6727
---
6727
6722
---
6722
6720
---
6720
6719
---
6719
6717
---
6717
6715
---
6715
6714
---
6714
6713
---
6713
6710
---
6710
6707
---
6707
6706
---
6706
6705
---
6705
6695
---
6695
6694
---
6694
6692
---
6692
6690
---
6690
6689
---
6689
6688
---
6688
6686
---
6686
6684
---
6684
6682
---
6682
6681
---
6681
6676
---
6676
6674
---
6674
6668
---
6668
6667
---
6667
6666
---
6666
6662
---
6662
6659
---
6659
6657
---
6657
6650
---
6650
6649
---
6649
6648
---
6648
6646
---
6646
6645
---
6645
6643
---
6643
6642
---
6642
6641
---
6641
6639
---
6639
6637
---
6637
6636
---
6636
6635
---
6635
6632
---
6632
6629
---
6629
6627
---
6627
6625
---
6625
6624
---
6624
6623
---
6623
6622
---
6622
6621
---
6621
6619
---
6619
6618
---
6618
6617
---
6617
6611
---
6611
6610
---
6610
6608
---
6608
6607
---
6607
6

5424
5423
---
5423
5421
---
5421
5419
---
5419
5418
---
5418
5416
---
5416
5414
---
5414
5410
---
5410
5409
---
5409
5405
---
5405
5401
---
5401
5399
---
5399
5398
---
5398
5397
---
5397
5396
---
5396
5391
---
5391
5390
---
5390
5388
---
5388
5385
---
5385
5384
---
5384
5383
---
5383
5380
---
5380
5377
---
5377
5371
---
5371
5369
---
5369
5368
---
5368
5367
---
5367
5366
---
5366
5365
---
5365
5363
---
5363
5360
---
5360
5359
---
5359
5358
---
5358
5357
---
5357
5356
---
5356
5351
---
5351
5349
---
5349
5347
---
5347
5345
---
5345
5344
---
5344
5342
---
5342
5341
---
5341
5340
---
5340
5337
---
5337
5336
---
5336
5335
---
5335
5333
---
5333
5328
---
5328
5324
---
5324
5323
---
5323
5322
---
5322
5321
---
5321
5320
---
5320
5319
---
5319
5318
---
5318
5317
---
5317
5316
---
5316
5315
---
5315
5313
---
5313
5311
---
5311
5309
---
5309
5308
---
5308
5305
---
5305
5303
---
5303
5302
---
5302
5301
---
5301
5300
---
5300
5297
---
5297
5296
---
5296
5295
---
5295
5293
---
5293
5290
---
5290
5

4230
4229
---
4229
4227
---
4227
4226
---
4226
4225
---
4225
4222
---
4222
4221
---
4221
4216
---
4216
4215
---
4215
4214
---
4214
4211
---
4211
4209
---
4209
4207
---
4207
4206
---
4206
4205
---
4205
4204
---
4204
4203
---
4203
4202
---
4202
4201
---
4201
4199
---
4199
4198
---
4198
4197
---
4197
4196
---
4196
4195
---
4195
4192
---
4192
4191
---
4191
4190
---
4190
4189
---
4189
4188
---
4188
4182
---
4182
4181
---
4181
4180
---
4180
4179
---
4179
4178
---
4178
4177
---
4177
4176
---
4176
4169
---
4169
4168
---
4168
4167
---
4167
4163
---
4163
4162
---
4162
4161
---
4161
4160
---
4160
4159
---
4159
4158
---
4158
4156
---
4156
4155
---
4155
4153
---
4153
4152
---
4152
4150
---
4150
4149
---
4149
4148
---
4148
4145
---
4145
4140
---
4140
4139
---
4139
4138
---
4138
4137
---
4137
4136
---
4136
4135
---
4135
4132
---
4132
4131
---
4131
4130
---
4130
4129
---
4129
4127
---
4127
4126
---
4126
4125
---
4125
4123
---
4123
4122
---
4122
4121
---
4121
4120
---
4120
4118
---
4118
4114
---
4114
4

3084
---
3084
3083
---
3083
3082
---
3082
3081
---
3081
3080
---
3080
3079
---
3079
3078
---
3078
3076
---
3076
3075
---
3075
3074
---
3074
3073
---
3073
3071
---
3071
3070
---
3070
3069
---
3069
3068
---
3068
3067
---
3067
3066
---
3066
3065
---
3065
3063
---
3063
3062
---
3062
3061
---
3061
3060
---
3060
3059
---
3059
3058
---
3058
3055
---
3055
3054
---
3054
3053
---
3053
3051
---
3051
3050
---
3050
3049
---
3049
3045
---
3045
3044
---
3044
3043
---
3043
3042
---
3042
3040
---
3040
3039
---
3039
3038
---
3038
3036
---
3036
3035
---
3035
3034
---
3034
3033
---
3033
3032
---
3032
3031
---
3031
3030
---
3030
3028
---
3028
3027
---
3027
3026
---
3026
3024
---
3024
3023
---
3023
3022
---
3022
3021
---
3021
3018
---
3018
3017
---
3017
3016
---
3016
3012
---
3012
3011
---
3011
3010
---
3010
3009
---
3009
3008
---
3008
3007
---
3007
3006
---
3006
3005
---
3005
3004
---
3004
3002
---
3002
3000
---
3000
2999
---
2999
2998
---
2998
2997
---
2997
2996
---
2996
2995
---
2995
2994
---
2994
2993
-

1900
---
1900
1899
---
1899
1898
---
1898
1897
---
1897
1896
---
1896
1895
---
1895
1894
---
1894
1892
---
1892
1891
---
1891
1890
---
1890
1889
---
1889
1888
---
1888
1887
---
1887
1886
---
1886
1885
---
1885
1884
---
1884
1883
---
1883
1882
---
1882
1881
---
1881
1880
---
1880
1879
---
1879
1878
---
1878
1877
---
1877
1876
---
1876
1875
---
1875
1874
---
1874
1873
---
1873
1872
---
1872
1871
---
1871
1870
---
1870
1869
---
1869
1868
---
1868
1867
---
1867
1866
---
1866
1865
---
1865
1864
---
1864
1863
---
1863
1862
---
1862
1860
---
1860
1859
---
1859
1857
---
1857
1856
---
1856
1855
---
1855
1854
---
1854
1851
---
1851
1850
---
1850
1849
---
1849
1848
---
1848
1847
---
1847
1846
---
1846
1845
---
1845
1844
---
1844
1843
---
1843
1842
---
1842
1841
---
1841
1840
---
1840
1839
---
1839
1835
---
1835
1834
---
1834
1833
---
1833
1832
---
1832
1827
---
1827
1826
---
1826
1825
---
1825
1824
---
1824
1823
---
1823
1822
---
1822
1821
---
1821
1820
---
1820
1819
---
1819
1817
---
1817
1816
-

717
---
717
716
---
716
715
---
715
714
---
714
713
---
713
712
---
712
711
---
711
710
---
710
709
---
709
708
---
708
707
---
707
706
---
706
705
---
705
703
---
703
702
---
702
701
---
701
700
---
700
699
---
699
698
---
698
697
---
697
696
---
696
695
---
695
694
---
694
693
---
693
692
---
692
691
---
691
690
---
690
689
---
689
688
---
688
687
---
687
686
---
686
685
---
685
683
---
683
682
---
682
681
---
681
680
---
680
679
---
679
678
---
678
677
---
677
676
---
676
675
---
675
674
---
674
673
---
673
672
---
672
671
---
671
670
---
670
669
---
669
668
---
668
667
---
667
666
---
666
665
---
665
664
---
664
663
---
663
662
---
662
661
---
661
660
---
660
659
---
659
658
---
658
657
---
657
656
---
656
654
---
654
653
---
653
652
---
652
651
---
651
650
---
650
649
---
649
648
---
648
647
---
647
645
---
645
644
---
644
643
---
643
642
---
642
641
---
641
640
---
640
639
---
639
638
---
638
637
---
637
636
---
636
635
---
635
634
---
634
633
---
633
632
---
632
631
---
631
629


In [22]:
len(g.nodes())

7202

# Write to Disk

In [23]:
nx.write_gpickle(g, "01_" + DATASET + ".gpickle")

In [24]:
# export to graphml. graphml export can't handle some datatypes (including dicts and None)
# so delete them
h = g.copy()

for n in h.nodes():
    h.nodes[n].pop('tags', None)
for e in g.edges():
    h.edges[e].pop('tags', None)
nx.write_graphml(h, "01_" + DATASET + ".graphml")

# clear up memory
h = None

# Cruft

## Dictionary of Tags

In [51]:
tags = {}
osm = OSM('./' + DATASET + '.osm', is_xml_string=False)

for w in osm.ways.values():
    for t in w.tags:
        if tags.get(t):
            tags[t] = tags[t] + 1
        else:
            tags[t] = 1

In [52]:
tags

{'attribution': 146,
 'condition': 141,
 'highway': 205,
 'lanes': 142,
 'massgis:way_id': 142,
 'maxspeed': 107,
 'name': 169,
 'oneway': 108,
 'source': 154,
 'surface': 126,
 'width': 131,
 'cycleway:right': 15,
 'massgis:ref': 16,
 'parking:lane:left': 10,
 'lit': 3,
 'foot': 12,
 'access': 13,
 'building': 643,
 'addr:housenumber': 508,
 'addr:street': 507,
 'amenity': 17,
 'brand': 5,
 'brand:wikidata': 4,
 'brand:wikipedia': 4,
 'dispensing': 3,
 'drive_through': 3,
 'healthcare': 3,
 'opening_hours': 6,
 'payment:cash': 3,
 'payment:visa': 3,
 'phone': 7,
 'website': 10,
 'wheelchair': 3,
 'addr:city': 10,
 'addr:postcode': 11,
 'building:levels': 8,
 'operator': 3,
 'power': 3,
 'ref': 2,
 'substation': 1,
 'voltage': 3,
 'addr:state': 5,
 'shop': 2,
 'denomination': 3,
 'religion': 3,
 'area': 8,
 'created_by': 6,
 'leisure': 20,
 'massgis:ARTICLE97': 8,
 'massgis:ASSESS_ACR': 8,
 'massgis:ATT_DATE': 8,
 'massgis:DCAM_ID': 8,
 'massgis:DEED_ACRES': 8,
 'massgis:EOEAINVOLV': 8