In [42]:
import copy
import pickle
import random
import urllib.request
import xml.sax
from math import radians, cos, sin, asin, sqrt
from pathlib import Path

import folium
import networkx as nx

from vars import DATASET

In [43]:
def haversine_distance(lon1, lat1, lon2, lat2, unit_m=True):
    """
    Calculate the great-circle distance between two points on the earth.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    unit_m : bool, optional
        When True (the default) the distance is returned in metres,
        otherwise in kilometres.

    Returns
    -------
    float
        Distance along the sphere of radius 6371 km.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1], which
    # would make sqrt/asin raise a domain error; clamp it back into range.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of the Earth in kilometers. Use 3956 for miles
    if unit_m:
        r *= 1000
    return c * r


class Node(object):
    """A single OSM node: an identifier, a position and its key/value tags."""

    def __init__(self, id, lon, lat):
        self.id, self.lon, self.lat = id, lon, lat
        # key/value tags; starts empty and is filled in by the parser
        self.tags = {}

    def __str__(self):
        """Return a one-line, human-readable description of the node."""
        template = "Node (id : {0!s}) lon : {1!s}, lat : {2!s} "
        return template.format(self.id, self.lon, self.lat)


class Way(object):
    """An OSM way: an ordered list of node references plus key/value tags."""

    def __init__(self, id, osm):
        self.osm = osm    # the OSM container this way belongs to
        self.id = id
        self.nds = []     # ordered node references making up the way
        self.tags = {}

    def split(self, dividers):
        """Cut this way at every interior node that is shared with other ways.

        Parameters
        ----------
        dividers : mapping
            Maps a node reference to the number of times it is used; an
            interior node with a count greater than 1 is a cut point.

        Returns
        -------
        list of Way
            Shallow copies of this way (made with ``copy.copy``, so ``tags``
            and ``osm`` are shared with the original), one per segment, with
            ids ``"<id>-0"``, ``"<id>-1"``, ... Neighbouring segments both
            contain the node at which they were cut.
        """
        def slice_array(ar, dividers):
            # Single pass instead of recursion: the recursive form needed one
            # stack frame per cut point and overflowed on very long ways.
            pieces = []
            start = 0
            # the first and last nodes are never cut points
            for i in range(1, len(ar) - 1):
                if dividers[ar[i]] > 1:
                    pieces.append(ar[start:i + 1])
                    start = i
            pieces.append(ar[start:])
            return pieces

        # create a way object for each node-array slice
        ret = []
        for i, segment in enumerate(slice_array(self.nds, dividers)):
            littleway = copy.copy(self)
            littleway.id += "-%d" % i
            littleway.nds = segment
            ret.append(littleway)

        return ret



In [44]:
class OSM(object):
    """In-memory view of an OSM XML document.

    After construction ``self.nodes`` maps node id -> Node and ``self.ways``
    maps way id -> Way, where every original way has been split at each node
    it shares with another way (or revisits itself).
    """

    def __init__(self, osm_xml_data, is_xml_string=True):
        """Parse OSM XML and build the node and way collections.

        Parameters
        ----------
        osm_xml_data : str, bytes, path or file object
            The XML document itself when `is_xml_string` is True; otherwise a
            filename or an already opened file/stream object.
        is_xml_string : bool, optional
            Set to False if `osm_xml_data` is a filename or a file stream.
        """
        nodes = {}
        ways = {}

        owner = self

        class OSMHandler(xml.sax.ContentHandler):
            """Collects <node> and <way> elements into `nodes` and `ways`."""

            def __init__(self):
                xml.sax.ContentHandler.__init__(self)
                # element currently being built; None outside <node>/<way>
                self.currElem = None

            def startElement(self, name, attrs):
                if name == 'node':
                    self.currElem = Node(attrs['id'], float(attrs['lon']), float(attrs['lat']))
                elif name == 'way':
                    self.currElem = Way(attrs['id'], owner)
                elif name == 'tag':
                    # tags of elements we do not model (e.g. <relation>) must
                    # not leak onto the previously parsed node or way
                    if self.currElem is not None:
                        self.currElem.tags[attrs['k']] = attrs['v']
                elif name == 'nd':
                    if isinstance(self.currElem, Way):
                        self.currElem.nds.append(attrs['ref'])

            def endElement(self, name):
                if name == 'node':
                    nodes[self.currElem.id] = self.currElem
                    self.currElem = None
                elif name == 'way':
                    ways[self.currElem.id] = self.currElem
                    self.currElem = None

        handler = OSMHandler()
        if is_xml_string:
            xml.sax.parseString(osm_xml_data, handler)
        elif hasattr(osm_xml_data, 'read'):
            # already a file/stream object: hand it to the parser as is
            xml.sax.parse(osm_xml_data, handler)
        else:
            # binary mode lets the XML parser pick the encoding from the
            # document instead of relying on the platform default
            with open(osm_xml_data, mode='rb') as f:
                xml.sax.parse(f, handler)

        self.nodes = nodes
        # a way with fewer than two nodes cannot form an edge: leave it out
        # (built as a new dict; deleting while iterating raises RuntimeError)
        self.ways = {way_id: way for way_id, way in ways.items() if len(way.nds) >= 2}

        # count times each node is used
        node_histogram = dict.fromkeys(self.nodes.keys(), 0)
        for way in self.ways.values():
            for node in way.nds:
                # .get(): a way may reference a node absent from this extract
                node_histogram[node] = node_histogram.get(node, 0) + 1

        # use that histogram to split all ways, replacing the member set of ways
        new_ways = {}
        for way in self.ways.values():
            for split_way in way.split(node_histogram):
                new_ways[split_way.id] = split_way
        self.ways = new_ways


In [45]:
def read_osm(osm_xml_data, is_xml_string=True, only_roads=True):
    """Build a directed graph from OSM XML data.

    Parameters
    ----------
    osm_xml_data : str, bytes, filename or file object
        Passed to ``OSM`` unchanged, together with `is_xml_string`.
    is_xml_string : bool, optional
        True when `osm_xml_data` is the XML document itself, False when it is
        a filename or a file stream.
    only_roads : bool, optional
        When True (the default) ways without a ``highway`` tag are skipped.

    Returns
    -------
    G : networkx.DiGraph
        Nodes are ``(lon, lat)`` tuples carrying ``lat``, ``lon``, ``id`` and
        ``tags`` attributes. Edges carry the way ``id``, its ``tags`` and
        ``havlen``, the haversine length of the edge in metres. Because nodes
        are keyed by coordinates, distinct OSM nodes with identical
        coordinates end up as a single graph node.

    Examples
    --------
    >>> G = read_osm("./map.osm", is_xml_string=False)
    >>> import matplotlib.pyplot as plt
    >>> plt.plot([G.nodes[n]['lon'] for n in G], [G.nodes[n]['lat'] for n in G], 'o', color='k')
    >>> plt.show()
    """
    osm = OSM(osm_xml_data, is_xml_string=is_xml_string)
    G = nx.DiGraph()

    # `oneway` values that restrict travel to the way's node order, and to the
    # opposite of it. Any other value (or no tag at all) means two-way.
    forward_only = ('yes', 'true', '1')
    backward_only = ('-1', 'reverse')

    ## Add ways
    for w in osm.ways.values():
        if only_roads and 'highway' not in w.tags:
            continue

        oneway = w.tags.get('oneway')
        if oneway not in backward_only:
            nx.add_path(G, w.nds, id=w.id, tags=w.tags)
        if oneway not in forward_only:
            nx.add_path(G, w.nds[::-1], id=w.id, tags=w.tags)

    # Complete the used nodes' information
    coordinates_map = {}
    for n_id in G.nodes():
        n = osm.nodes[n_id]
        G.nodes[n_id]['lat'] = n.lat
        G.nodes[n_id]['lon'] = n.lon
        G.nodes[n_id]['id'] = n.id
        G.nodes[n_id]['tags'] = n.tags
        coordinates_map[n_id] = (n.lon, n.lat)

    # Estimate the length of each edge. This is a great-circle estimate: no
    # EPSG code, projection or reference system is specified for the data.
    for u, v, d in G.edges(data=True):
        d['havlen'] = haversine_distance(
            G.nodes[u]['lon'], G.nodes[u]['lat'],
            G.nodes[v]['lon'], G.nodes[v]['lat'],
            unit_m=True,
        )

    # key the nodes by their (lon, lat) coordinates instead of the OSM id
    G = nx.relabel_nodes(G, coordinates_map)
    return G


In [46]:
# Build the road graph from the OSM extract named by DATASET.
g = read_osm("./{}.osm".format(DATASET), is_xml_string=False)

In [47]:
# Number of nodes in the road graph.
g.number_of_nodes()

346

# Write to Disk

In [48]:
# nx.write_gpickle was removed in networkx 3.0; it was a thin wrapper around
# pickle.dump with the highest protocol, so this writes the same file.
with open("01_" + DATASET + ".gpickle", "wb") as gpickle_file:
    pickle.dump(g, gpickle_file, pickle.HIGHEST_PROTOCOL)

In [49]:
# GraphML cannot store the nested 'tags' dicts, so export a copy without them.
# NOTE: the file written below is GraphML even though its extension is ".gml".
h = g.copy()

for n, node_attrs in h.nodes(data=True):
    node_attrs.pop('tags', None)
for e in h.edges():
    h.edges[e].pop('tags', None)
nx.write_graphml(h, "01_" + DATASET + ".gml")

# drop the reference so the stripped copy can be garbage-collected
h = None

# Cruft

## Dictionary of Tags

In [51]:
# Count, for every tag key, the number of (split) ways that carry it.
tags = {}
osm = OSM('./' + DATASET + '.osm', is_xml_string=False)

for w in osm.ways.values():
    for t in w.tags:
        tags[t] = tags.get(t, 0) + 1

In [52]:
# Display the tag-key -> occurrence-count mapping built in the previous cell.
tags

{'attribution': 146,
 'condition': 141,
 'highway': 205,
 'lanes': 142,
 'massgis:way_id': 142,
 'maxspeed': 107,
 'name': 169,
 'oneway': 108,
 'source': 154,
 'surface': 126,
 'width': 131,
 'cycleway:right': 15,
 'massgis:ref': 16,
 'parking:lane:left': 10,
 'lit': 3,
 'foot': 12,
 'access': 13,
 'building': 643,
 'addr:housenumber': 508,
 'addr:street': 507,
 'amenity': 17,
 'brand': 5,
 'brand:wikidata': 4,
 'brand:wikipedia': 4,
 'dispensing': 3,
 'drive_through': 3,
 'healthcare': 3,
 'opening_hours': 6,
 'payment:cash': 3,
 'payment:visa': 3,
 'phone': 7,
 'website': 10,
 'wheelchair': 3,
 'addr:city': 10,
 'addr:postcode': 11,
 'building:levels': 8,
 'operator': 3,
 'power': 3,
 'ref': 2,
 'substation': 1,
 'voltage': 3,
 'addr:state': 5,
 'shop': 2,
 'denomination': 3,
 'religion': 3,
 'area': 8,
 'created_by': 6,
 'leisure': 20,
 'massgis:ARTICLE97': 8,
 'massgis:ASSESS_ACR': 8,
 'massgis:ATT_DATE': 8,
 'massgis:DCAM_ID': 8,
 'massgis:DEED_ACRES': 8,
 'massgis:EOEAINVOLV': 8