In [1]:
import copy
import pickle
import random
import urllib.request
import xml.sax
from math import radians, cos, sin, asin, sqrt
from pathlib import Path

import folium
import networkx as nx

In [2]:
def haversine_distance(lon1, lat1, lon2, lat2, unit_m=True):
    """
    Calculate the great-circle distance between two points on the earth.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    unit_m : bool, optional
        If True (the default) the result is in metres, otherwise in
        kilometres.

    Returns
    -------
    float
        Distance along a sphere of radius 6371 km.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points); clamp it so sqrt()/asin() never
    # raise a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of the Earth in kilometers. Use 3956 for miles
    if unit_m:
        r *= 1000
    return c * r


class Node:
    """A single OSM node: an identifier, a position and a tag dictionary."""

    def __init__(self, id, lon, lat):
        # Identifier and coordinates are stored exactly as given by the caller.
        self.id, self.lon, self.lat = id, lon, lat
        # Tags start empty and are filled in by whoever builds the node.
        self.tags = {}

    def __str__(self):
        fields = (self.id, self.lon, self.lat)
        return "Node (id : %s) lon : %s, lat : %s " % fields


class Way(object):
    """An OSM way: an ordered list of node references plus a tag dictionary.

    Attributes
    ----------
    osm : object
        Whatever the caller passed as ``osm`` (stored, not used here).
    id : str
        Way identifier; ``split`` appends a ``"-<index>"`` suffix to it.
    nds : list
        Node references in the order they were appended.
    tags : dict
        Tag key -> tag value.
    """

    def __init__(self, id, osm):
        self.osm = osm
        self.id = id
        self.nds = []
        self.tags = {}

    def split(self, dividers):
        """Cut the way at every interior node that is used more than once.

        Parameters
        ----------
        dividers : mapping
            Node reference -> usage count. An interior node (neither the
            first nor the last of ``nds``) with a count greater than 1 ends
            one segment and starts the next, so it appears in both.

        Returns
        -------
        list of Way
            Shallow copies of this way, one per segment, with ids
            ``"<id>-0"``, ``"<id>-1"``, ... Because the copies are shallow,
            they all share this way's ``tags`` dictionary.

        Raises
        ------
        KeyError
            If an interior node reference is missing from ``dividers``.
        """
        # Single pass instead of recursion: the recursive form needed one
        # stack frame per divider (RecursionError on long, heavily connected
        # ways) and re-copied the remaining tail at every cut.
        segments = []
        start = 0
        for i in range(1, len(self.nds) - 1):
            if dividers[self.nds[i]] > 1:
                segments.append(self.nds[start:i + 1])
                start = i
        segments.append(self.nds[start:])

        # create a way object for each node-array segment
        ret = []
        for index, segment in enumerate(segments):
            littleway = copy.copy(self)
            littleway.id += "-%d" % index
            littleway.nds = segment
            ret.append(littleway)

        return ret



In [3]:
class OSM(object):
    """Nodes and ways parsed from an OSM XML document.

    After construction ``nodes`` maps node id -> Node and ``ways`` maps
    way id -> Way, where every way has already been split at the nodes it
    shares with other ways (see ``Way.split``).
    """

    def __init__(self, osm_xml_data, is_xml_string=True):
        """Parse ``osm_xml_data`` and build the node / way dictionaries.

        Parameters
        ----------
        osm_xml_data : str, bytes, path or file object
            The XML text itself when ``is_xml_string`` is True; otherwise a
            filename or an already opened file/stream object.
        is_xml_string : bool, optional
            Set to False if ``osm_xml_data`` is a filename or a file stream.

        Raises
        ------
        KeyError
            If a way references a node id that is not in the document.
        """
        nodes = {}
        ways = {}

        superself = self

        class OSMHandler(xml.sax.ContentHandler):
            """SAX handler that collects <node> and <way> elements."""

            def __init__(self):
                xml.sax.ContentHandler.__init__(self)
                # Node or Way currently being parsed; None outside of both.
                self.currElem = None

            def startElement(self, name, attrs):
                if name == 'node':
                    self.currElem = Node(attrs['id'], float(attrs['lon']), float(attrs['lat']))
                elif name == 'way':
                    self.currElem = Way(attrs['id'], superself)
                elif name == 'tag':
                    # Tags of elements that are not collected (e.g.
                    # <relation>) must not leak onto the last node/way.
                    if self.currElem is not None:
                        self.currElem.tags[attrs['k']] = attrs['v']
                elif name == 'nd':
                    if self.currElem is not None:
                        self.currElem.nds.append(attrs['ref'])

            def endElement(self, name):
                if name == 'node':
                    nodes[self.currElem.id] = self.currElem
                    self.currElem = None
                elif name == 'way':
                    ways[self.currElem.id] = self.currElem
                    self.currElem = None

        handler = OSMHandler()
        if is_xml_string:
            xml.sax.parseString(osm_xml_data, handler)
        elif hasattr(osm_xml_data, 'read'):
            # Already a file/stream object: hand it to the parser as is.
            xml.sax.parse(osm_xml_data, handler)
        else:
            # Binary mode lets the XML parser pick the encoding from the
            # document instead of relying on the platform default.
            with open(osm_xml_data, mode='rb') as f:
                xml.sax.parse(f, handler)

        self.nodes = nodes
        # Keep only ways with at least two nodes. Built as a new dict because
        # deleting entries while iterating over the dict raises RuntimeError.
        self.ways = {way_id: way for way_id, way in ways.items() if len(way.nds) >= 2}

        # count times each node is used
        node_histogram = dict.fromkeys(self.nodes.keys(), 0)
        for way in self.ways.values():
            for node in way.nds:
                node_histogram[node] += 1

        # use that histogram to split all ways, replacing the member set of ways
        new_ways = {}
        for way in self.ways.values():
            for split_way in way.split(node_histogram):
                new_ways[split_way.id] = split_way
        self.ways = new_ways


In [14]:
def read_osm(osm_xml_data, is_xml_string=True, only_roads=True):
    """Build a directed graph from OSM XML data.

    Parameters
    ----------
    osm_xml_data : XML string, filename or stream object
        Passed unchanged to ``OSM``.
    is_xml_string : bool, optional
        Passed unchanged to ``OSM``; set to False for a filename or stream.
    only_roads : bool, optional
        If True (the default), ways without a ``highway`` tag are skipped.

    Returns
    -------
    G : networkx.DiGraph
        Nodes are labelled with ``(lon, lat)`` tuples and carry the
        attributes ``lat``, ``lon``, ``id`` and ``tags``. Edges carry
        ``id``, ``tags`` and ``havlen``, the haversine length in metres.
        Nodes sharing identical coordinates map to the same label.

    Examples
    --------
    >>> G = read_osm("./cambridge-small.osm", is_xml_string=False)
    >>> import matplotlib.pyplot as plt
    >>> plt.plot([G.nodes[n]['lon'] for n in G], [G.nodes[n]['lat'] for n in G], 'o', color='k')
    >>> plt.show()
    """
    osm = OSM(osm_xml_data, is_xml_string=is_xml_string)
    G = nx.DiGraph()

    ## Add ways
    for w in osm.ways.values():
        if only_roads and 'highway' not in w.tags:
            continue

        oneway = w.tags.get('oneway')
        if oneway in ('yes', 'true', '1'):
            # ONLY ONE DIRECTION, following the node order
            directions = (w.nds,)
        elif oneway in ('-1', 'reverse'):
            # ONLY ONE DIRECTION, against the node order
            directions = (w.nds[::-1],)
        else:
            # BOTH DIRECTION (no oneway tag, or any other value such as 'no')
            directions = (w.nds, w.nds[::-1])

        for path in directions:
            nx.add_path(G, path, id=w.id, tags=w.tags)

    # Complete the used nodes' information
    coordinates_map = {}
    for n_id in G.nodes():
        n = osm.nodes[n_id]
        G.nodes[n_id]['lat'] = n.lat
        G.nodes[n_id]['lon'] = n.lon
        G.nodes[n_id]['id'] = n.id
        G.nodes[n_id]['tags'] = n.tags
        coordinates_map[n_id] = (n.lon, n.lat)

    # Estimate the length of each way
    for u, v, d in G.edges(data=True):
        # Give a realistic distance estimation (neither EPSG nor projection
        # nor reference system are specified). `d` is the edge's own
        # attribute dict, so the value is stored on the existing edge.
        d['havlen'] = haversine_distance(
            G.nodes[u]['lon'], G.nodes[u]['lat'],
            G.nodes[v]['lon'], G.nodes[v]['lat'],
            unit_m=True,
        )

    G = nx.relabel_nodes(G, coordinates_map)
    return G


In [15]:
# Build the road graph from the OSM file at this path (a filename, hence is_xml_string=False).
g = read_osm("./cambridge-small.osm", is_xml_string=False)

In [16]:
# Number of nodes in the graph `g`.
len(g.nodes)

346

# Write to Disk

In [18]:
# nx.write_gpickle was removed in NetworkX 3.0; it was a thin wrapper around
# pickle.dump with the highest protocol, so write the pickle directly.
with open("01_cambridge-small.gpickle", "wb") as gpickle_file:
    pickle.dump(g, gpickle_file, pickle.HIGHEST_PROTOCOL)

# Dictionary of Tags

In [10]:
# Tally how many ways carry each tag key.
tags = {}
osm = OSM('./cambridge.xml', is_xml_string=False)

for w in osm.ways.values():
    for t in w.tags:
        # A key seen for the first time starts from zero.
        tags[t] = tags.get(t, 0) + 1

In [None]:
# Display the tag-key -> count dictionary.
tags

# Cruft - DO NOT EDIT BELOW. USE OTHER NOTEBOOK

# Print map

In [90]:
# nx.read_gpickle was removed in NetworkX 3.0; load the pickle directly.
# NOTE(review): the "Write to Disk" cell saves "01_cambridge-small.gpickle";
# confirm that this un-prefixed file is the intended input.
# Only unpickle files you produced yourself: pickle can run arbitrary code.
with open("cambridge-small.gpickle", "rb") as gpickle_file:
    g = pickle.load(gpickle_file)

## VoteRank (just for PoC)

In [91]:
m = folium.Map(location = [42.374253,-71.0938267], zoom_start=13)
# The package is imported as `nx`; the bare name `networkx` is not defined.
nodes_subset = nx.algorithms.centrality.voterank(g, number_of_nodes=10)

for node in nodes_subset:
    # Graph nodes are labelled with (lon, lat) tuples.
    lon,lat = node
    # Look the id up on the node itself rather than relying on a `node_id`
    # left over from another cell.
    node_id = g.nodes[node]["id"]
    m.add_child(folium.Marker(location=[lat,lon],popup=node_id))

m

## Betweenness centrality

In [92]:
m = folium.Map(location = [42.374253,-71.0938267], zoom_start=13)
# The package is imported as `nx`; the bare name `networkx` is not defined.
nodes_subset = nx.algorithms.centrality.betweenness_centrality(g)

# Mark every node whose betweenness centrality exceeds 0.2.
for lon_lat, centrality in nodes_subset.items():
    if centrality > .2:
        lon,lat = lon_lat
        m.add_child(folium.Marker(location=[lat,lon]))

m

## Map with PolyLine (just because)

In [93]:
m = folium.Map(location = [42.374253,-71.0938267], zoom_start=13)
# random.sample needs a sequence; the node-data view is set-like, which
# Python 3.11+ rejects with a TypeError, so materialise it as a list first.
nodes_subset = random.sample(list(g.nodes.data()), 10)
node_walk = []

for node in nodes_subset:
    # Each item is a (label, attributes) pair; the label is a (lon, lat) tuple.
    node_id = node[1]["id"]
    lon_lat = node[0]
    lon,lat = lon_lat

    # For each node, add a marker on the map.
    m.add_child(folium.Marker(location=[lat,lon],popup=node_id))

    # For each node, add it as a destination on the PolyLine walk.
    # To get a realistic route (i.e. not "as the crow flies") from points
    # alone you would need as many intermediate steps as possible.
    # It can probably take GIS edge information too -- the edge data has
    # not been looked at yet.
    node_walk.append([lat,lon])


# Draw the first three sampled nodes in blue and the last five in red.
m.add_child(folium.PolyLine(locations = node_walk[:3], line_opacity = 0.5, color="blue"))
m.add_child(folium.PolyLine(locations = node_walk[5:], line_opacity = 0.5, color="red"))


m