
# OpenStreetMap with OSMnx

This example shows how to use OSMnx to download and model a street network
from OpenStreetMap, visualize centrality, and save the graph as a shapefile,
a GeoPackage, or GraphML.

OSMnx is a Python package to retrieve, model, analyze, and visualize
OpenStreetMap street networks as NetworkX MultiDiGraph objects. It can also
retrieve any other spatial data from OSM as geopandas GeoDataFrames. See
https://osmnx.readthedocs.io/ for OSMnx documentation and usage.


In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import networkx as nx
import osmnx as ox
import polars as pl
from functools import partial
import multiprocessing as mp
from tqdm import tqdm
from taxifare import data
from taxifare import boroughs
import pickle
import os

# OSMnx global settings: echo OSMnx log messages to the console, and reuse
# locally cached HTTP responses instead of re-requesting the same data.
ox.settings.log_console=True
ox.settings.use_cache=True

In [None]:
# ~ 6m 26s
# download street network data from OSM and construct a MultiDiGraph model

def _simplify_and_save(G: nx.MultiDiGraph, simplified_map: str,
                       tolerance: float, source: str = "") -> nx.MultiDiGraph:
    """Consolidate the intersections of ``G`` and save the result as GraphML.

    Args:
        G: graph to simplify.
        simplified_map: path the simplified graph is written to.
        tolerance: tolerance forwarded to the OSMnx consolidation routine.
        source: optional text inserted into the progress message to say where
            ``G`` came from (include a trailing space).

    Returns:
        The simplified graph.
    """
    print("Starting to simplify map. May require some time")
    # NOTE(review): this is a private OSMnx function (leading underscore), so
    # it may change between OSMnx releases. The public entry point appears to
    # be ``ox.consolidate_intersections`` -- confirm whether it can be used
    # here without changing the result.
    H = ox.simplification._consolidate_intersections_rebuild_graph(
        G, tolerance=tolerance, reconnect_edges=True)
    print(f"simplified {source}with tolerance = {tolerance}")
    ox.save_graphml(H, filepath=simplified_map)
    print(f"saved simplified graph: {simplified_map}")
    return H


def load_map(base_map: str, simplified_map: str, tolerance: float) -> nx.MultiDiGraph:
    """Return the simplified street graph, building and caching it if needed.

    The cheapest available source is used:

    1. If ``simplified_map`` exists it is loaded and returned as is. The
       ``tolerance`` argument is not checked against the file, so the caller
       should encode the tolerance in the file name.
    2. Otherwise, if ``base_map`` exists it is loaded, simplified, and the
       result is saved to ``simplified_map``.
    3. Otherwise the New York City graph is downloaded, edge speeds and
       travel times are added, the graph is saved to ``base_map``, and then
       simplified and saved to ``simplified_map``.

    Args:
        base_map: path of the GraphML file holding the unsimplified graph.
        simplified_map: path of the GraphML file holding the simplified graph.
        tolerance: tolerance used when consolidating intersections.

    Returns:
        The simplified graph.
    """
    if os.path.exists(simplified_map):
        print(f"loading saved map {simplified_map}")
        H = ox.load_graphml(simplified_map)
        print(f"loaded saved map {simplified_map}")
        return H

    if os.path.exists(base_map):
        G = ox.load_graphml(base_map)
        return _simplify_and_save(G, simplified_map, tolerance,
                                  source=f"base_map({base_map}) ")

    print("downloading graph. May require some time")
    G = ox.graph.graph_from_place("New York City, New York, USA")
    print("downloaded graph")
    G = ox.add_edge_speeds(G)
    G = ox.add_edge_travel_times(G)
    print("added edge speeds and travel times")
    # Save the unsimplified graph first so a later run with a different
    # tolerance can skip the download.
    ox.save_graphml(G, filepath=base_map)
    print(f"saved base map: {base_map}")
    return _simplify_and_save(G, simplified_map, tolerance)

def plot_graph(G: nx.MultiDiGraph):
    """Draw ``G`` with OSMnx on a black background with dark grey edges."""
    style = {
        "bgcolor": "k",
        "node_size": 5,
        "edge_linewidth": 2,
        "edge_color": "#333333",
    }
    ox.plot_graph(G, **style)

In [None]:
# Cache file names for the unsimplified and the simplified graph. The
# simplified file name embeds the tolerance, so each tolerance value gets its
# own cache file.
base_map = "new_york_base.graphml"
# NOTE(review): presumably expressed in the graph's coordinate units (degrees
# for an unprojected graph) -- confirm.
tolerance = 0.00025
simplified_map = f"new_york_{tolerance}.graphml"

# load data ~1m 50s
G = load_map(base_map, simplified_map, tolerance)
# NOTE(review): presumably load_data() returns a lazy polars frame and
# fetch(500_000) caps the number of rows read -- confirm in taxifare.data.
df = data.load_data().fetch(500_000)

In [None]:
def calculate_travel_distance(coords: pl.Series) -> pl.Series :
    """Apply ``data.calculate_travel_distance`` to every trip in ``coords``.

    ``coords`` is a struct series with the fields ``pickup_longitude``,
    ``pickup_latitude``, ``dropoff_longitude`` and ``dropoff_latitude``. Each
    row is passed as a ``(lon1, lat1, lon2, lat2)`` tuple, together with the
    notebook-global graph ``G``, to ``data.calculate_travel_distance`` in a
    pool of worker processes.

    Returns:
        A series with one result per input row, in input order.
    """
    n_process = 4
    chunksize = 8

    field_names = ('pickup_longitude', 'pickup_latitude',
                   'dropoff_longitude', 'dropoff_latitude')
    columns = [coords.struct.field(name) for name in field_names]
    trips = zip(*columns)

    # NOTE(review): the partial (and therefore G) is presumably pickled with
    # every task batch sent to a worker -- confirm, and consider a pool
    # initializer if that proves costly.
    route_fn = partial(data.calculate_travel_distance, G=G)
    with mp.Pool(processes=n_process) as pool:
        results = pool.imap(route_fn, trips, chunksize)
        # Consume the lazy imap iterator while the pool is still alive.
        weights = list(tqdm(results, total=len(columns[0])))

    return pl.Series(weights)

In [None]:
# Pack the four coordinate columns into a single struct column so that
# calculate_travel_distance receives them together, and store its result in a
# new 'travel_time' column.
df = df.with_columns(pl.struct(['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude'])
                    .map(calculate_travel_distance).alias('travel_time'))

## Approximated distance

Instead of calculating the distance for all points, we calculate it only for pairs of neighborhoods.

In [None]:
# Load the graph stored in "new_york_base.graphml" and rebind G to it.
# NOTE(review): this replaces the G returned by load_map earlier in the
# notebook, while the pickle written at the end of the notebook is tagged
# "0.00025" -- confirm which graph is intended here.
G = ox.load_graphml("new_york_base.graphml")

In [None]:
from itertools import combinations

# Flatten the per-borough neighbourhood lists into a single list.
boros = boroughs.load()
hoods = [h for b in boros.values() for h in b['hoods']]

# One (name, centroid coordinate tuple) entry per neighbourhood, ordered by
# name so that the pair keys built below are deterministic.
hoods_with_centroid = sorted(
    ((h['name'], tuple(h['geometry'].centroid.coords)[0]) for h in hoods),
    key=lambda x: x[0],
)

# Every unordered pair of distinct neighbourhoods, each pair exactly once, in
# the same order as a nested "i < j" index loop over the sorted list.
pairs = list(combinations(hoods_with_centroid, 2))

# (name_a, name_b) -> (a[0], a[1], b[0], b[1]) taken from the two centroids.
# NOTE(review): presumably (lon, lat, lon, lat), matching the argument order
# used for data.calculate_travel_distance elsewhere in the notebook -- confirm.
# NOTE(review): if two neighbourhoods share a name, later pairs overwrite
# earlier ones under the same key.
pairs_dict = {
    (name_a, name_b): (xy_a[0], xy_a[1], xy_b[0], xy_b[1])
    for (name_a, xy_a), (name_b, xy_b) in pairs
}

In [None]:
n_process = 10
chunksize = 32

# Apply data.calculate_travel_distance to every neighbourhood pair in a pool
# of worker processes, passing the notebook-global graph G to each call.
partial_travel_distance = partial(data.calculate_travel_distance, G=G)
distance_dict = {}
keys_list = list(pairs_dict.keys())
with mp.Pool(processes=n_process) as p:
    results = p.imap(partial_travel_distance, pairs_dict.values(), chunksize)
    # imap yields results in input order, and a dict's keys and values iterate
    # in the same order, so each result can be paired with its key directly.
    # tqdm comes first in the zip so it is the iterator that gets exhausted.
    for travel_time, key in zip(tqdm(results, total=len(pairs_dict)), keys_list):
        distance_dict[key] = travel_time

In [None]:
# Serialize distance_dict to disk using the highest pickle protocol available.
# NOTE(review): pickle files should only ever be loaded from trusted sources.
with open('neighborhoods_pair_distance_0.00025.pickle', 'wb') as f:
    pickle.dump(distance_dict, f, protocol=pickle.HIGHEST_PROTOCOL)