In [None]:
"""node_data.ipynb
description: various methods of computing node data are analyzed for performance
by: Archie Gertsman (arkadiy2@illinois.edu)
Project director: Richard Sowers
r-sowers@illinois.edu https://publish.illinois.edu/r-sowers/
Copyright 2019 University of Illinois Board of Trustees. All Rights Reserved. Licensed under the MIT license
"""

In [417]:
import sys
sys.path.append('../Data/')
from data import csv_to_df
import osmnx as ox
from time import time
import numpy as np
# Drivable street network around the point (37.977482, 23.735405), built with dist=500.
# This `graph` is the default network used by the timing cells below.
graph = ox.graph_from_point((37.977482, 23.735405), network_type='drive', dist=500) 
# Load the sample data via csv_to_df (imported from the `data` module on the ../Data/ path).
# NOTE(review): later cells read 'lat', 'lon' and group by 'id' -- confirm csv_to_df provides these.
df = csv_to_df('../Data/sample_larger.csv')
# df = df.groupby('id', as_index=False, group_keys=False).apply(lambda rows: rows[:100])
# graph = ox.graph_from_point(df.loc[(1,0.0), ['lat','lon']])


In [26]:
# find nearest node/edge for each individual row in the dataframe
def individual_rows(row, graph):
    """Annotate a single dataframe row with its nearest graph node and edge.

    Args:
        row: a row exposing 'lat' and 'lon' entries; it is modified in place.
        graph: the street network handed to the osmnx lookups.

    Returns:
        The same row object, with 'nearest_node', 'nearest_edge_start_node'
        and 'nearest_edge_end_node' filled in.
    """
    # the point is passed to osmnx ordered as (lat, lon)
    point = (row['lat'], row['lon'])
    closest_node = ox.get_nearest_node(graph, point)
    # the edge lookup yields three components; only the first two are kept
    edge_start, edge_end, _ = ox.get_nearest_edge(graph, point)

    row['nearest_node'] = closest_node
    row['nearest_edge_start_node'] = edge_start
    row['nearest_edge_end_node'] = edge_end
    return row

# start = time()
# df.apply(individual_rows, axis=1, args=(graph,))
# end = time()
# end-start

In [10]:
# find nearest nodes/edges for each vehicle in the dataframe
def in_groups(rows, graph):
    """Annotate a group of dataframe rows with nearest nodes and edges in bulk.

    Args:
        rows: a set of rows exposing 'lon' and 'lat' columns; modified in place.
        graph: the street network handed to the osmnx lookups.

    Returns:
        The same rows object, with 'nearest_node', 'nearest_edge_start_node'
        and 'nearest_edge_end_node' columns filled in.
    """
    # the bulk lookups receive longitudes first, then latitudes
    lons, lats = rows['lon'], rows['lat']
    closest_nodes = ox.get_nearest_nodes(graph, lons, lats, method='kdtree')
    closest_edges = ox.get_nearest_edges(graph, lons, lats, method='kdtree')

    # columns 0 and 1 of the edge result are used as the edge's start and end nodes
    edge_starts = closest_edges[:, 0]
    edge_ends = closest_edges[:, 1]

    rows['nearest_node'] = closest_nodes
    rows['nearest_edge_start_node'] = edge_starts
    rows['nearest_edge_end_node'] = edge_ends
    return rows

# Time the per-vehicle lookup: rows are grouped by 'id' and each group is
# handed to in_groups together with the graph.
start = time()
df.groupby('id', as_index=False, group_keys=False).apply(in_groups, graph)
end = time()
# elapsed seconds; as the last expression of the cell it is what gets displayed
end-start

KeyboardInterrupt: 

In [None]:
# there is no noticeable increase in performance from using groupby vs individual rows

In [100]:
# using a graph with radius 500m
# Drivable street network built with dist=500 (the 500 m radius case) around
# the geocoded Athens address.
g1 = ox.graph_from_address(
    'Athens, Municipality of Athens, Regional Unit of Central Athens, Attica, 10667, Greece',
    network_type='drive', dist=500)

# Time the row-by-row nearest node/edge lookup against this graph.
# The helper is named `individual_rows`; the earlier misspelling raised
# NameError when the notebook was run on a fresh kernel.
start = time()
df.apply(individual_rows, axis=1, args=(g1,))
end = time()
# elapsed seconds; displayed as the cell output
end-start

12.254104137420654

In [101]:
# using a graph with radius 1000m
# Drivable street network built with dist=1000 (the 1000 m radius case) around
# the geocoded Athens address.
g2 = ox.graph_from_address(
    'Athens, Municipality of Athens, Regional Unit of Central Athens, Attica, 10667, Greece',
    network_type='drive', dist=1000)

# Time the row-by-row nearest node/edge lookup against this larger graph.
# The helper is named `individual_rows`; the earlier misspelling raised
# NameError when the notebook was run on a fresh kernel.
start = time()
df.apply(individual_rows, axis=1, args=(g2,))
end = time()
# elapsed seconds; displayed as the cell output
end-start

26.41882014274597

In [None]:
# using a smaller radius boosts the performance significantly.

In [29]:
import pandas as pd
from joblib import Parallel, delayed

def applyParallel(df, func, g, n=4):
    """Apply a row-wise function to every row of a dataframe using joblib workers.

    Args:
        df: the dataframe whose rows are processed.
        func: callable invoked as func(row, g) for each row.
        g: second argument forwarded unchanged to every func call.
        n: number of joblib jobs (defaults to 4).

    Returns:
        A dataframe assembled from the per-row results: the results are
        concatenated as columns and then transposed back into rows.
    """
    # one delayed call per row; the index label from iterrows is not needed
    pending = (delayed(func)(record, g) for _, record in df.iterrows())
    processed = Parallel(n_jobs=n)(pending)
    as_columns = pd.concat(processed, axis=1)
    return as_columns.T

# Time the joblib-parallel version of the row-by-row lookup
# (applyParallel is called with its default n=4 jobs).
start = time()
df2=applyParallel(df, individual_rows, graph)
end = time()
# elapsed seconds; as the last expression of the cell it is what gets displayed
end-start

60.4370858669281

In [None]:
# we see a great performance boost from parallelizing the row-wise computation with the joblib library