In [1]:
import osmnx as ox
import networkx as nx
import pandas as pd
import json
import random
import statistics

from rtree import index
from utils import *

In [2]:
place_name = "Singapore, Central, Singapore"
G = ox.save_load.load_graphml(filename="Singapore_drive_processed.graphml")

# Stores nodes and edges mapped with speed bands along with other attributes
type(G)

networkx.classes.multidigraph.MultiDiGraph

In [3]:
edges = ox.graph_to_gdfs(G, nodes=False, edges=True)
nodes = pd.read_pickle('data/nodes_drive.pkl')

In [4]:
f = open("../../Traffic speed bands/Fri Feb 14 09_06_16 2020.json", "r").read()
x = json.loads(f)

speed_bands = pd.DataFrame.from_dict(x, orient='columns')
process_speed_band(speed_bands)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['MaximumSpeed'][i] = '80'


In [5]:
calculate_congestion(G, edges, speed_bands)

### Calculate Variance

In [6]:
# key: node label ,value: location
idx = index.Index()

xx = nodes['x'].astype(float)
yy = nodes['y'].astype(float)
node_id = nodes['osmid']
node_type = nodes['highway']

# nan, 'crossing', 'turning_circle', 'traffic_signals',
# 'motorway_junction', 'turning_loop', 'elevator', 'bus_stop',
# 'mini_roundabout', 'stop', 'speed_camera'

for node, x, y, kind in zip(node_id, xx, yy, node_type):
    if kind != 'nan' and kind != 'stop' and kind != 'speed_camera':
        idx.insert(int(node), (x,y,x,y))

In [7]:
def find_nearest_nodes(source, dist, idx):
    # Create a bounding box around source of min distance in all directions
    (north, south, east, west) = ox.bbox_from_point(point=source, distance=dist)
    
    candidate_nodes = list(idx.intersection((west, south, east, north)))    
    return candidate_nodes

In [10]:
def get_length_dict(sources, destinations, method='euclidean'):
    distances = dict()
    radius = 6371 * 1000 #m
    
    if method == 'euclidean':
        
        for source in sources:
            lat1 = nodes[nodes['osmid']==source]['y'].values[0]
            lon1 = nodes[nodes['osmid']==source]['x'].values[0]
        
            for dest in destinations:
                lat2 = nodes[nodes['osmid']==dest]['y'].values[0]
                lon2 = nodes[nodes['osmid']==dest]['x'].values[0]

                # euclidean distance in metres
                dlat = math.radians(lat2-lat1)
                dlon = math.radians(lon2-lon1)
                a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \
                    * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2)
                c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
                d = radius * c
                print(d/5.28)
                return None
#                 distances[(source,dest)] = d/5.28
                
            
    else:
        for source in sources:        
            for dest in destinations:
                try:
                    # dijsktra distance in sec
                    d = nx.shortest_path_length(G, source, dest, weight='BPR', method='dijkstra')
                except nx.NetworkXUnfeasible:
                    d = float('inf')
                
                distances[(source,dest)] = d
                
    # sort the dictionary in ascending order using distances
    x = sorted(distances.items(), key = lambda kv:(kv[1], kv[0]))
    return x    

In [13]:
# get 20 random sources
sources = []
for i in range(20):
    r = random.randint(0,23210)
    s = nodes['osmid'].values[r]
    sources.append(s)
    
# get 20 correspondig random targets
targets = []
for i in range(20):
    r = random.randint(0,23210)
    t = nodes['osmid'].values[r]
    targets.append(t)

deviations = []
for s,t in zip(sources,targets):
    orig_point = nodes[nodes['osmid'] == s].geometry.values[0]
    target_point = nodes[nodes['osmid'] == t].geometry.values[0]
    orig_xy = (orig_point.y, orig_point.x)
    target_xy = (target_point.y, target_point.x)
    
    # Max distance for walking is 700m (10 min * 1.2 m/s)
    # Find all nodes within 720 m from candidate sources and candidate destinations
    candidate_dest = find_nearest_nodes(target_xy, 720, idx)
    candidate_source = find_nearest_nodes(orig_xy, 720, idx)
    
#     real_lengths = get_length_dict(candidate_source, candidate_dest, method='dijkstra')
    euclidean = get_length_dict(candidate_source, candidate_dest)
    
#     # value1:hybrid, value2: exhaustive
#     value1 = hybrid_search_length(G, euclidean, real_lengths)
#     value2 = real_lengths[0][1]
    
#     mean = (value1 + value2)/2
#     variance = (value1**2 + value2**2)/2 - mean**2
#     print(variance**0.5)
#     deviations.append(variance**0.5)
    
# # take mean of deviations
# print(statistics.mean(deviations))

2302.8935360619193
385.57856424889644
4128.753016212273
1398.548019913974
277.99825791007635
607.3501602826408
2740.5338298522993
3130.1987376538464
2800.3194381369217
982.1130828557515
3941.757478571374
2047.587054931843
1818.2881883952512
3545.477771560708
2801.631215273605
1705.8737822457858
2777.5120648217503
1996.2567818082746
2649.436789808568
3342.7255430866344
