In [7]:
import os
import sys
sys.path.insert(0, os.path.abspath("../"))
import pandas
import numpy as np
import torch
from pathlib import Path
import t4c22
from t4c22.misc.t4c22_logging import t4c_apply_basic_logging_config
from t4c22.t4c22_config import load_basedir
from t4c22.misc.parquet_helpers import load_df_from_parquet
from tqdm import tqdm
t4c_apply_basic_logging_config(loglevel="DEBUG")
# Load BASEDIR from file, change to your data root.
BASEDIR = load_basedir(fn="t4c22_config.json", pkg=t4c22)

In [2]:
def trans_tunnel(x):
    if x == '':
        return 0
    else:
        return 1
def tran_counter_dis(x):
    if x == 0:
        x = 0.5
    return 1/x
def trans_lanes(x):
    if x =='':
        return 0
    if isinstance (eval(x),int):
        y =  int(x)
    elif isinstance (eval(x),list):
        y = int(max(eval(x)))
    else:
        y =  int(eval(x))
    if y > 4:
        return 4
    else:
        return y-1
def trans_oneway(x):
    if x == 'False':
        return 0
    else:
        return 1
def trans_edge_attr(city):
    df_edge = load_df_from_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet')
    df_edge['oneway'] = df_edge['oneway'].apply(lambda x: trans_oneway(x))
    df_edge['tunnel'] = df_edge['tunnel'].apply(lambda x: trans_tunnel(x))
    df_edge['counter_distance'] = df_edge['counter_distance'].apply(lambda x: tran_counter_dis(x))
    df_edge['lanes'] = df_edge['lanes'].apply(lambda x: trans_lanes(x))
    df_edge.to_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet',compression='snappy')
for city in ["london","madrid","melbourne"]:
    trans_edge_attr(city)

In [5]:
def get_edge_flow(city):
    edge_free_flows_kph = {}
    free_flow_low = set()
    sc_files = sorted((BASEDIR / 'speed_classes' / city ).glob('*.parquet'))
    for i in tqdm(range(len(sc_files))):  # reading from every 5th file seems sufficient
        sc_df = pandas.read_parquet(sc_files[i])
        for u, v, ff in zip(sc_df["u"], sc_df["v"], sc_df["free_flow_kph"]):
            if ff < 8 or ff != ff:  # Check for too low or NaN values
                free_flow_low.add((u, v))
                continue
            edge_free_flows_kph[(u, v)] = ff
    print("free_flow_low: ",len(free_flow_low))
    df_edge = pandas.read_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet')

    df_edge['flow'] = df_edge['parsed_maxspeed']
    for uv,f in tqdm(edge_free_flows_kph.items()):

        df_edge.iloc[df_edge[(df_edge['u'] == uv[0]) & (df_edge['v'] == uv[1])].index,-1]= f
    df_edge.to_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet', compression='snappy')
for city in ["london","madrid","melbourne"]:
    get_edge_flow(city)

100%|██████████| 110/110 [01:49<00:00,  1.01it/s]


free_flow_low:  149


100%|██████████| 132233/132233 [01:24<00:00, 1562.34it/s]
100%|██████████| 109/109 [02:29<00:00,  1.37s/it]


free_flow_low:  613


100%|██████████| 121250/121250 [01:16<00:00, 1580.24it/s]
100%|██████████| 108/108 [00:33<00:00,  3.22it/s]


free_flow_low:  1793


100%|██████████| 92926/92926 [00:56<00:00, 1655.40it/s]


In [9]:
def free_flow_speed_limit(free_flow_kph, speed_limit_kph):
    if not free_flow_kph or np.isnan(free_flow_kph) or free_flow_kph < 20:
        free_flow_kph = 20
    if speed_limit_kph >= 5 and free_flow_kph > speed_limit_kph:
        free_flow_kph = speed_limit_kph
    # Reduce free flow to max 60% but not below, e.g. 32->20, 50->30, 80->48, 110->66
    free_flow_kph = max(free_flow_kph, speed_limit_kph * 0.6)
    return free_flow_kph
    
def get_edge_limit_speed(city):
    df_edge = pandas.read_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet')
    speed_limit_kph = torch.from_numpy(np.array(df_edge["parsed_maxspeed"].values,dtype=float))
    free_flow_kph = torch.from_numpy(np.array(df_edge["flow"].values,dtype=float))
    limit_speed = []
    for i in range(len(df_edge)):
        speed = free_flow_speed_limit(free_flow_kph[i], speed_limit_kph[i])
        limit_speed.append(1/speed)
    df_edge["limit_speed"] = np.array(limit_speed)
    df_edge.to_parquet(BASEDIR/'road_graph'/city/'road_graph_edges.parquet', compression='snappy')
for city in ["london","madrid","melbourne"]:
    get_edge_limit_speed(city)

In [15]:

def get_new_edge_index(city):
    raw_edges = pandas.read_parquet(BASEDIR/'road_graph'/city/"road_graph_edges.parquet")
    raw_nodes = pandas.read_parquet(BASEDIR/'road_graph'/city/"road_graph_nodes.parquet")

    raw_edges["e_id"] = raw_edges.index
    raw_edges = raw_edges[['u','v','e_id']]
    # raw_edges
    nodes = list(raw_nodes["node_id"].values)
    new_edges = []
    node_id_map = {}
    for i,k in enumerate(nodes):
        node_id_map[k] = i
    for node in tqdm(nodes):
        a = raw_edges[raw_edges['v'] == node]
        if a.empty:
            continue
        a = a.copy().reset_index()
        b = raw_edges[raw_edges['u'] == node]
        if b.empty:
            continue
        b = b.copy().reset_index()
        for i in range(len(a)):
            for j in range(len(b)):
                eu = a.iloc[i,3]
                ev = b.iloc[j,3]
                new_edges.append((eu,ev))
    
    new_edges = np.array(new_edges).T
    
    np.save(BASEDIR/'road_graph'/city/"new_edge_index.npy",new_edges)
    print(new_edges.shape)
    return new_edges

In [None]:
for city in ["london","madrid","melbourne"]:
    get_new_edge_index(city)