In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
import json

import matplotlib.pyplot as plt
from cartoframes.viz import *

import pickle
def load_graph_data(pkl_filename):
    """Load the three components of a sensor graph from a pickle file.

    The pickle is expected to hold exactly three objects; they are unpacked
    and handed back as a tuple.

    Args:
        pkl_filename: Path of the pickle file, forwarded to ``load_pickle``.

    Returns:
        Tuple ``(sensor_ids, sensor_id_to_ind, adj_mx)`` in the order the
        objects are stored in the pickle.
    """
    graph_parts = load_pickle(pkl_filename)
    ids, id_to_ind, adjacency = graph_parts
    return ids, id_to_ind, adjacency

def load_pickle(pickle_file):
    """Deserialize and return the object stored in ``pickle_file``.

    A first attempt uses the default pickle settings. If that fails with a
    ``UnicodeDecodeError`` the file is read again with ``encoding='latin1'``.
    Any other failure is reported on stdout and re-raised unchanged.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.

    Args:
        pickle_file: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    try:
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle)
    except UnicodeDecodeError:
        # Retry with a permissive byte-to-text mapping.
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle, encoding='latin1')
    except Exception as err:
        print('Unable to load data ', pickle_file, ':', err)
        raise

In [2]:
import pandas as pd

# Read every spreadsheet found under pems-output/metr-la and stack them into
# one metadata table.
# os.listdir() returns names in arbitrary order, so the files are visited in
# sorted order: this keeps the row order of meta_df (and therefore which row
# wins when an ID appears in more than one file) reproducible across machines.
meta_df_list = []
for fname in sorted(os.listdir('pems-output/metr-la')):
    mdf = pd.read_excel(f'pems-output/metr-la/{fname}')
    meta_df_list.append(mdf)
# The per-file row index is kept as-is (no ignore_index), so index values repeat.
meta_df = pd.concat(meta_df_list)

In [3]:
# Display the concatenated metadata table built from pems-output/metr-la.
meta_df

Unnamed: 0,Fwy,District,County,City,CA PM,Abs PM,Length,ID,Name,Lanes,Type,Sensor Type,HOV,MS ID,IRM
0,SR126-E,7,Los Angeles,,4.9,39.398,,775962,COMMERCE CENTER DR,1,Off Ramp,,No,2695,
1,SR126-E,7,Los Angeles,,4.9,39.398,,775963,COMMERCE CENTER DR,1,On Ramp,,No,2695,
2,SR126-E,7,Los Angeles,,4.9,39.398,,775976,COMMERCE CENTER DR.2,1,On Ramp,,No,2696,
3,SR126-E,7,Los Angeles,,4.9,39.398,2.500,775975,COMMERCE CENTER DR.2,4,Mainline,,No,2696,
4,SR126-E,7,Los Angeles,,4.9,39.398,1.076,775961,COMMERCE CENTER DR,3,Mainline,,No,2695,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,SR2-E,7,Los Angeles,Glendale,R21.5,14.062,0.913,767609,FERN LANE,5,Mainline,loops,No,2473,
31,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,,768235,EB-NB 2 TO WB 210,3,Fwy-Fwy,loops,No,4472,
32,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,3.063,768238,VERDUGO BLVD,2,Mainline,loops,No,4472,
33,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,,768246,VERDUGO BLVD,2,Off Ramp,loops,No,4472,


In [4]:
# Collect one {ID, lat, lng} record per entry of the 'matches' list found in
# each JSON file of the california-vds folder, visiting files in sorted order.
item_list = []
flist = os.listdir('california-vds')
for fname in sorted(flist):
    if fname[-4:] == 'json':
        print(fname)
        with open('california-vds/' + fname) as fp:
            json_data = json.load(fp)
        item_list.extend(
            {
                # IDs are normalised to integer-looking strings.
                'ID': str(int(match['ID'])),
                'lat': match['PT'][0],
                'lng': match['PT'][1],
            }
            for match in json_data['matches']
        )

# Keep only the first record seen for every ID.
vds_df = pd.DataFrame(item_list).drop_duplicates('ID')

# Lookup table: ID string -> (lat, lng).
exact_id2loc = {
    row.ID: (row.lat, row.lng) for row in vds_df.itertuples(index=False)
}

california-vds-10.json
california-vds-11.json
california-vds-12.json
california-vds-13.json
california-vds-14.json
california-vds-15.json
california-vds-17.json
california-vds-2.json
california-vds-3.json
california-vds-4.json
california-vds-5.json
california-vds-6.json
california-vds-7.json
california-vds-8.json
california-vds-9.json
california-vds.json


# METR-LA Match

In [22]:
# Load the sensor location table, using the CSV's own 'index' column as the
# row index.
la_sensors = pd.read_csv('metr-la/graph_sensor_locations.csv', index_col='index')
# Give the three remaining columns short names.
la_sensors.columns = ['sid', 'lat', 'lng']
# Sensor ids are handled as strings throughout the notebook.
la_sensors = la_sensors.astype({'sid': str})

In [23]:
# Map each metadata ID (as a string) to its 'Fwy' value; when an ID occurs
# more than once, the last row of meta_df wins.
sid2fwy = dict(zip(meta_df['ID'].astype(str), meta_df['Fwy']))

In [150]:
# Manual freeway labels for sensors whose id is absent from sid2fwy.
# The mapping is defined here, ahead of its first use, so this cell does not
# raise NameError when the notebook is executed top to bottom on a fresh
# kernel.
recorrect_sid2fwy = {
    '764120': 'SR170-N',
    '764424': 'SR170-N',
    '765604': 'SR170-N',
    '764858': 'US101-N',
    '765182': 'US101-N',
}

# Freeway label per sensor: metadata first, then the manual override,
# '-' when neither knows the sensor.
fwys = [
    sid2fwy[sid] if sid in sid2fwy else recorrect_sid2fwy.get(sid, '-')
    for sid in la_sensors['sid']
]

# Location per sensor: the entry from exact_id2loc when available, otherwise
# the lat/lng already stored in la_sensors.
sensor_locs = [
    exact_id2loc.get(sid, (lat, lng))
    for sid, lat, lng in zip(la_sensors['sid'], la_sensors['lat'], la_sensors['lng'])
]
elats = [loc[0] for loc in sensor_locs]
elngs = [loc[1] for loc in sensor_locs]

la_sensors['Fwy2'] = fwys
la_sensors['elat'] = elats
la_sensors['elng'] = elngs

In [151]:
# sensor_df is just another name for la_sensors (no copy is made here).
sensor_df = la_sensors
# One point per sensor: x = elng, y = elat.
sensor_points = gpd.points_from_xy(x=sensor_df.elng, y=sensor_df.elat)
# Coordinates are tagged as EPSG:4326.
sensor_gdf = gpd.GeoDataFrame(sensor_df, geometry=sensor_points, crs='epsg:4326')

In [152]:
from cartoframes.viz import Layer, color_category_style

In [156]:
# Render the sensors, coloured by the category in their 'Fwy2' column.
Layer(sensor_gdf, color_category_style('Fwy2'))

In [157]:
# Show which files are currently present in the metr-la folder.
os.listdir('metr-la')

['graph_sensor_ids.txt',
 'graph_sensor_locations.csv',
 'metr-la.h5',
 'adj_mx.pkl',
 'graph_sensor_locations_corrected.csv',
 'distances_la_2012.csv']

In [158]:
# Build one {sid, lat, lng} record per sensor, preferring the location from
# exact_id2loc and falling back to the values already in la_sensors.
# Every fallback is printed so sensors without an exact location are visible.
correct_list = []
for raw_sid, raw_lat, raw_lng in zip(la_sensors['sid'], la_sensors['lat'], la_sensors['lng']):
    sid = str(raw_sid)
    if sid not in exact_id2loc:
        correct_item = {'sid': sid, 'lat': raw_lat, 'lng': raw_lng}
        print('uncorrected:', correct_item)
    else:
        exact_loc = exact_id2loc[sid]
        correct_item = {'sid': sid, 'lat': exact_loc[0], 'lng': exact_loc[1]}
    correct_list.append(correct_item)
nla_sensors = pd.DataFrame(correct_list)

uncorrected: {'sid': '765604', 'lat': 34.16415, 'lng': -118.38223}
uncorrected: {'sid': '764424', 'lat': 34.17878, 'lng': -118.39469}
uncorrected: {'sid': '764120', 'lat': 34.20164, 'lng': -118.40366}
uncorrected: {'sid': '765182', 'lat': 34.06491, 'lng': -118.25126}
uncorrected: {'sid': '764858', 'lat': 34.1527, 'lng': -118.3754}


In [159]:
# Write the sid/lat/lng table to disk. No `index` argument is passed, so the
# DataFrame's row index is written as an extra leading column.
nla_sensors.to_csv('metr-la/graph_sensor_locations_corrected.csv')

# OSM load

In [160]:
from cartoframes.viz import Layer, popup_element, color_category_style, basic_style

In [14]:
import osmnx as ox

# Load the stored GraphML network (a drive network for METR-LA, judging by
# the file name) from the sibling graph_generation folder.
graph = ox.load_graphml(filepath='../graph_generation/osm_graph/metr-la-drive.graphml')

In [18]:
# Split the graph into a node table and an edge table.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(graph)
# Expose the node id (the index) as a column, both as-is and as a string.
gdf_nodes['osmidn'] = gdf_nodes.index
gdf_nodes['osmidstr'] = gdf_nodes['osmidn'].astype(str)

# Turn the edge index into ordinary columns, then keep only the rows whose
# 'highway' value is a plain str (rows holding any other type are dropped).
fgdf_edges = gdf_edges.reset_index()
cond = np.array([type(s) is str for s in fgdf_edges['highway']])
fgdf_edges = fgdf_edges[cond]

# Motorway edges, and the nodes that appear as an endpoint of any of them.
fmotorway = fgdf_edges[fgdf_edges['highway'] == 'motorway']
motorway_node_ids = set(fmotorway['u']) | set(fmotorway['v'])
fgdf_nodes = gdf_nodes[gdf_nodes.index.isin(motorway_node_ids)]

In [161]:
# Overview map: motorway endpoint nodes (black, click shows the node id),
# the motorway edges, and the sensors coloured by freeway label.
node_layer = Layer(
    fgdf_nodes,
    basic_style(color='black'),
    popup_click=[popup_element('osmidstr')],
)
motorway_layer = Layer(fmotorway, encode_data=False)
sensor_layer = Layer(
    sensor_gdf,
    color_category_style('Fwy2'),
    popup_click=[popup_element('sid')],
)
Map([node_layer, motorway_layer, sensor_layer])

In [162]:
# Distinct freeway labels assigned to the sensors, in sorted order.
sorted(set(sensor_gdf['Fwy2']))

['I110-N',
 'I110-S',
 'I210-E',
 'I210-W',
 'I405-N',
 'I405-S',
 'I5-N',
 'I5-S',
 'SR134-E',
 'SR134-W',
 'SR170-N',
 'SR170-S',
 'SR2-E',
 'SR2-W',
 'US101-N',
 'US101-S']

In [163]:
# Per freeway label: a pair of node-id strings. Other cells pass element 0 as
# the start and element 1 as the end of the a_star() search for that freeway.
# The keys are the same labels that appear in the sensors' 'Fwy2' column.
fwy_path_dict = {
    'I110-N': ('269444554', '138283524'),
    'I110-S': ('14920985', '268648607'),
    'I210-E': ('1842038344', '318024379'),
    'I210-W': ('15682495', '322581612'),
    'I405-N': ('338511634', '362826332'),
    'I405-S': ('593060800', '2486107947'),
    'I5-N': ('269459338', '26740008'),
    'I5-S': ('20930309', '653766'), 
    'SR134-E': ('1340580299', '300944436'),
    'SR134-W': ('300944078', '361468272'),
    'SR170-N': ('6354846109', '122467736'),
    'SR170-S': ('361449050', '122520856'),
    'SR2-E': ('19828298', '19828348'),
    'SR2-W': ('1832672826', '90577276'),
    'US101-N': ('1614922982', '295426636'),
    'US101-S': ('305705196', '604319666')
    
}

In [164]:
# Freeway label inspected in this and the following cells.
key = 'SR170-N'

# Motorway edges, their endpoint nodes (grey, click shows the node id) and
# the sensors labelled with `key` (red, click shows the sensor id).
key_sensors = sensor_gdf[sensor_gdf['Fwy2'] == key]
Map([
    Layer(fmotorway, encode_data=False),
    Layer(fgdf_nodes, basic_style(color='grey'), popup_click=[popup_element('osmidstr')]),
    Layer(key_sensors, basic_style(color='red'), popup_click=[popup_element('sid')]),
])

In [165]:
# Route between the two anchor nodes configured for `key`.
# a_star and mygraph are defined in a cell located further down in this
# notebook; that cell has to be executed before this one.
start_node, end_node = fwy_path_dict[key]
path = a_star(mygraph, start_node, end_node)

# Keep the edges whose two endpoints both lie on the route.
u_on_path = fgdf_edges['u'].astype(str).isin(path)
v_on_path = fgdf_edges['v'].astype(str).isin(path)
fgdf_tmp = fgdf_edges[u_on_path & v_on_path].copy()

# Show the selected edges together with the sensors labelled `key`.
key_sensors = sensor_gdf[sensor_gdf['Fwy2'] == key]
Map([
    Layer(fgdf_tmp),
    Layer(key_sensors, basic_style(color='red'), popup_click=[popup_element('sid')]),
])


In [166]:
# For every freeway label: search the route between its two anchor nodes,
# select the edges with both endpoints on that route, and tag them with the
# label in a 'pathid' column.
df_list = []
for uid, (start_node, end_node) in fwy_path_dict.items():
    path = a_star(mygraph, start_node, end_node)
    print(uid)
    u_on_path = fgdf_edges['u'].astype(str).isin(path)
    v_on_path = fgdf_edges['v'].astype(str).isin(path)
    fgdf_tmp = fgdf_edges[u_on_path & v_on_path].copy()
    fgdf_tmp['pathid'] = uid
    df_list.append(fgdf_tmp)

# All tagged route edges in a single GeoDataFrame.
agdf = gpd.GeoDataFrame(pd.concat(df_list))

I110-N
I110-S
I210-E
I210-W
I405-N
I405-S
I5-N
I5-S
SR134-E
SR134-W
SR170-N
SR170-S
SR2-E
SR2-W
US101-N
US101-S


In [207]:
from shapely.ops import linemerge
from shapely.geometry import LineString
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points


# Snap every sensor onto the route of its freeway and record which edge it
# falls on.
#
# Results of this cell:
#   sid2osmpath   - list of dicts {sid, fwy, lat, lng, u, v}, one per sensor
#   ngdf          - GeoDataFrame of the sensors with geometry moved onto the route
#   fgdf_tmp_list - the per-freeway route edges (a list while looping, then
#                   replaced by their concatenation at the end of the cell)
#
# a_star and mygraph are defined in a cell located further down in this
# notebook; that cell has to be executed before this one.
sid2osmpath = []
# df_list = []

# Number of sensors processed; checked against len(sensor_gdf) at the end.
count = 0
fgdf_tmp_list = []

new_items = []
for uid in fwy_path_dict:
    # Route between the two anchor nodes configured for this freeway label.
    path = a_star(mygraph, fwy_path_dict[uid][0], fwy_path_dict[uid][1])
    print(uid)
    # Edges whose two endpoints both lie on the route.
    fgdf_tmp = fgdf_edges[fgdf_edges['u'].astype(str).isin(path) & fgdf_edges['v'].astype(str).isin(path)].copy()
    fgdf_tmp['pathid'] = uid
    
    fgdf_tmp_list.append(fgdf_tmp)
    #     df_list.append(fgdf_tmp)

    # Merge the LineString objects into one LineString
    # NOTE(review): presumably this can come back as a multi-part geometry if
    # the selected edges are not contiguous - confirm against shapely docs.
    merged_line = linemerge(fgdf_tmp.geometry.tolist())

    # Print the resulting LineString
#     print(merged_line)
    
    # Only the sensors whose 'Fwy2' label equals this freeway are handled here.
    for _, item in sensor_gdf[sensor_gdf['Fwy2'] == uid].iterrows():    
        # First result is the point on the merged route, second the point on
        # the sensor geometry (unused below).
        closest_point_on_line, closest_point_on_point = nearest_points(merged_line, item.geometry)
        # Copy of the sensor row with its geometry replaced by the snapped point.
        nitem = dict(item)
        nitem['geometry'] = closest_point_on_line
        new_items.append(nitem)
        
        # Route edge with the smallest distance to the original sensor point.
        # NOTE(review): the distance is computed in the layers' coordinate
        # units (presumably degrees, given the epsg:4326 sensors) - confirm
        # this is acceptable; it may be the source of the warnings in the output.
        clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]
        print(item['sid'], clineitem['u'], clineitem['v'])
        # lat/lng are the snapped coordinates (y/x of the point on the route).
        opitem = {
            'sid': str(item['sid']), 
            'fwy': item['Fwy2'],
            'lat': float(closest_point_on_line.y),
            'lng': float(closest_point_on_line.x), 
            'u': str(clineitem['u']), 
            'v': str(clineitem['v'])
        }
        sid2osmpath.append(opitem)
        
        count += 1
        
ngdf = gpd.GeoDataFrame(new_items)


# From here on fgdf_tmp_list is a single concatenated frame, not a list.
fgdf_tmp_list = pd.concat(fgdf_tmp_list)

# Every sensor must have been visited exactly once, i.e. each 'Fwy2' label
# has to be one of the keys of fwy_path_dict.
assert count == len(sensor_gdf)

I110-N
771667 1614923195 26902002
773013 33717752 29416313
767751 375198617 21507076
772513 269444554 21662853
772669 26902002 13885935
771673 26902002 13885935
I110-S



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


773012 15261772 269598309
718045 806547157 606441164
760650 20400047 38020241
767750 26187122 1613525416
I210-E
769819 1687054124 316803571
769831 355765824 1687054124
769847 305421514 305416215
769806 318269258 318024379
I210-W



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


769953 15682495 1842004390
769941 1842004390 1376737825
769867 1931511081 322581612
769926 305421106 1842042919
I405-N
717804 338511634 122416956
717819 293826694 631892092
767351 35317437 293826694
717818 26819499 35317437
717825 122491482 362826332
717823 362706086 362708680
I405-S



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


737529 362708318 26819492
717816 269571660 269571563
765273 293827863 293828095
767366 593060800 362708318
765171 305532520 26819502
767053 7118722407 305532520
767350 701899548 7118722407
765265 293828095 701899548
717821 26819492 362704179
I5-N
716956 1686580866 20189446
761604 403623346 338933805
718371 269596401 14995402
761599 77741551 298585779
716968 1884813453 26740008
716942 269459338 1530583594
759591 911820291 38407085
716953 14995402 291676550
716951 269596436 269596401



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


759602 21490199 20954460
I5-S
716941 363477390 326712429
716960 20820632 20820633
716955 25274550 6872663603
718379 298583385 298585645
718089 20820632 20820633
718090 364873626 1884813438
716958 21478645 20820635
716943 363477390 326712429
716949 269595908 269595905
716939 1614922775 653766
759772 20930309 358218015
SR134-E



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


773869 26428271 20857671
717578 364873589 1686591011
760987 122459534 7668301080
718204 1233732679 1686584226
773927 912112449 364873589
774011 17589348 300944376
773953 1233732679 1686584226
773880 122459534 7668301080
773916 1686584229 912112449
717573 20857688 26428271
717570 1340580299 1855249852
773904 1233732679 1686584226
761003 20857671 1956040650
773995 361523868 17589348
717583 553500027 1956040672
717580 1956040644 383846660
717587 553499971 1734644776
717585 1956040672 553499971
773974 361523787 361523813
717592 361523787 361523813
717595 300944376 300944436
SR134-W



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


773906 338935762 7664051915
774012 300944078 363839552
716554 1799706845 13890125
773954 338935762 7664051915
773939 1833669048 299528406
774067 1377536917 338935592
717099 20854038 13890126
717576 338935762 7664051915
717572 1830204049 340145423
717571 6853166455 1833669048
718496 20659737 81952272
718499 1233732718 529607866
773996 300944078 363839552
717582 1686590990 1799706845
773975 363839552 363840285
774204 20659737 81952272
717590 363840285 361523734
SR170-N
765604 361463277 273607575
764424 361458491 18154009
764120 361451218 1957285395
763995 306526678 1957285397
764106 166795424 18154007
SR170-S
717610 362409104 1957285381
716571 361449050 361450999
717608 21703085 29570294
764101 21703087 166914602
SR2-E



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


767541 577421639 382650862
767620 493139643 332160601
767572 1669532236 493139643
767609 598486452 19828348
767470 361523236 361523381
767585 19828340 598486452
767455 361523254 598382077
767523 1920619864 577421639
767509 1619395035 9602171183
767554 1669532236 493139643
767494 19828298 911820304
SR2-W
767542 17040712 1782812735
767471 10652200 361523258
767573 1376737820 1783926563
767610 1832672826 79367034
718064 1782812735 1792481492
718066 1783926563 17040712



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


767621 361523299 1376737820
718076 79367034 10652200
718072 598384097 361523277
767454 361523277 361523293
762329 361524116 361523299
767495 295204143 90577276
US101-N
717447 1613525409 2353109732
717445 21063619 191333482
716337 27390001 353511096
769402 2351758618 2351758770
769418 2351758618 2351758770
764949 2351758770 2351758627
769443 305537102 374409516
717497 361119302 361120646
717491 82481755 701954220
717492 361468272 6354846133
765176 2351758770 2351758627
717498 29409683 305537102
765164 353511096 349446224
769431 59832561 35363002
772596 1733962055 1733961944
773023 1614922982 1614923029
717483 2351758719 2351758772
717480 35362974 1717816401
769467 299532356 299534703
764760 2351758627 2351758741
717473 1613580618 2354337200
765099 269577317 305532687
765182 268644978 365786990
769358 361120646 29409683
769346 299534703 361119302
717502 702979635 702979641
717469 408616057 18153986
717465 21569368 408616057
764794 269577317 305532687
717460 349437247 188888821
717463 215


  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


In [208]:
# Save the per-sensor records (sid, fwy, lat, lng, u, v) as CSV.
# NOTE(review): index=None is presumably meant to suppress the row index
# column - confirm against the pandas to_csv documentation.
gpd.GeoDataFrame(sid2osmpath).to_csv('corrected-metr-la-sensorid-osm-path-uv.csv', index=None)

In [168]:
# Freeway labels in the order they are declared in fwy_path_dict.
category_order = list(fwy_path_dict)

In [209]:
# Manual freeway labels for five sensor ids, grouped by freeway. This mapping
# is also consulted by the cell that fills the sensors' 'Fwy2' column.
recorrect_sid2fwy = {
    **dict.fromkeys(['764120', '764424', '765604'], 'SR170-N'),
    **dict.fromkeys(['764858', '765182'], 'US101-N'),
}

# Route edges coloured by their 'pathid'; the label is shown on click and hover.
path_layer = Layer(
    agdf,
    color_category_style('pathid', cat=category_order),
    popup_click=[popup_element('pathid')],
    popup_hover=[popup_element('pathid')],
)
# Snapped sensors coloured by 'Fwy2', using the same category order as the routes.
snapped_sensor_layer = Layer(
    ngdf,
    color_category_style('Fwy2', cat=category_order),
    popup_click=[popup_element('sid'), popup_element('Fwy2')],
    popup_hover=[popup_element('sid'), popup_element('Fwy2')],
)
Map([path_layer, snapped_sensor_layer])


In [37]:
# Node-id string -> (x, y) for every motorway endpoint node.
osmidpos = {
    node_id: (x, y)
    for node_id, x, y in zip(fgdf_nodes['osmidstr'], fgdf_nodes['x'], fgdf_nodes['y'])
}

# Edges whose two endpoints are both known nodes.
u_known = fgdf_edges['u'].astype(str).isin(osmidpos)
v_known = fgdf_edges['v'].astype(str).isin(osmidpos)
ffgdf_edges = fgdf_edges[u_known & v_known]

# Adjacency dict used by a_star: mygraph[u]['pos'] holds the node position and
# mygraph[u][v] the 'length' of the edge u -> v. If several rows share the
# same (u, v) pair, the last one wins.
mygraph = dict()
for u, v, dist in zip(ffgdf_edges['u'], ffgdf_edges['v'], ffgdf_edges['length']):
    us, vs = str(u), str(v)
    for node_id in (us, vs):
        if node_id not in mygraph:
            mygraph[node_id] = {'pos': osmidpos[node_id]}
    mygraph[us][vs] = dist

import heapq
from typing import Dict, List

distances = {}
            
        
def a_star(graph: Dict[str, Dict[str, float]], 
           start: str, end: str) -> List[str]:
    # Heuristic function for estimating the distance between two nodes
    def h(node):
        if (node, end) not in distances:
            # In this example, we use a simple heuristic that assumes
            # a straight-line distance between nodes, ignoring obstacles
            x1, y1 = graph[node]['pos']
            x2, y2 = graph[end]['pos']
            distances[(node, end)] = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
            distances[(end, node)] = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
        return distances[(node, end)]
    
    # Initialize distance and previous node dictionaries
    g_score = {node: float('inf') for node in graph}
    g_score[start] = 0
    f_score = {node: float('inf') for node in graph}
    f_score[start] = h(start)
    prev = {node: None for node in graph}
    
    # Initialize heap with start node and its f score
    heap = [(f_score[start], start)]
    
    while heap:
        # Pop the node with the smallest f score from the heap
        (f, curr_node) = heapq.heappop(heap)
        
        # If we have reached the end node, return the shortest path
        if curr_node == end:
            path = []
            while curr_node is not None:
                path.append(curr_node)
                curr_node = prev[curr_node]
                
            return path[::-1]
        
        # Otherwise, update the f and g scores of all adjacent nodes
        for neighbor, weight in graph[curr_node].items():
            # Check if there is an edge between the current node and the neighbor
            if neighbor not in g_score:
                continue
                
            new_g_score = g_score[curr_node] + weight
            if new_g_score < g_score[neighbor]:
                g_score[neighbor] = new_g_score
                f_score[neighbor] = new_g_score + h(neighbor)
                prev[neighbor] = curr_node
                heapq.heappush(heap, (f_score[neighbor], neighbor))
    
    # If we get here, there is no path from start to end
    return None

# # Example graph with node positions
# graph = {
#     'A': {'B': 2, 'C': 1, 'pos': (0, 0)},
#     'B': {'C': 2, 'D': 3, 'pos': (1, 1)},
#     'C': {'D': 4, 'E': 3, 'pos': (1, -1)},
#     'D': {'E': 2, 'pos': (2, 0)},
#     'E': {'F': 3, 'pos': (3, -1)},
#     'F': {'pos': (4, 0)},
# }

# Find the shortest path from A to F using A* algorithm
# path = a_star(mygraph, 'A', 'F')
# print(path)  # Output: ['A', 'C', 'E', 'F']


In [40]:
# Route between the two anchor nodes configured for `key`, shown as the
# edges whose two endpoints both lie on that route.
start_node, end_node = fwy_path_dict[key]
path = a_star(mygraph, start_node, end_node)
u_on_path = fgdf_edges['u'].astype(str).isin(path)
v_on_path = fgdf_edges['v'].astype(str).isin(path)
fgdf_tmp = fgdf_edges[u_on_path & v_on_path].copy()
Layer(fgdf_tmp)

In [116]:
# For every freeway label: search the route between its two anchor nodes and
# collect the edges with both endpoints on that route, tagged with the label.
df_list = []
for uid, (start_node, end_node) in fwy_path_dict.items():
    path = a_star(mygraph, start_node, end_node)
    print(uid)
    u_on_path = fgdf_edges['u'].astype(str).isin(path)
    v_on_path = fgdf_edges['v'].astype(str).isin(path)
    fgdf_tmp = fgdf_edges[u_on_path & v_on_path].copy()
    fgdf_tmp['pathid'] = uid
    df_list.append(fgdf_tmp)

17N
17S
85N


KeyError: '1377399032'