In [11]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
import json

import matplotlib.pyplot as plt
from cartoframes.viz import *

import pickle
def load_graph_data(pkl_filename):
    """Read a sensor-graph pickle and return its three components.

    The object stored at ``pkl_filename`` is loaded with ``load_pickle`` and
    must unpack into exactly three items, which are handed back as the tuple
    ``(sensor_ids, sensor_id_to_ind, adj_mx)``.
    """
    graph_parts = load_pickle(pkl_filename)
    sensor_ids, sensor_id_to_ind, adj_mx = graph_parts
    return sensor_ids, sensor_id_to_ind, adj_mx

def load_pickle(pickle_file):
    """Unpickle ``pickle_file`` and return the stored object.

    The file is first read with pickle's default string decoding. If that
    raises ``UnicodeDecodeError`` the file is reopened and decoded with
    ``encoding='latin1'``. Any other failure of the first attempt is printed
    and re-raised unchanged.

    NOTE: unpickling can execute arbitrary code; only use this on files from
    a trusted source.
    """
    try:
        with open(pickle_file, 'rb') as stream:
            return pickle.load(stream)
    except UnicodeDecodeError:
        # Second attempt with a byte-preserving decoding.
        with open(pickle_file, 'rb') as stream:
            return pickle.load(stream, encoding='latin1')
    except Exception as err:
        print('Unable to load data ', pickle_file, ':', err)
        raise

# Pems-Bay Match

In [4]:
import osmnx as ox

# Load the saved OSM road graph (GraphML file) used for the PEMS-Bay matching.
graph = ox.load_graphml(filepath='../graph_generation/osm_graph/pems-bay-drive.graphml')

In [5]:
# Split the OSM graph into node and edge GeoDataFrames and expose the node id
# (the node frame's index) both as-is and as a string.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(graph)
gdf_nodes['osmidn'] = gdf_nodes.index
gdf_nodes['osmidstr'] = gdf_nodes['osmidn'].astype(str)

# Keep only edges whose `highway` value is a plain string. Other value types
# are dropped (presumably lists of tags on merged edges - TODO confirm).
# isinstance replaces the former comparison against the text "<class 'str'>".
fgdf_edges = gdf_edges.reset_index()
cond = fgdf_edges['highway'].map(lambda tag: isinstance(tag, str)).to_numpy(dtype=bool)
fgdf_edges = fgdf_edges[cond]

# Motorway edges, and the nodes that appear as an endpoint of any of them.
fmotorway = fgdf_edges[fgdf_edges['highway'] == 'motorway']
motorway_node_ids = set(fmotorway['u']) | set(fmotorway['v'])
fgdf_nodes = gdf_nodes[gdf_nodes.index.isin(motorway_node_ids)]

In [19]:
from cartoframes.viz import Layer, popup_element, color_category_style, basic_style


# Read the sensor metadata table and turn it into a point GeoDataFrame
# (longitude/latitude, EPSG:4326).
pems_df = pd.read_hdf('pems-bay/pems-bay-meta.h5')
sensor_points = gpd.points_from_xy(x=pems_df.Longitude, y=pems_df.Latitude)
pems_gdf = gpd.GeoDataFrame(pems_df, geometry=sensor_points, crs='EPSG:4326')

# `sid` is the table index; `Fwy2` is the freeway value followed by the
# direction value (e.g. the keys used in fwy_path_dict).
pems_gdf['sid'] = pems_gdf.index
pems_gdf['Fwy2'] = pems_gdf['Fwy'].astype(str) + pems_gdf['Dir'].astype(str)

sensor_gdf = pems_gdf

# Overview map: motorway nodes (click for the node id), motorway edges, and
# the sensors coloured by freeway/direction (click for the sensor id).
motorway_node_layer = Layer(
    fgdf_nodes,
    basic_style(color='black'),
    popup_click=[popup_element('osmidstr')],
)
motorway_edge_layer = Layer(fmotorway, encode_data=False)
# Optional extra overlay, currently disabled:
#     Layer(gpd.GeoDataFrame(geometry=lines),  basic_style(color='pink')),
sensor_layer = Layer(
    pems_gdf,
    color_category_style('Fwy2'),
    popup_click=[popup_element('sid')],
)

Map([motorway_node_layer, motorway_edge_layer, sensor_layer])

In [13]:
# Endpoints of the route to trace for each freeway/direction.
# Keys are matched against the sensors' `Fwy2` value (freeway followed by the
# direction letter). Each value is a (start, end) pair of node ids, as strings,
# that is passed to a_star as the start and end of the search, so both ids
# must be keys of `mygraph`.
# The insertion order is also reused as the category order for map colouring.
fwy_path_dict = {
    '17N': ('310961963', '257884249'),
    '17S': ('65401698', '7048863128'),
    '85N': ('248212016', '3660749993'),
    '85S': ('209854346', '1377399032'),
    '87N': ('248212014', '1698569739'),
    '87S': ('245509711', '309053289'),
    '101N': ('311950047', '239637816'),
    '101S': ('65405009', '2510564064'),
    '237W': ('3665202993', '32788810'),
    '237E': ('5334144482', '1389052694'),
    '280N': ('4974822843', '1067449074'),
    '280S': ('30237212', '246295695'),
    '680N': ('4974822842', '257881716'),
    '680S': ('65508508', '4974822843'),
    '880N': ('257884249', '65414355'),
    '880S': ('1688870513', '65401698')
}

In [14]:
# Build the adjacency structure consumed by a_star:
#   mygraph[node_id] = {'pos': (x, y), neighbour_id: edge_length, ...}
# Node ids are strings; only nodes in fgdf_nodes (motorway endpoints) are used.
mygraph = dict()
osmidpos = {
    osmidstr: (x, y)
    for osmidstr, x, y in zip(fgdf_nodes['osmidstr'], fgdf_nodes['x'], fgdf_nodes['y'])
}

# Edges whose two endpoints are both known motorway nodes.
ffgdf_edges = fgdf_edges[fgdf_edges['u'].astype(str).isin(osmidpos) & fgdf_edges['v'].astype(str).isin(osmidpos)]

for us, vs, dist in zip(ffgdf_edges['u'].astype(str),
                        ffgdf_edges['v'].astype(str),
                        ffgdf_edges['length']):
    mygraph.setdefault(us, {'pos': osmidpos[us]})
    mygraph.setdefault(vs, {'pos': osmidpos[vs]})
    # Several rows can connect the same (u, v) pair; keep the shortest one
    # instead of whichever row happens to come last.
    mygraph[us][vs] = min(dist, mygraph[us].get(vs, float('inf')))

In [15]:
import heapq
from typing import Any, Dict, List, Optional

# Kept only so that existing references to this module-level name keep
# working. a_star no longer reads or writes it: a cache shared between calls
# returned stale distances when a different graph reused the same node names.
distances = {}


def a_star(graph: Dict[str, Dict[str, Any]],
           start: str, end: str) -> Optional[List[str]]:
    """Find a lowest-cost path from ``start`` to ``end`` with A* search.

    Parameters
    ----------
    graph
        Mapping ``node -> {'pos': (x, y), neighbour: weight, ...}``. Every
        node must carry a ``'pos'`` entry; all other entries are outgoing
        edges. Neighbours that are not themselves keys of ``graph`` are
        ignored.
    start, end
        Keys of ``graph``.

    Returns
    -------
    The list of nodes from ``start`` to ``end`` (both included), or ``None``
    when ``end`` cannot be reached.

    Raises
    ------
    KeyError
        If ``start`` or ``end`` is not a key of ``graph``.

    Notes
    -----
    The heuristic is the straight-line distance between ``'pos'`` tuples. The
    result is only guaranteed to be a shortest path when that distance never
    exceeds the true remaining cost.
    NOTE(review): in this notebook ``'pos'`` comes from the node frame's x/y
    columns and the weights from the edge ``length`` column - confirm the
    units make the heuristic an underestimate.
    """
    if start not in graph:
        raise KeyError(f'start node {start!r} is not in the graph')
    if end not in graph:
        raise KeyError(f'end node {end!r} is not in the graph')

    end_x, end_y = graph[end]['pos']
    # Per-call memo of heuristic values (the target is fixed for one call).
    h_cache: Dict[str, float] = {}

    def h(node):
        """Straight-line distance from ``node`` to ``end``."""
        if node not in h_cache:
            x, y = graph[node]['pos']
            h_cache[node] = ((x - end_x) ** 2 + (y - end_y) ** 2) ** 0.5
        return h_cache[node]

    # Best known cost from start (g), estimated total cost (f), predecessor.
    g_score = {node: float('inf') for node in graph}
    g_score[start] = 0
    f_score = {node: float('inf') for node in graph}
    f_score[start] = h(start)
    prev = {node: None for node in graph}

    heap = [(f_score[start], start)]

    while heap:
        f, curr_node = heapq.heappop(heap)

        # A node is pushed again whenever its score improves; older entries
        # stay in the heap and can be skipped when they surface.
        if f > f_score[curr_node]:
            continue

        if curr_node == end:
            # Walk the predecessor chain back to the start.
            path = []
            while curr_node is not None:
                path.append(curr_node)
                curr_node = prev[curr_node]
            return path[::-1]

        for neighbor, weight in graph[curr_node].items():
            # Skips the 'pos' entry and any neighbour that is not a graph node.
            if neighbor not in g_score:
                continue

            new_g_score = g_score[curr_node] + weight
            if new_g_score < g_score[neighbor]:
                g_score[neighbor] = new_g_score
                f_score[neighbor] = new_g_score + h(neighbor)
                prev[neighbor] = curr_node
                heapq.heappush(heap, (f_score[neighbor], neighbor))

    # The heap ran dry without reaching `end`: no path exists.
    return None

# Example graph with node positions. It is kept commented out and named
# `example_graph` so that running it would not overwrite the OSM `graph`
# loaded earlier in this notebook.
# example_graph = {
#     'A': {'B': 2, 'C': 1, 'pos': (0, 0)},
#     'B': {'C': 2, 'D': 3, 'pos': (1, 1)},
#     'C': {'D': 4, 'E': 3, 'pos': (1, -1)},
#     'D': {'E': 2, 'pos': (2, 0)},
#     'E': {'F': 3, 'pos': (3, -1)},
#     'F': {'pos': (4, 0)},
# }

# Find the shortest path from A to F using the A* algorithm:
# path = a_star(example_graph, 'A', 'F')
# print(path)  # Output: ['A', 'C', 'E', 'F']


In [17]:
# For every freeway/direction, search the node path between its two endpoint
# nodes and collect the edges whose endpoints both lie on that path.
# NOTE(review): the endpoint test also keeps any edge that links two
# non-consecutive path nodes - confirm that is intended.

# The string form of the edge endpoints is the same for every freeway, so it
# is built once here rather than inside the loop.
edge_u_str = fgdf_edges['u'].astype(str)
edge_v_str = fgdf_edges['v'].astype(str)

df_list = []
for uid, (start_node, end_node) in fwy_path_dict.items():
    path = a_star(mygraph, start_node, end_node)
    if path is None:
        # a_star returns None when the end node is unreachable; fail with a
        # readable message instead of an error from isin() further down.
        raise ValueError(f'No path found for {uid}: {start_node} -> {end_node}')
    print(uid)
    fgdf_tmp = fgdf_edges[edge_u_str.isin(path) & edge_v_str.isin(path)].copy()
    fgdf_tmp['pathid'] = uid
    df_list.append(fgdf_tmp)

# All selected edges in one frame, tagged with the freeway key in `pathid`.
agdf = gpd.GeoDataFrame(pd.concat(df_list))

17N
17S
85N
85S
87N
87S
101N
101S
237W
237E
280N
280S
680N
680S
880N
880S


In [34]:
from shapely.ops import linemerge
from shapely.geometry import LineString
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points


# Snap every sensor onto the traced path of its freeway/direction.
# Results of this cell:
#   sid2osmpath   - one record per sensor: id, freeway key, snapped lat/lng
#                   and the u/v node ids of the nearest path edge
#   new_items     - the sensor rows with `geometry` replaced by the snapped point
#   ngdf          - GeoDataFrame built from new_items
#   fgdf_tmp_list - the per-freeway edge frames, concatenated at the end
sid2osmpath = []
new_items = []
fgdf_tmp_list = []
count = 0

# The string form of the edge endpoints is the same for every freeway, so it
# is built once here rather than inside the loop.
edge_u_str = fgdf_edges['u'].astype(str)
edge_v_str = fgdf_edges['v'].astype(str)

for uid, (start_node, end_node) in fwy_path_dict.items():
    path = a_star(mygraph, start_node, end_node)
    if path is None:
        # a_star returns None when the end node is unreachable; fail with a
        # readable message instead of an error from isin() further down.
        raise ValueError(f'No path found for {uid}: {start_node} -> {end_node}')
    print(uid)

    # Edges whose two endpoints are both on the path.
    fgdf_tmp = fgdf_edges[edge_u_str.isin(path) & edge_v_str.isin(path)].copy()
    fgdf_tmp['pathid'] = uid
    fgdf_tmp_list.append(fgdf_tmp)

    # Merge the edge geometries so the closest point can be searched on the
    # whole path at once.
    merged_line = linemerge(fgdf_tmp.geometry.tolist())

    for _, item in sensor_gdf[sensor_gdf['Fwy2'] == uid].iterrows():
        # nearest_points returns one point per input; the first one lies on
        # the merged path.
        closest_point_on_line = nearest_points(merged_line, item.geometry)[0]

        # Copy of the sensor row, relocated to the snapped position.
        nitem = dict(item)
        nitem['geometry'] = closest_point_on_line
        new_items.append(nitem)

        # Path edge closest to the original sensor position.
        # NOTE(review): the truncated warnings in this cell's saved output
        # point at this line - presumably geopandas warning that distances
        # in a geographic CRS are inaccurate. Consider projecting to a metric
        # CRS before matching; behaviour is left unchanged here.
        clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]
        print(item['sid'], clineitem['u'], clineitem['v'])

        sid2osmpath.append({
            'sid': str(item['sid']),
            'fwy': item['Fwy2'],
            'lat': float(closest_point_on_line.y),
            'lng': float(closest_point_on_line.x),
            'u': str(clineitem['u']),
            'v': str(clineitem['v'])
        })

        count += 1

ngdf = gpd.GeoDataFrame(new_items)

# From here on the name holds one concatenated frame instead of a list.
fgdf_tmp_list = pd.concat(fgdf_tmp_list)

# Every sensor must have been matched to exactly one freeway key.
assert count == len(sensor_gdf), (
    f'matched {count} sensors but sensor_gdf has {len(sensor_gdf)} rows'
)

17N
400649 310961963 310962519
400715 310962519 310960273
400240 310960273 1080001365
403329 1079992643 258982002
400436 1079992643 258982002
400221 52153048 312387699
404370 312387699 312387512
17S
400100 65386586 65386609
400298 65386609 312386268
400278 52152854 1079995412
400073 1079999436 310951427
400832 310951427 310961241
85N
400268 248212016 257718800
401555 248212016 257718800
400052 257806071 31416653



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


400461 31416653 310988083
400750 262601804 310986742
400148 262601804 310986742
400336 310986742 310967444
400713 310967444 1954370019
400485 310964172 1954370022
400822 310964172 1954370022
400464 1954370022 26497337
400688 26497337 310970415
404434 310970415 310974825
400222 310974825 310977443
400952 310974825 310977443
401210 310977443 306534211
400507 306534211 306534238
400185 306534211 306534238
400677 433475314 272272951
404435 272272951 49071638
407344 272272951 49071638
407321 49071638 704831935
407331 49071638 704831935
407341 704832630 49067107
407339 49067107 49067139
407337 49067107 49067139
402060 49067139 1067460085
407336 49067139 1067460085
407335 49067139 1067460085
407328 1067460085 10734321292
407342 26029664 8820954213
407325 187632461 152195400
407323 220226948 65397653
400174 706934715 65386023
407332 5448680402 3660749993
85S



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


407359 209854346 26028051
400869 26028051 34153357
407374 26028045 26028038
400528 26028045 26028038
407348 193209044 240448549
407367 240448549 1954370143
407352 150089564 1067457597
407360 1067457597 206696652
407361 1067457597 206696652
402061 1067457597 206696652
407364 206696652 302878
407372 302878 437325360
407373 437325360 31362863
407370 31362863 33104718
400209 302889 1067736024
400648 306534142 1067614249
400828 306534142 1067614249
401224 1067614249 310979305
400097 310979305 310974852
400582 310979305 310974852
404444 310974852 26506863
400213 26506863 26729676
400907 26729676 310961411
400792 310961411 310961906
400934 310961411 310961906
400017 310961906 65536436
400700 310969385 310967127
400772 310969385 310967127
400717 310967127 65536465
400059 65536465 65536472
400690 65536502 257806722
400085 257806722 65412681
401495 259001354 309052216
400637 259001354 309052216
400519 309052216 309052181
87N



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


400280 248212014 248212008
405613 248212014 248212008
401129 248212008 261764012
401957 248212008 261764012
404451 248212008 261764012
400178 261764012 248212608
401567 248212608 248212505
404452 248212608 248212505
401597 248212608 248212505
400418 248212608 248212505
400654 248212505 248212500
400664 248212500 8849106418
404453 8849106418 246581778
400258 8849106418 246581778
400916 246574426 704869519
400563 704869519 65548053
402056 704869519 65548053
402059 246008821 33783809
402118 33783800 33783802
409524 33783800 33783802
409525 245809995 4938097618
409526 245809995 4938097618
401817 4938097618 1698569739
87S
401816 245509711 65415963
409529 309048971 245809992
409528 309048971 245809992
402117 245919608 65548078
402058 65402740 8847870779
402057 65402740 8847870779
400236 311443383 1298973519
400065 366742999 1760559744
400995 366742999 1760559744
404461 1760559744 258998915
400837 258998915 258999024
400040 258999024 312401171
400257 312401171 8849363376
401606 8849363376 176


  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


400353 121128093 312939639
401994 33784157 80698939
400246 80698939 60434557
402366 60434557 26031433
400168 26031433 82464828
400790 82464828 82464810
402368 82464810 33784671
402370 33784671 26028101
401996 26028101 67036838
401997 67036838 1970807538
404522 26117823 258944399
401998 258944399 26028106
402372 258944399 26028106
401534 102212724 1350115238
401948 26028109 3660749993
401507 447918140 256889363
101S
401926 65389683 447919027
400149 447919027 65386032
401937 447919027 65386032
401936 65405940 65393469
402373 65393813 447920760
400435 65393813 447920760
404554 447920760 5334134384
400545 5334134384 65381269
400743 6447149857 447924496
400330 447924496 447924963
402371 447924963 65417858
404553 65417858 1970807537
400904 65391616 65386173
402369 65386173 65415279
400643 65415279 447928131
400794 447928131 447928265



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


402367 447928265 60434555
400069 60434555 289620912
400372 289620912 258966218
400206 4989314674 258966276
400895 4989314674 258966276
400586 9483248816 258966453
400172 258966453 258966414
402365 245540666 245812729
400394 245812729 65401537
400922 245812719 245812727
400109 33783786 311645239
402363 311645239 640768452
400440 640768452 245974454
400160 1700578617 65593075
401891 65593082 65404590
401906 65404590 65390162
402360 65390162 1700578554
237W
413845 1724786594 225227642
400158 262911505 32790340
401014 262911505 32790340
400449 32790340 262911480
400201 262911480 3973823697
400096 728171219 262911516
400274 82457619 262911295
400687 82457619 262911295
404586 262911295 32351595
400581 258969754 1958092697
401351 26117820 32788810
237E



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


404585 262910598 225208672
400970 225208672 225208837
400973 33717296 728171220
400567 728171220 725893684
400104 725893684 33466457
400296 33466459 728171989
400873 33466459 728171989
280N
401403 4974822843 353134071
401655 353134071 65382744
401808 353134071 65382744
403402 65379910 1298171832
400799 1298171832 1935687161
400953 1935687161 1298171942
401810 1298171942 305551936
413878 305551936 65535226
414284 8948350638 1298989113
401942 8948350638 1298989113
413877 1298989113 1298989019
414694 1299177532 1299177642
401167 1299177642 305551118
405701 1299177642 305551118
407710 1299142209 279689642
401400 65410786 65405974
400714 65410786 65405974
400414 1652633096 257883573
400560 258965297 289651583
400499 289638735 1067712932
400823 255655733 220218075
401845 26506813 5834900091
280S



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


401846 276317974 92274382
404640 49071606 704811950
400673 206691786 206691795
400084 206691786 206691795
400429 258965393 1067707130
400292 1298225880 1298225884
401388 257883727 1299142177
407711 1299142177 1299142206
401163 1299142171 315229069
403419 315229069 26404645
401327 315229069 26404645
403414 315229069 26404645
402067 1298999088 311447923
401943 1298999088 311447923
403412 1298999088 311447923
403409 311447923 246574388
401811 1299012726 246451017
403406 1299012726 246451017
403404 246480598 311467576
403401 311467576 1298171982
401809 246295716 8557520304
401391 8557520304 246295695
680N
407172 4974822842 1086294628
407173 4974822842 1086294628
407165 1086294628 246343403
407176 259698534 1299317685
407177 1299317685 311715515
407179 259997530 259997528
407180 259997528 312718678
407181 259997528 312718678
407157 312718678 259980516
407155 259981647 311915415
407153 311915415 259982043
407174 311915415 259982043
407161 259982043 313445252
407152 313445252 313455374
407151


  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]

  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


400057 5261661337 283018947
400147 311577375 311575955
400343 245919614 1912088277
401560 245810016 4977823052
400045 4977823048 65401606
400122 65394535 3742063554
401541 33102541 65405085
402281 312714757 258474804
402283 312714757 258474804
402285 258474804 1080573699
402286 258474804 1080573699
400088 247138149 311912822
402288 311912822 1700446579
413026 311912822 1700446579
401464 1700446579 251911884
401489 251911884 65414355
880S
402119 1688870513 248092076
402289 1688870513 248092076
400238 248092076 252005231
402287 252005231 248093005
402120 248093005 288072859
402284 288072859 65381898
402282 65381898 65388338
402121 65381898 65388338
400971 65391242 26404564
404759 26404564 26404567
400479 312118657 52140457
400030 4104395806 4977823055
401440 4977823055 245810051
403225 287826065 3678154056
403265 287826065 3678154056
400508 3678154056 245917505
400253 65397622 65390120
400709 65390120 6439958932
400514 65390602 65417082
400723 65390602 65417082
408911 305549799 65495541



  clineitem = fgdf_tmp.iloc[fgdf_tmp.distance(item.geometry).argmin()]


In [35]:
# Write the sensor -> path-edge matches to CSV, without an index column.
pd.DataFrame(sid2osmpath).to_csv('corrected-pems-bay-sensorid-osm-path-uv.csv', index=False)

In [36]:
# Final check map: the traced freeway paths plus the snapped sensor points.
# Both layers are styled with the same category order (the freeway keys).
category_order = list(fwy_path_dict)

path_layer = Layer(
    agdf,
    color_category_style('pathid', cat=category_order),
    popup_click=[popup_element('pathid')],
    popup_hover=[popup_element('pathid')],
)
sensor_layer = Layer(
    ngdf,
    color_category_style('Fwy2', cat=category_order),
    popup_click=[popup_element('sid'), popup_element('Fwy2')],
    popup_hover=[popup_element('sid'), popup_element('Fwy2')],
)

Map([path_layer, sensor_layer])
