In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
import json

import matplotlib.pyplot as plt
from cartoframes.viz import *

import pickle
def load_graph_data(pkl_filename):
    sensor_ids, sensor_id_to_ind, adj_mx = load_pickle(pkl_filename)
    return sensor_ids, sensor_id_to_ind, adj_mx

def load_pickle(pickle_file):
    try:
        with open(pickle_file, 'rb') as f:
            pickle_data = pickle.load(f)
    except UnicodeDecodeError as e:
        with open(pickle_file, 'rb') as f:
            pickle_data = pickle.load(f, encoding='latin1')
    except Exception as e:
        print('Unable to load data ', pickle_file, ':', e)
        raise
    return pickle_data

In [19]:
import pandas as pd

meta_df_list = []
for fname in os.listdir('pems-output/metr-la'):
    mdf = pd.read_excel(f'pems-output/metr-la/{fname}')
    meta_df_list.append(mdf)
meta_df = pd.concat(meta_df_list)

In [20]:
meta_df

Unnamed: 0,Fwy,District,County,City,CA PM,Abs PM,Length,ID,Name,Lanes,Type,Sensor Type,HOV,MS ID,IRM
0,SR126-E,7,Los Angeles,,4.9,39.398,,775962,COMMERCE CENTER DR,1,Off Ramp,,No,2695,
1,SR126-E,7,Los Angeles,,4.9,39.398,,775963,COMMERCE CENTER DR,1,On Ramp,,No,2695,
2,SR126-E,7,Los Angeles,,4.9,39.398,,775976,COMMERCE CENTER DR.2,1,On Ramp,,No,2696,
3,SR126-E,7,Los Angeles,,4.9,39.398,2.500,775975,COMMERCE CENTER DR.2,4,Mainline,,No,2696,
4,SR126-E,7,Los Angeles,,4.9,39.398,1.076,775961,COMMERCE CENTER DR,3,Mainline,,No,2695,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,SR2-E,7,Los Angeles,Glendale,R21.5,14.062,0.913,767609,FERN LANE,5,Mainline,loops,No,2473,
31,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,,768235,EB-NB 2 TO WB 210,3,Fwy-Fwy,loops,No,4472,
32,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,3.063,768238,VERDUGO BLVD,2,Mainline,loops,No,4472,
33,SR2-E,7,Los Angeles,Glendale,R22.626,15.188,,768246,VERDUGO BLVD,2,Off Ramp,loops,No,4472,


In [58]:
flist = os.listdir('california-vds')
item_list = []
for fname in sorted(flist):
    if fname[-4:] != 'json':
        continue
    print(fname)
    with open('california-vds/' + fname) as fp:
        json_data = json.load(fp)

        for item in json_data['matches']:
            item_list.append({
                'ID': str(int(item['ID'])),
                'lat': item['PT'][0],
                'lng': item['PT'][1]
            })
vds_df = pd.DataFrame(item_list).drop_duplicates('ID')

exact_id2loc = dict()
for _, item in vds_df.iterrows():
    exact_id2loc[item['ID']] = item.lat, item.lng

california-vds-10.json
california-vds-11.json
california-vds-12.json
california-vds-13.json
california-vds-14.json
california-vds-15.json
california-vds-17.json
california-vds-2.json
california-vds-3.json
california-vds-4.json
california-vds-5.json
california-vds-6.json
california-vds-7.json
california-vds-8.json
california-vds-9.json
california-vds.json


# METR-LA Match

In [56]:
la_sensors = pd.read_csv('metr-la/graph_sensor_locations.csv')
la_sensors = la_sensors.set_index('index')
la_sensors.columns = ['sid', 'lat', 'lng']
la_sensors['sid'] = la_sensors['sid'].astype(str)

In [39]:
sid2fwy = {sid:fwy for sid, fwy in zip(meta_df['ID'].astype(str), meta_df['Fwy'])}

In [61]:
fwys = []
elats = []
elngs = []
for _, item in la_sensors.iterrows():
    
    if item['sid'] in sid2fwy:
#         print(item['sid'], sid2fwy[item['sid']])
        fwys.append(sid2fwy[item['sid']])
    else:
#         print(item['sid'], '-')
        fwys.append('-')
    
    if item['sid'] in exact_id2loc:
        elat, elng = exact_id2loc[item['sid']]
    else:
        elat, elng = item['lat'], item['lng']
    elats.append(elat)
    elngs.append(elng)
        
la_sensors['Fwy'] = fwys
la_sensors['elat'] = elats
la_sensors['elng'] = elngs

In [62]:
sensor_df = la_sensors
sensor_gdf = gpd.GeoDataFrame(
    sensor_df, geometry=gpd.points_from_xy(x=sensor_df.elng, y=sensor_df.elat)
)
sensor_gdf.crs = 'epsg:4326'

In [63]:
from cartoframes.viz import Layer, color_category_style
Layer(sensor_gdf, color_category_style('Fwy'))

In [64]:
Layer(sensor_gdf, color_category_style('Fwy'))

In [4]:
os.listdir('metr-la')

['graph_sensor_ids.txt',
 'graph_sensor_locations.csv',
 'metr-la.h5',
 'adj_mx.pkl',
 'graph_sensor_locations_corrected.csv',
 'distances_la_2012.csv']

In [85]:
correct_list = []
for _, item in la_sensors.iterrows():
#     print(int(item['sid']))
    sid = int(item['sid'])
    if sid in exact_id2loc:
        correct_item = {
            'sid': sid,
            'lat': exact_id2loc[sid][0],
            'lng': exact_id2loc[sid][1]
        }
#         print('corrected:', correct_item)
    else:
        correct_item = {
            'sid': sid,
            'lat': item['lat'],
            'lng': item['lng']
        }
        print('uncorrected:', correct_item)
    correct_list.append(correct_item)
nla_sensors = pd.DataFrame(correct_list)

uncorrected: {'sid': 765604, 'lat': 34.16415, 'lng': -118.38223}
uncorrected: {'sid': 764424, 'lat': 34.17878, 'lng': -118.39469}
uncorrected: {'sid': 764120, 'lat': 34.20164, 'lng': -118.40366}
uncorrected: {'sid': 765182, 'lat': 34.06491, 'lng': -118.25126}
uncorrected: {'sid': 764858, 'lat': 34.1527, 'lng': -118.3754}


In [86]:
nla_sensors.to_csv('metr-la/graph_sensor_locations_corrected.csv')

# Pems-Bay Match

In [98]:
import osmnx as ox

graph = ox.load_graphml(filepath=f'../graph_generation/osm_graph/pems-bay-drive.graphml')

In [99]:
gdf_nodes, gdf_edges = ox.graph_to_gdfs(graph)
gdf_nodes['osmidn'] = gdf_nodes.index
gdf_nodes['osmidstr'] = gdf_nodes['osmidn'].astype(str)

fgdf_edges = gdf_edges.reset_index()
cond = np.array([str(type(s)) for s in fgdf_edges['highway']]) == "<class 'str'>"
fgdf_edges = fgdf_edges[cond]

fmotorway = fgdf_edges[fgdf_edges['highway'].isin(['motorway'])]
fgdf_nodes = gdf_nodes[gdf_nodes.index.isin(set(fmotorway['u'].tolist() + fmotorway['v'].tolist()))]

In [112]:
from cartoframes.viz import Layer, popup_element, color_category_style, basic_style


pems_df = pd.read_hdf(f'pems-bay/pems-bay-meta.h5')
pems_gdf = gpd.GeoDataFrame(
    pems_df, geometry=gpd.points_from_xy(x=pems_df.Longitude, y=pems_df.Latitude)
)
pems_gdf.crs = 'EPSG:4326'
pems_gdf['sid'] = pems_gdf.index
pems_gdf['Fwy2'] = pems_gdf['Fwy'].astype(str) + pems_gdf['Dir'].astype(str)



Map([
    Layer(fgdf_nodes, basic_style(color='black'), popup_click=[
        popup_element('osmidstr')]),
    Layer(fmotorway, encode_data=False),
#     Layer(gpd.GeoDataFrame(geometry=lines),  basic_style(color='pink')),
    Layer(pems_gdf, color_category_style('Fwy2'), popup_click=[
        popup_element('sid')]),
])

In [113]:
fwy_path_dict = {
    '17N': ('310961963', '257884249'),
    '17S': ('65401698', '7048863128'),
    '85N': ('248212016', '3660749993'),
    '85S': ('209854346', '1377399032'),
    '87N': ('248212014', '1698569739'),
    '87S': ('245509711', '309053289'),
    '101N': ('311950047', '239637816'),
    '101S': ('65405009', '2510564064'),
    '237W': ('3665202993', '32788810'),
    '237E': ('5334144482', '1389052694'),
    '280N': ('4974822843', '1067449074'),
    '280S': ('30237212', '246295695'),
    '680N': ('4974822842', '257881716'),
    '680S': ('65508508', '4974822843'),
    '880N': ('257884249', '65414355'),
    '880S': ('1688870513', '65401698')
}

In [114]:
mygraph = dict()
osmidpos = {osmidstr: (x, y) for osmidstr, y, x in zip(fgdf_nodes['osmidstr'], fgdf_nodes['y'], fgdf_nodes['x'])}
ffgdf_edges = fgdf_edges[fgdf_edges['u'].astype(str).isin(osmidpos) & fgdf_edges['v'].astype(str).isin(osmidpos)]

for _, item in ffgdf_edges.iterrows():
    us = str(item['u'])
    vs = str(item['v'])
    dist = item['length']
    
    mygraph.setdefault(us, {'pos': osmidpos[us]})
    mygraph.setdefault(vs, {'pos': osmidpos[vs]})
    mygraph[us][vs] = dist

In [115]:
import heapq
from typing import Dict, List

distances = {}
            
        
def a_star(graph: Dict[str, Dict[str, float]], 
           start: str, end: str) -> List[str]:
    # Heuristic function for estimating the distance between two nodes
    def h(node):
        if (node, end) not in distances:
            # In this example, we use a simple heuristic that assumes
            # a straight-line distance between nodes, ignoring obstacles
            x1, y1 = graph[node]['pos']
            x2, y2 = graph[end]['pos']
            distances[(node, end)] = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
            distances[(end, node)] = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
        return distances[(node, end)]
    
    # Initialize distance and previous node dictionaries
    g_score = {node: float('inf') for node in graph}
    g_score[start] = 0
    f_score = {node: float('inf') for node in graph}
    f_score[start] = h(start)
    prev = {node: None for node in graph}
    
    # Initialize heap with start node and its f score
    heap = [(f_score[start], start)]
    
    while heap:
        # Pop the node with the smallest f score from the heap
        (f, curr_node) = heapq.heappop(heap)
        
        # If we have reached the end node, return the shortest path
        if curr_node == end:
            path = []
            while curr_node is not None:
                path.append(curr_node)
                curr_node = prev[curr_node]
                
            return path[::-1]
        
        # Otherwise, update the f and g scores of all adjacent nodes
        for neighbor, weight in graph[curr_node].items():
            # Check if there is an edge between the current node and the neighbor
            if neighbor not in g_score:
                continue
                
            new_g_score = g_score[curr_node] + weight
            if new_g_score < g_score[neighbor]:
                g_score[neighbor] = new_g_score
                f_score[neighbor] = new_g_score + h(neighbor)
                prev[neighbor] = curr_node
                heapq.heappush(heap, (f_score[neighbor], neighbor))
    
    # If we get here, there is no path from start to end
    return None

# # Example graph with node positions
# graph = {
#     'A': {'B': 2, 'C': 1, 'pos': (0, 0)},
#     'B': {'C': 2, 'D': 3, 'pos': (1, 1)},
#     'C': {'D': 4, 'E': 3, 'pos': (1, -1)},
#     'D': {'E': 2, 'pos': (2, 0)},
#     'E': {'F': 3, 'pos': (3, -1)},
#     'F': {'pos': (4, 0)},
# }

# Find the shortest path from A to F using A* algorithm
# path = a_star(mygraph, 'A', 'F')
# print(path)  # Output: ['A', 'C', 'E', 'F']


In [116]:
df_list = []
for uid in fwy_path_dict:
    path = a_star(mygraph, fwy_path_dict[uid][0], fwy_path_dict[uid][1])
    print(uid)
    fgdf_tmp = fgdf_edges[fgdf_edges['u'].astype(str).isin(path) & fgdf_edges['v'].astype(str).isin(path)].copy()
    fgdf_tmp['pathid'] = uid
    df_list.append(fgdf_tmp)

17N
17S
85N


KeyError: '1377399032'

# Pemsd7

In [12]:
d7_sensors = pd.read_csv('pemsd7/PeMSD7_M_Station_Info.csv', index_col=0)
# d7_sensors = d7_sensors.set_index('index')
# d7_sensors.columns = ['sid', 'lat', 'lng']

In [13]:
d7_sensors

Unnamed: 0,ID,Fwy,Dir,District,Latitude,Longitude
0,716939,5,S,7,34.041407,-118.218373
1,759700,5,S,7,34.020961,-118.195456
2,716925,5,N,7,33.995023,-118.144513
3,715930,5,N,7,33.971763,-118.122905
4,715938,5,N,7,34.002541,-118.150997
...,...,...,...,...,...,...
223,766337,110,S,7,33.855273,-118.284900
224,763272,110,S,7,33.968828,-118.281010
225,763532,110,N,7,33.845433,-118.285155
226,763522,110,N,7,33.811070,-118.287268
