In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

from sklearn.neighbors import BallTree

import matplotlib.pyplot as plt

import osmnx as ox
import networkx as nx

In [2]:
station_attribute_file = "../data/external/mtc/station-attributes/Station_Attribute_Data-Final File.csv"
pedestrian_network_file = "../data/interim/pedestrian-viewer.geojson"
node_file = "../data/interim/step5_tidy_roadway/node.geojson"

In [3]:
CALCS_CRS = 3857 
LAT_LNG_CRS = 4326

In [4]:
station_df = pd.read_csv(station_attribute_file)
station_df.head()

Unnamed: 0,station,attribute,lat,lon,walk_time_to_platform_seconds
0,Dublin/Pleasanton,Platform,37.701593,-121.899072,0
1,Dublin/Pleasanton,Bus,37.702677,-121.899263,193
2,Dublin/Pleasanton,Bike,37.701654,-121.897476,171
3,Dublin/Pleasanton,Garage,37.702819,-121.89661,261
4,Dublin/Pleasanton,Surface Lot,37.700544,-121.894907,261


In [6]:
ped_gdf = gpd.read_file(pedestrian_network_file)
ped_gdf.to_crs(epsg=CALCS_CRS)
ped_gdf = ped_gdf.drop(columns=['u', 'v'])
ped_gdf.head()

Unnamed: 0,model_link_id,shstReferenceId,shstGeometryId,a_node_id,b_node_id,length,geometry
0,4000000,00000461094d7f302e7afdcfc7ff5ba3,208e093f10a62dcc6646a8efa0bde136,3000000,3007836,84.64525,"LINESTRING (-122.33155 37.98120, -122.33156 37..."
1,4000001,00000b467d2e08f9abf13eeafee3ed46,5fe3056a5583474c0c898983cd6a638b,3000001,3077456,509.283331,"LINESTRING (-121.94477 37.95332, -121.94439 37..."
2,4000002,000089408c36d4bc42ee46fc631cc3a8,08226050f43562c1d05cf9e36255507f,3000002,3061300,149.610203,"LINESTRING (-122.02678 37.93099, -122.02660 37..."
3,4000003,0000a7a8e3867aa0c76a6208a79fedfa,5a4c6b238c713ef2aae389bccb813f16,3000003,3016463,122.157883,"LINESTRING (-122.31661 37.96036, -122.31657 37..."
4,4000004,0000ef92881341b20a23b16439cd8cc6,e237214c59de5ae1c88cfcbeda9dca1e,3000004,3044382,1707.414801,"LINESTRING (-121.59541 37.82273, -121.59982 37..."


In [35]:
override_df = pd.DataFrame({'station':['Dublin/Pleasanton'], 'attribute':['Platform'], 'override_id':[2587587]})
override_df

Unnamed: 0,station,attribute,override_id
0,Dublin/Pleasanton,Platform,2587587


In [7]:
node_gdf = gpd.read_file(node_file)
node_gdf = node_gdf.loc[node_gdf['walk_access'] == 1]
node_gdf.to_crs(CALCS_CRS)
node_gdf.head()

Unnamed: 0,osm_node_id,shst_node_id,county,drive_access,walk_access,bike_access,model_node_id,county_numbering_start,geometry
0,2401244716,505d64eb98f1da8d812a3b3801034308,Contra Costa,1,1,1,3000000,3000000,POINT (-122.33155 37.98120)
1,57839068,473979c78435732f01ca5a168afb62e0,Contra Costa,1,1,1,3000001,3000000,POINT (-121.94477 37.95332)
2,1024388950,fc7b575d5d8c961d4a70fca846ae7f80,Marin,1,1,1,5000000,5000000,POINT (-122.53983 37.89800)
3,65561433,6c60cf34e9dc3e123eefb829fe80c76a,Santa Clara,1,1,1,2000000,2000000,POINT (-122.03190 37.26739)
4,4545575571,013e1f994fd86c1f226098f8364f7286,Santa Clara,1,1,1,2000001,2000000,POINT (-122.01186 37.37845)


In [40]:
station_gdf = gpd.GeoDataFrame(station_df, geometry=gpd.points_from_xy(station_df.lon, station_df.lat))
station_gdf.set_crs(LAT_LNG_CRS).to_crs(CALCS_CRS)

Unnamed: 0,station,attribute,lat,lon,walk_time_to_platform_seconds,geometry,model_node_id,distance_from_model_node
0,Dublin/Pleasanton,Platform,37.701593,-121.899072,0,POINT (-13569742.623 4537356.256),2521214,47.182241
1,Dublin/Pleasanton,Bus,37.702677,-121.899263,193,POINT (-13569763.885 4537508.772),2607048,19.991444
2,Dublin/Pleasanton,Bike,37.701654,-121.897476,171,POINT (-13569564.957 4537364.839),2544655,46.433856
3,Dublin/Pleasanton,Garage,37.702819,-121.89661,261,POINT (-13569468.555 4537528.751),2616183,43.741806
4,Dublin/Pleasanton,Surface Lot,37.700544,-121.894907,261,POINT (-13569278.978 4537208.667),2531903,15.910385
5,Dublin/Pleasanton,Kiss and ride drop off,37.702821,-121.899292,182,POINT (-13569767.114 4537529.032),2607048,28.450547
6,West Dublin/Plesanton,Platform,37.699746,-121.928205,0,POINT (-13572985.694 4537096.394),2617670,19.739904
7,West Dublin/Plesanton,Bus,37.700779,-121.92701,209,POINT (-13572852.667 4537241.730),2575300,58.826571
8,West Dublin/Plesanton,Bike,37.700559,-121.927741,199,POINT (-13572934.042 4537210.778),2553289,7.080708
9,West Dublin/Plesanton,Garage,37.701027,-121.927173,159,POINT (-13572870.812 4537276.623),2578511,46.494847


In [41]:
def get_nearest(src_points, candidates, k_neighbors=1):
    """Find nearest neighbors for all source points from a set of candidate points"""

    # Create tree from the candidate points
    tree = BallTree(candidates, leaf_size=15, metric='haversine')

    # Find closest points and distances
    distances, indices = tree.query(src_points, k=k_neighbors)

    # Transpose to get distances and indices into arrays
    distances = distances.transpose()
    indices = indices.transpose()

    # Get closest indices and distances (i.e. array at index 0)
    # note: for the second closest points, you would take index 1, etc.
    closest = indices[0]
    closest_dist = distances[0]

    # Return indices and distances
    return (closest, closest_dist)


def nearest_neighbor(left_gdf, right_gdf, return_dist=False):
    """
    For each point in left_gdf, find closest point in right GeoDataFrame and return them.

    NOTICE: Assumes that the input Points are in WGS84 projection (lat/lon).
    """

    left_geom_col = left_gdf.geometry.name
    right_geom_col = right_gdf.geometry.name

    # Ensure that index in right gdf is formed of sequential numbers
    right = right_gdf.copy().reset_index(drop=True)

    # Parse coordinates from points and insert them into a numpy array as RADIANS
    left_radians = np.array(left_gdf[left_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())
    right_radians = np.array(right[right_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())

    # Find the nearest points
    # -----------------------
    # closest ==> index in right_gdf that corresponds to the closest point
    # dist ==> distance between the nearest neighbors (in meters)

    closest, dist = get_nearest(src_points=left_radians, candidates=right_radians)

    # Return points from right GeoDataFrame that are closest to points in left GeoDataFrame
    closest_points = right.loc[closest]

    # Ensure that the index corresponds the one in left_gdf
    closest_points = closest_points.reset_index(drop=True)

    # Add distance if requested
    if return_dist:
        # Convert to meters from radians
        earth_radius = 6371000  # meters
        closest_points['distance'] = dist * earth_radius

    return closest_points

In [42]:
nearest_gdf = nearest_neighbor(station_gdf, node_gdf, return_dist=True)
nearest_gdf.head()

Unnamed: 0,osm_node_id,shst_node_id,county,drive_access,walk_access,bike_access,model_node_id,county_numbering_start,geometry,distance
0,1420600286,9e0e839ef0d74248cba37812b731d051,Alameda,1,1,1,2521214,2500000,POINT (-121.89894 37.70083),47.182241
1,657540257,132609e4a91cb4435e237497143f77bd,Alameda,1,1,1,2607048,2500000,POINT (-121.89913 37.70244),19.991444
2,3901985424,acc683c7d7c21fa30f321e9b01d4f512,Alameda,0,1,1,2544655,2500000,POINT (-121.89777 37.70221),46.433856
3,3901985444,d2ec165c4fa3c6346279fa0933f001a5,Alameda,0,1,1,2616183,2500000,POINT (-121.89697 37.70312),43.741806
4,1420600278,296e4a90a37420599808e990d57f4671,Alameda,1,1,1,2531903,2500000,POINT (-121.89493 37.70081),15.910385


In [43]:
station_gdf['model_node_id'] = nearest_gdf['model_node_id']
station_gdf['distance_from_model_node'] = nearest_gdf['distance']

station_gdf = pd.merge(
    station_gdf,
    override_df,
    on=['station','attribute'],
    how='left',
)

station_gdf.head()

station_gdf['model_node_id'] = np.where(
    pd.isnull(station_gdf['override_id']),
    station_gdf['model_node_id'],
    station_gdf['override_id']
)

station_gdf = station_gdf.drop(columns=['override_id'])

station_gdf.head()

Unnamed: 0,station,attribute,lat,lon,walk_time_to_platform_seconds,geometry,model_node_id,distance_from_model_node
0,Dublin/Pleasanton,Platform,37.701593,-121.899072,0,POINT (-121.89907 37.70159),2587587.0,47.182241
1,Dublin/Pleasanton,Bus,37.702677,-121.899263,193,POINT (-121.89926 37.70268),2607048.0,19.991444
2,Dublin/Pleasanton,Bike,37.701654,-121.897476,171,POINT (-121.89748 37.70165),2544655.0,46.433856
3,Dublin/Pleasanton,Garage,37.702819,-121.89661,261,POINT (-121.89661 37.70282),2616183.0,43.741806
4,Dublin/Pleasanton,Surface Lot,37.700544,-121.894907,261,POINT (-121.89491 37.70054),2531903.0,15.910385


In [44]:
def ox_graph(n_gdf, l_gdf):

    graph_nodes = n_gdf.copy()

    graph_nodes.gdf_name = "network_nodes"
    graph_nodes['id'] = graph_nodes['model_node_id']

    graph_links = l_gdf.copy()
    graph_links['id'] = graph_links['model_link_id']
    graph_links['u'] = graph_links['a_node_id']
    graph_links['v'] = graph_links['b_node_id']
    graph_links['key'] = graph_links['model_link_id']

    G = ox.graph_from_gdfs(graph_nodes, graph_links)

    return G

In [45]:
G = ox_graph(node_gdf, ped_gdf)

In [47]:
platform_df = pd.DataFrame(station_gdf.loc[station_gdf['attribute'] == 'Platform'])
platform_df = platform_df[['station', 'model_node_id']].rename(columns={'model_node_id':'b_node'})
platform_df.head()

Unnamed: 0,station,b_node
0,Dublin/Pleasanton,2587587.0
6,West Dublin/Plesanton,2617670.0
12,BayFair,2528506.0


In [48]:
a_to_b_df = pd.DataFrame(station_gdf.loc[station_gdf['attribute'] != 'Platform'])
a_to_b_df = a_to_b_df[['station', 'attribute', 'model_node_id']].rename(columns={'model_node_id':'a_node'})
a_to_b_df = pd.merge(a_to_b_df, platform_df, on='station', how='left')
a_to_b_df.head()

Unnamed: 0,station,attribute,a_node,b_node
0,Dublin/Pleasanton,Bus,2607048.0,2587587.0
1,Dublin/Pleasanton,Bike,2544655.0,2587587.0
2,Dublin/Pleasanton,Garage,2616183.0,2587587.0
3,Dublin/Pleasanton,Surface Lot,2531903.0,2587587.0
4,Dublin/Pleasanton,Kiss and ride drop off,2607048.0,2587587.0


In [50]:
def _calc(df):
    return nx.shortest_path_length(G, df.a_node, df.b_node, weight = "length")

a_to_b_df['path_distance'] = a_to_b_df.apply(lambda x: _calc(x), axis=1)
a_to_b_df.head()

Unnamed: 0,station,attribute,a_node,b_node,path_distance
0,Dublin/Pleasanton,Bus,2607048.0,2587587.0,136.251634
1,Dublin/Pleasanton,Bike,2544655.0,2587587.0,1265.003895
2,Dublin/Pleasanton,Garage,2616183.0,2587587.0,730.269031
3,Dublin/Pleasanton,Surface Lot,2531903.0,2587587.0,364.477424
4,Dublin/Pleasanton,Kiss and ride drop off,2607048.0,2587587.0,136.251634


In [51]:
def _calc(df):
    return nx.shortest_path(G, df.a_node, df.b_node, weight = "length")

a_to_b_df['path_sequence'] = a_to_b_df.apply(lambda x: _calc(x), axis=1)
a_to_b_df.head()

Unnamed: 0,station,attribute,a_node,b_node,path_distance,path_sequence
0,Dublin/Pleasanton,Bus,2607048.0,2587587.0,136.251634,"[2607048.0, 2570898, 2587587]"
1,Dublin/Pleasanton,Bike,2544655.0,2587587.0,1265.003895,"[2544655.0, 2505604, 2504325, 2597970, 2597191..."
2,Dublin/Pleasanton,Garage,2616183.0,2587587.0,730.269031,"[2616183.0, 2560083, 2552080, 2578390, 2513657..."
3,Dublin/Pleasanton,Surface Lot,2531903.0,2587587.0,364.477424,"[2531903.0, 2540096, 2529282, 2548796, 2587587]"
4,Dublin/Pleasanton,Kiss and ride drop off,2607048.0,2587587.0,136.251634,"[2607048.0, 2570898, 2587587]"


In [57]:
paths_df = pd.DataFrame()
for index, row in a_to_b_df.iterrows():
    df = pd.DataFrame(row['path_sequence']).rename({0:'a_node_id'}, axis=1)
    df['station'] = row['station']
    df['attribute'] = row['attribute']
    df['sequence'] = np.arange(len(df))
    df['b_node_id'] = df['a_node_id'].shift(-1)
    paths_df = pd.concat([paths_df, df], ignore_index=True)

paths_df = paths_df.loc[paths_df['b_node_id']>0]
paths_df.head()

Unnamed: 0,a_node_id,station,attribute,sequence,b_node_id
0,2607048.0,Dublin/Pleasanton,Bus,0,2570898.0
1,2570898.0,Dublin/Pleasanton,Bus,1,2587587.0
3,2544655.0,Dublin/Pleasanton,Bike,0,2505604.0
4,2505604.0,Dublin/Pleasanton,Bike,1,2504325.0
5,2504325.0,Dublin/Pleasanton,Bike,2,2597970.0


In [58]:
df = pd.merge(
    paths_df,
    ped_gdf,
    on=['a_node_id', 'b_node_id'],
    how='left'
    ).rename(columns=({'length':'length_meters'}))
output_gdf = gpd.GeoDataFrame(df, geometry='geometry')
output_gdf.head()

Unnamed: 0,a_node_id,station,attribute,sequence,b_node_id,model_link_id,shstReferenceId,shstGeometryId,length_meters,geometry
0,2607048.0,Dublin/Pleasanton,Bus,0,2570898.0,3163870,f6e356fcae023d5fc953e35d4d766420,5501b7e670ea6cfa05badc2ca83abb54,129.886528,"LINESTRING (-121.89913 37.70244, -121.89895 37..."
1,2570898.0,Dublin/Pleasanton,Bus,1,2587587.0,3249836,8cdff263a780ae9630a21c70acba135b,83cd02a4984b976bf9ba31a43dad8ba3,6.365106,"LINESTRING (-121.89830 37.70163, -121.89823 37..."
2,2544655.0,Dublin/Pleasanton,Bike,0,2505604.0,3240530,7c71cac36d0ba3ec40dccf69b4c62a46,b4eca262aa12a31e7dbcf23fdd58aa50,75.26462,"LINESTRING (-121.89699 37.70216, -121.89715 37..."
3,2505604.0,Dublin/Pleasanton,Bike,1,2504325.0,3273620,b6bb4c366b244164c8b69b330502dcbc,dfc54319a1f4e8120614624dbff5e4e1,75.799234,"LINESTRING (-121.89620 37.70211, -121.89699 37..."
4,2504325.0,Dublin/Pleasanton,Bike,2,2597970.0,3196068,2e924f3a46c2689e33b8fe579f00cb8d,9b33eaca27bcf887dc09b47aaee31f16,27.644251,"LINESTRING (-121.89591 37.70210, -121.89620 37..."


In [59]:
station_join_df = station_gdf[['station', 'attribute', 'walk_time_to_platform_seconds']]
station_join_df.head()

Unnamed: 0,station,attribute,walk_time_to_platform_seconds
0,Dublin/Pleasanton,Platform,0
1,Dublin/Pleasanton,Bus,193
2,Dublin/Pleasanton,Bike,171
3,Dublin/Pleasanton,Garage,261
4,Dublin/Pleasanton,Surface Lot,261


In [60]:
output_gdf = pd.merge(
    output_gdf,
    station_join_df,
    on=['station', 'attribute'],
    how='left'
)
output_gdf.head()

Unnamed: 0,a_node_id,station,attribute,sequence,b_node_id,model_link_id,shstReferenceId,shstGeometryId,length_meters,geometry,walk_time_to_platform_seconds
0,2607048.0,Dublin/Pleasanton,Bus,0,2570898.0,3163870,f6e356fcae023d5fc953e35d4d766420,5501b7e670ea6cfa05badc2ca83abb54,129.886528,"LINESTRING (-121.89913 37.70244, -121.89895 37...",193
1,2570898.0,Dublin/Pleasanton,Bus,1,2587587.0,3249836,8cdff263a780ae9630a21c70acba135b,83cd02a4984b976bf9ba31a43dad8ba3,6.365106,"LINESTRING (-121.89830 37.70163, -121.89823 37...",193
2,2544655.0,Dublin/Pleasanton,Bike,0,2505604.0,3240530,7c71cac36d0ba3ec40dccf69b4c62a46,b4eca262aa12a31e7dbcf23fdd58aa50,75.26462,"LINESTRING (-121.89699 37.70216, -121.89715 37...",171
3,2505604.0,Dublin/Pleasanton,Bike,1,2504325.0,3273620,b6bb4c366b244164c8b69b330502dcbc,dfc54319a1f4e8120614624dbff5e4e1,75.799234,"LINESTRING (-121.89620 37.70211, -121.89699 37...",171
4,2504325.0,Dublin/Pleasanton,Bike,2,2597970.0,3196068,2e924f3a46c2689e33b8fe579f00cb8d,9b33eaca27bcf887dc09b47aaee31f16,27.644251,"LINESTRING (-121.89591 37.70210, -121.89620 37...",171


In [62]:
output_gdf.to_file('../data/interim/station-attribute.geojson', driver='GeoJSON')