In [1]:
from pyproj import CRS
from shapely.geometry import LineString, Point

In [2]:
import os
import sys
import yaml
import pickle
import glob

import pandas as pd
import geopandas as gpd
import numpy as np

from network_wrangler import RoadwayNetwork
from network_wrangler import TransitNetwork
from network_wrangler import ProjectCard
from network_wrangler import Scenario
from network_wrangler import WranglerLogger

from lasso import ModelRoadwayNetwork
from lasso import StandardTransit
from lasso import Parameters
from lasso import mtc

In [3]:
%load_ext autoreload
%autoreload 2

# I/O

In [4]:
root_dir = "../../data"
input_dir = os.path.join(root_dir, 'processed', 'version_03')
output_dir = os.path.join(root_dir, 'interim', 'step9_taps')

lasso_dir = 'C:/Users/ywang/Documents/GitHub/Lasso'

In [5]:
parameters = Parameters(lasso_base_dir = lasso_dir)

2021-10-25 16:57:27, INFO: Lasso base directory set as: C:/Users/ywang/Documents/GitHub/Lasso
2021-10-25 16:57:27, INFO: Lasso base directory set as: C:/Users/ywang/Documents/GitHub/Lasso


In [6]:
version_00_pickle_file_name = os.path.join(input_dir, 'working_scenario_01.pickle')
v_00_scenario = pickle.load(open(version_00_pickle_file_name, 'rb'))

In [7]:
# check data in the pickle file
# print(v_00_scenario.road_net.nodes_df.columns)
# print(v_00_scenario.road_net.links_df.columns)
# print(v_00_scenario.road_net.shapes_df.columns)

v_00_scenario.transit_net.feed.stops.info()

print('\n number of unique stop_id: {}'.format(v_00_scenario.transit_net.feed.stop_times.stop_id.nunique()))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21757 entries, 0 to 21756
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   stop_name            21757 non-null  object 
 1   stop_desc            409 non-null    object 
 2   stop_lat             21757 non-null  float64
 3   stop_lon             21757 non-null  float64
 4   zone_id              8232 non-null   object 
 5   stop_url             261 non-null    object 
 6   location_type        5475 non-null   float64
 7   parent_station       249 non-null    object 
 8   stop_timezone        126 non-null    object 
 9   wheelchair_boarding  377 non-null    float64
 10  stop_code            15289 non-null  object 
 11  platform_code        52 non-null     object 
 12  position             0 non-null      object 
 13  direction            0 non-null      object 
 14  stop_id              21757 non-null  object 
 15  osm_node_id          21093 non-null 

In [8]:
### check stops that are not walk accessible - there should be none

stops_df = v_00_scenario.transit_net.feed.stops.copy()
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

stops_df = pd.merge(stops_df, 
                    v_00_scenario.road_net.nodes_df[["model_node_id", "X", "Y", "drive_access", "walk_access"]], 
                    how = "left", 
                    on = "model_node_id")

print(stops_df[stops_df.walk_access == 0].shape[0])

print(v_00_scenario.road_net.links_df[
    (v_00_scenario.road_net.links_df.A.isin(stops_df[stops_df.walk_access == 0].model_node_id)) |
    (v_00_scenario.road_net.links_df.B.isin(stops_df[stops_df.walk_access == 0].model_node_id))].roadway.value_counts())

0
Series([], Name: roadway, dtype: int64)


# Explore TAP creation options (not required for creating the network)

This part tries three options to create TAPs and TAP links (k-means option 1, k-means option 2, kmeans final option), and compare the results with the TAPs and TAP links of the legacy TM2 network (non-Marin version). 

### k-means option 1

In [9]:
%%time
kmeans_loc_taps_gdf, stops_loc_taps_df = mtc.create_taps_kmeans_location_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

Wall time: 4min 15s


In [10]:
kmeans_loc_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-122.465759,37.753650,POINT (-122.46576 37.75365)
1,1,-121.996227,37.552652,POINT (-121.99623 37.55265)
2,2,-122.658717,38.267802,POINT (-122.65872 38.26780)
3,3,-122.098965,37.966814,POINT (-122.09896 37.96681)
4,4,-121.829094,37.334932,POINT (-121.82909 37.33493)
...,...,...,...,...
6132,6132,-122.353896,37.936766,POINT (-122.35390 37.93677)
6133,6133,-121.500699,38.584004,POINT (-121.50070 38.58400)
6134,6134,-121.902491,37.330286,POINT (-121.90249 37.33029)
6135,6135,-122.041192,38.243446,POINT (-122.04119 38.24345)


In [11]:
stops_loc_taps_df

Unnamed: 0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,...,platform_code,position,direction,stop_id,osm_node_id,shst_node_id,model_node_id,X,Y,tap_id
0,Grant St/Gill Dr,,37.984310,-122.037562,,,0.0,,,,...,,,,6359,57953402,de35e94fe56ab8b420ae52c9bd736aa6,3031741,-122.037351,37.984227,1830
1,Mitchell Dr/Oak Grove Rd,,37.932171,-122.019366,,,0.0,,,,...,,,,5952,4952813425,f22574708786d20e454884da84d474cc,3062312,-122.019329,37.932348,1971
2,Bollinger Canyon Rd/Main Branch Rd,,37.769419,-121.909048,,,0.0,,,,...,,,,6128,659026418,a4815ae72a3d56cff93a0739bae5507c,3064969,-121.909132,37.769538,1478
3,Bollinger Canyon Rd/Albion Rd,,37.766152,-121.905245,,,0.0,,,,...,,,,6129,659026630,2b9afa0d6c6fca4e0055b7ea2ba1f51e,3051011,-121.905310,37.766411,3071
4,Bollinger Canyon Rd/Stoneleaf Rd,,37.773160,-121.917894,,,0.0,,,,...,,,,6126,659026406,5994c393d2c4ffe74f08f951715e9561,3058393,-121.918163,37.773274,3940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21752,Richmond,,37.936766,-122.353896,56137,,,,,,...,,,,6674,,,3097284,-122.353896,37.936766,6132
21753,Sacramento,,38.584004,-121.500699,56133,,,,,,...,,,,6677,,,3547318,-121.500699,38.584004,6133
21754,San Jose,,37.329935,-121.902496,56146,,,,,,...,,,,6680,530429941,22ad1bd485b332c81ad2561e2bba9644,2010503,-121.902491,37.330286,6134
21755,Suisun-Fairfield,,38.243446,-122.041192,56135,,,,,,...,,,,6681,,,3547320,-122.041192,38.243446,6135


### k-means option 2

In [12]:
%%time
kmeans_fre_taps_gdf, stops_fre_taps_df = mtc.create_taps_kmeans_frequency_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

Wall time: 4min 17s


In [13]:
kmeans_fre_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-122.308045,37.938210,POINT (-122.30805 37.93821)
1,1,-121.891454,37.338309,POINT (-121.89145 37.33831)
2,2,-122.742060,38.426848,POINT (-122.74206 38.42685)
3,3,-122.175902,37.751655,POINT (-122.17590 37.75166)
4,4,-122.432299,37.711969,POINT (-122.43230 37.71197)
...,...,...,...,...
6132,6132,-122.353896,37.936766,POINT (-122.35390 37.93677)
6133,6133,-121.500699,38.584004,POINT (-121.50070 38.58400)
6134,6134,-121.902491,37.330286,POINT (-121.90249 37.33029)
6135,6135,-122.041192,38.243446,POINT (-122.04119 38.24345)


In [14]:
stops_fre_taps_df

Unnamed: 0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,...,position,direction,stop_id,osm_node_id,shst_node_id,model_node_id,X,Y,num_trip,tap_id
0,Grant St/Gill Dr,,37.984310,-122.037562,,,0.0,,,,...,,,6359,57953402,de35e94fe56ab8b420ae52c9bd736aa6,3031741,-122.037351,37.984227,14.000000,3441
1,Mitchell Dr/Oak Grove Rd,,37.932171,-122.019366,,,0.0,,,,...,,,5952,4952813425,f22574708786d20e454884da84d474cc,3062312,-122.019329,37.932348,20.000486,3761
2,Bollinger Canyon Rd/Main Branch Rd,,37.769419,-121.909048,,,0.0,,,,...,,,6128,659026418,a4815ae72a3d56cff93a0739bae5507c,3064969,-121.909132,37.769538,22.667153,2788
3,Bollinger Canyon Rd/Albion Rd,,37.766152,-121.905245,,,0.0,,,,...,,,6129,659026630,2b9afa0d6c6fca4e0055b7ea2ba1f51e,3051011,-121.905310,37.766411,22.667153,1131
4,Bollinger Canyon Rd/Stoneleaf Rd,,37.773160,-121.917894,,,0.0,,,,...,,,6126,659026406,5994c393d2c4ffe74f08f951715e9561,3058393,-121.918163,37.773274,22.667153,5547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21752,Richmond,,37.936766,-122.353896,56137,,,,,,...,,,6674,,,3097284,-122.353896,37.936766,,6132
21753,Sacramento,,38.584004,-121.500699,56133,,,,,,...,,,6677,,,3547318,-121.500699,38.584004,,6133
21754,San Jose,,37.329935,-121.902496,56146,,,,,,...,,,6680,530429941,22ad1bd485b332c81ad2561e2bba9644,2010503,-121.902491,37.330286,,6134
21755,Suisun-Fairfield,,38.243446,-122.041192,56135,,,,,,...,,,6681,,,3547320,-122.041192,38.243446,,6135


### kmeans final option

In [15]:
%%time
kmeans_taps_gdf, stops_taps_df = mtc.create_taps_kmeans(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    clusters = 6000,
)

Wall time: 4min 13s


In [16]:
kmeans_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-122.206739,37.800475,POINT (-122.20674 37.80047)
1,1,-121.878186,37.347975,POINT (-121.87819 37.34797)
2,2,-122.460198,38.276453,POINT (-122.46020 38.27645)
3,3,-122.401455,37.686157,POINT (-122.40146 37.68616)
4,4,-122.034218,37.960194,POINT (-122.03422 37.96019)
...,...,...,...,...
5995,5995,-121.934372,37.706288,POINT (-121.93437 37.70629)
5996,5996,-122.698467,38.328935,POINT (-122.69847 38.32894)
5997,5997,-122.869304,38.606936,POINT (-122.86930 38.60694)
5998,5998,-122.684513,38.435187,POINT (-122.68451 38.43519)


In [18]:
stops_taps_df

Unnamed: 0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,...,platform_code,position,direction,stop_id,osm_node_id,shst_node_id,model_node_id,X,Y,tap_id
0,12th St. Oakland City Center,,37.803664,-122.271604,12TH,http://www.bart.gov/stations/12TH/,0.0,,,1.0,...,,,,5411,,,2625945,-122.271604,37.803664,2894
1,16th St. Mission,,37.765062,-122.419694,16TH,http://www.bart.gov/stations/16TH/,0.0,,,1.0,...,,,,5412,,,1027612,-122.419694,37.765062,3054
2,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,,,,5413,,,2625944,-122.269029,37.807870,2220
3,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,,,,5414,53077120,b40a224e561bcb18b9c1c6ed21b6b6bb,2504171,-122.269023,37.807778,2220
4,24th St. Mission,,37.752254,-122.418466,24TH,http://www.bart.gov/stations/24TH/,0.0,,,1.0,...,,,,5415,,,1027613,-122.418466,37.752254,2285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21752,VALLEY AVE & VIA DE LOS MILAGROS NB,,37.674396,-121.898354,,,,,,,...,,,,21516,53023064,3c8ed8454fbf722f8c6d22c057360b0f,2550116,-121.898588,37.674179,1938
21753,MARTIN & MOHR,,37.682395,-121.862633,,,,,,,...,,,,21517,53037185,f55737df268513de5d5ff9ddda219205,2569722,-121.862624,37.681998,1881
21754,MARTIN & MOHR,,37.682208,-121.862656,,,,,,,...,,,,21518,53037185,f55737df268513de5d5ff9ddda219205,2569722,-121.862624,37.681998,1881
21755,WALNUT CREEK BART,,37.905224,-122.066914,,,,,,,...,,,,21519,2411611647,82fff27ccfa2c3be9ea3ad51c088685c,3046886,-122.066999,37.904910,1243


### TM2 legacy network taps and tap links

In [19]:
# read legacy TM2 network nodes nonMarin version
existing_network_node_gdf = gpd.read_file(root_dir + "/external/TM2_nonMarin/tm2_nodes.shp")

existing_network_node_gdf.crs = CRS("ESRI:102646")
existing_network_node_gdf = existing_network_node_gdf.to_crs({'init': 'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


In [20]:
# get TAPS from the legacy network

TAP_N_list = list(range(90001, 99999)) + list(range(190001, 199999)) + list(range(290001, 299999))\
+ list(range(390001, 399999))\
+ list(range(490001, 499999)) + list(range(590001, 599999)) + list(range(690001, 699999)) + list(range(790001, 799999))\
+ list(range(890001, 899999))

existing_network_node_gdf["X"] = existing_network_node_gdf["geometry"].apply(lambda g: g.x)
existing_network_node_gdf["Y"] = existing_network_node_gdf["geometry"].apply(lambda g: g.y)

existing_taps_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(TAP_N_list)].copy()

existing_taps_gdf.rename(columns = {"N" : "tap_id_tm2"}, inplace = True)

existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry"]]

Unnamed: 0,tap_id_tm2,X,Y,geometry
4789,90001,-122.485068,37.785822,POINT (-122.48507 37.78582)
4790,90002,-122.446404,37.775892,POINT (-122.44640 37.77589)
4791,90003,-122.396752,37.795590,POINT (-122.39675 37.79559)
4792,90004,-122.476274,37.754315,POINT (-122.47627 37.75432)
4793,90005,-122.466386,37.751010,POINT (-122.46639 37.75101)
...,...,...,...,...
50672,890193,-122.561657,37.976329,POINT (-122.56166 37.97633)
50673,890194,-122.510898,37.945639,POINT (-122.51090 37.94564)
50674,890195,-122.434522,37.869048,POINT (-122.43452 37.86905)
50675,890196,-122.454779,37.873311,POINT (-122.45478 37.87331)


In [21]:
%%time
# read legacy TM2 network links nonMarin version
existing_network_link_gdf = gpd.read_file(root_dir + "/external/TM2_nonMarin/tm2_links.shp")

existing_network_link_gdf.crs = CRS("ESRI:102646")
existing_network_link_gdf = existing_network_link_gdf.to_crs({'init': 'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


Wall time: 2min 55s


In [31]:
# existing_network_link_gdf.ASSIGNABLE.value_counts()

1    1129308
0     275837
Name: ASSIGNABLE, dtype: int64

In [32]:
# existing_network_link_gdf.DELETE.value_counts()

0    754513
1    650632
Name: DELETE, dtype: int64

In [33]:
# pd.crosstab([existing_network_link_gdf.CNTYPE, existing_network_link_gdf.FT, existing_network_link_gdf.ASSIGNABLE], 
#             existing_network_link_gdf.DELETE)

Unnamed: 0_level_0,Unnamed: 1_level_0,DELETE,0,1
CNTYPE,FT,ASSIGNABLE,Unnamed: 3_level_1,Unnamed: 4_level_1
BIKE,0,0,0,6678
CRAIL,0,0,100,44
EXT,6,1,44,0
FERRY,0,0,6,46
HRAIL,0,0,90,0
LRAIL,0,0,846,18
MAZ,6,1,251493,944
PED,0,0,315,221590
TANA,1,1,583,25
TANA,2,0,4,0


In [34]:
# pd.crosstab(existing_network_link_gdf.ASSIGNABLE, existing_network_link_gdf.DELETE)

DELETE,0,1
ASSIGNABLE,Unnamed: 1_level_1,Unnamed: 2_level_1
0,44351,231486
1,710162,419146


In [22]:
# get TAP links from the legacy network

existing_taps_links_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "TAP"].copy()

# calculate link length, which represents distance to TAPs
geom_length = existing_taps_links_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

existing_taps_links_gdf["distance_to_tap"] = geom_length["length"]

print('max distance to tap: {} meters, or {} miles'.format(
    existing_taps_links_gdf["distance_to_tap"].max(),
    existing_taps_links_gdf["distance_to_tap"].max() * 0.000621371))

max distance to tap: 1841.5977926914074 meters, or 1.1443154620424525 miles


In [23]:
# tag tap_id and stop_id of TAP links

existing_taps_links_gdf["tap_id_tm2"] = np.where(existing_taps_links_gdf.A.isin(TAP_N_list), 
                                                 existing_taps_links_gdf.A, 
                                                 existing_taps_links_gdf.B)
existing_taps_links_gdf["stop_id"] = np.where(existing_taps_links_gdf.A.isin(TAP_N_list), 
                                              existing_taps_links_gdf.B, 
                                              existing_taps_links_gdf.A)

existing_taps_links_gdf = existing_taps_links_gdf.drop_duplicates(subset = ["tap_id_tm2", "stop_id"])

In [24]:
# add other node attributes for the "stop" end of the TAP links
existing_taps_links_gdf = pd.merge(existing_taps_links_gdf,
                                   existing_network_node_gdf[["N", "X", "Y"]].rename(columns = {"N" : "stop_id"}),
                                   how = "left",
                                   on = "stop_id")

In [25]:
# add link type and stop_source (for later comparison)
existing_taps_links_gdf["type"] = "stops"
existing_taps_links_gdf["stop_source"] = "tm2"

In [26]:
existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "geometry", "distance_to_tap","type", "stop_source"]]

Unnamed: 0,tap_id_tm2,stop_id,X,Y,geometry,distance_to_tap,type,stop_source
0,90001,1007968,-122.485149,37.785750,"LINESTRING (-122.48507 37.78582, -122.48515 37...",11.661132,stops,tm2
1,90001,1015082,-122.484880,37.782011,"LINESTRING (-122.48507 37.78582, -122.48488 37...",459.509913,stops,tm2
2,90001,1029156,-122.485013,37.783883,"LINESTRING (-122.48507 37.78582, -122.48501 37...",233.700060,stops,tm2
3,90002,1010258,-122.444972,37.776956,"LINESTRING (-122.44640 37.77589, -122.44497 37...",187.494816,stops,tm2
4,90002,1031191,-122.446485,37.775820,"LINESTRING (-122.44640 37.77589, -122.44649 37...",11.659087,stops,tm2
...,...,...,...,...,...,...,...,...
22039,5030252,5032386,-122.517046,37.889899,"LINESTRING (-122.51705 37.88990, -122.51648 37...",133.105748,stops,tm2
22040,5030210,5032406,-122.504649,37.899227,"LINESTRING (-122.50465 37.89923, -122.50167 37...",308.748253,stops,tm2
22041,5019789,5032892,-122.529499,37.926422,"LINESTRING (-122.52950 37.92642, -122.52559 37...",373.033953,stops,tm2
22042,5030283,5032892,-122.529499,37.926422,"LINESTRING (-122.52950 37.92642, -122.52232 37...",684.956181,stops,tm2


### assemble data

In [27]:
# merge tap links (stop-tap mapping) of 3 k-mean calculations with stops_df from the working_scenario

consolidate_stops_taps_df = pd.merge(
    stops_df, 
    stops_loc_taps_df[["stop_id", "tap_id"]].rename(
        columns = {"tap_id" : "tap_id_location_based"}),     # k-mean option 1
    how = "left",
    on = "stop_id"
)

consolidate_stops_taps_df = pd.merge(
    consolidate_stops_taps_df, 
    stops_fre_taps_df[["stop_id", "num_trip", "tap_id"]].rename(
        columns = {"tap_id" : "tap_id_frequency_based"}),    # k-mean option 2
    how = "left",
    on = "stop_id"
)

consolidate_stops_taps_df = pd.merge(
    consolidate_stops_taps_df, 
    stops_taps_df[["stop_id", "tap_id"]].rename(
        columns = {"tap_id" : "tap_id_kmeans"}),            # kmeans final option
    how = "left",
    on = "stop_id"
)

consolidate_stops_taps_df["type"] = "stops"
consolidate_stops_taps_df["stop_source"] = "new"

In [28]:
consolidate_stops_taps_df

Unnamed: 0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,...,X,Y,drive_access,walk_access,tap_id_location_based,num_trip,tap_id_frequency_based,tap_id_kmeans,type,stop_source
0,12th St. Oakland City Center,,37.803664,-122.271604,12TH,http://www.bart.gov/stations/12TH/,0.0,,,1.0,...,-122.271604,37.803664,0,1,6000,,6000,2894,stops,new
1,16th St. Mission,,37.765062,-122.419694,16TH,http://www.bart.gov/stations/16TH/,0.0,,,1.0,...,-122.419694,37.765062,0,1,6001,,6001,3054,stops,new
2,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,-122.269029,37.807870,0,1,6002,,6002,2220,stops,new
3,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,-122.269023,37.807778,1,1,6003,,6003,2220,stops,new
4,24th St. Mission,,37.752254,-122.418466,24TH,http://www.bart.gov/stations/24TH/,0.0,,,1.0,...,-122.418466,37.752254,0,1,6004,,6004,2285,stops,new
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21752,VALLEY AVE & VIA DE LOS MILAGROS NB,,37.674396,-121.898354,,,,,,,...,-121.898588,37.674179,1,1,3763,18.666667,3995,1938,stops,new
21753,MARTIN & MOHR,,37.682395,-121.862633,,,,,,,...,-121.862624,37.681998,1,1,1587,3.000000,3320,1881,stops,new
21754,MARTIN & MOHR,,37.682208,-121.862656,,,,,,,...,-121.862624,37.681998,1,1,1587,2.000000,3320,1881,stops,new
21755,WALNUT CREEK BART,,37.905224,-122.066914,,,,,,,...,-122.066999,37.904910,1,1,876,15.000000,833,1243,stops,new


In [29]:
consolidate_stops_taps_df.columns

Index(['stop_name', 'stop_desc', 'stop_lat', 'stop_lon', 'zone_id', 'stop_url',
       'location_type', 'parent_station', 'stop_timezone',
       'wheelchair_boarding', 'stop_code', 'platform_code', 'position',
       'direction', 'stop_id', 'osm_node_id', 'shst_node_id', 'model_node_id',
       'X', 'Y', 'drive_access', 'walk_access', 'tap_id_location_based',
       'num_trip', 'tap_id_frequency_based', 'tap_id_kmeans', 'type',
       'stop_source'],
      dtype='object')

In [30]:
# merge taps of 3 k-mean calculations with existing taps in the legacy TM2 network

kmeans_loc_taps_gdf["tap_source"] = "kmeans_location_based"
kmeans_loc_taps_gdf.rename(columns = {"tap_id" :"tap_id_location_based"}, inplace = True)

kmeans_fre_taps_gdf["tap_source"] = "kmeans_frequency_based"
kmeans_fre_taps_gdf.rename(columns = {"tap_id" :"tap_id_frequency_based"}, inplace = True)

kmeans_taps_gdf["tap_source"] = "kmeans"
kmeans_taps_gdf.rename(columns = {"tap_id" :"tap_id_kmeans"}, inplace = True)

existing_taps_gdf["tap_source"] = "tm2"


taps_df = pd.concat([kmeans_loc_taps_gdf,
                     kmeans_fre_taps_gdf,
                     existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry", "tap_source"]],
                     kmeans_taps_gdf],
                   sort = False,
                   ignore_index = True)

taps_df["type"] = "taps"

In [31]:
# combine consolidated taps attributes with consolidated tap links
# the resulting dataframe has both stops (with attributes of the corresponding tap links) and taps (with only tap attributes)
out_df = pd.concat(
    [consolidate_stops_taps_df[["X", "Y", "stop_id", "stop_name", "num_trip",
                    "tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans',"type", "stop_source"]],
    taps_df[["tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans', "tap_id_tm2", "X", "Y", "tap_source", "type"]]],
    sort = False,
    ignore_index = True
)

In [32]:
# some stats:
print(out_df['type'].value_counts())

display(out_df.groupby(['type', 'tap_source'])['X'].count().reset_index())

print(out_df.num_trip.max())

print(out_df.columns)

taps     24490
stops    21757
Name: type, dtype: int64


Unnamed: 0,type,tap_source,X
0,taps,kmeans,6000
1,taps,kmeans_frequency_based,6137
2,taps,kmeans_location_based,6137
3,taps,tm2,6216


828.1066091236149
Index(['X', 'Y', 'stop_id', 'stop_name', 'num_trip', 'tap_id_location_based',
       'tap_id_frequency_based', 'tap_id_kmeans', 'type', 'stop_source',
       'tap_id_tm2', 'tap_source'],
      dtype='object')


In [33]:
# add tap link distance - based on the length of the lingstring, not distance along the network 

out_df = pd.merge(out_df, 
                  kmeans_taps_gdf[["tap_id_kmeans", "X", "Y"]].rename(columns = {"X" : "tap_X", "Y" : "tap_Y"}),
                  how = 'left',
                  on = ["tap_id_kmeans"])

out_df["geometry"] = out_df.apply(lambda x: LineString([Point(x.X, x.Y), Point(x.tap_X, x.tap_Y)]), axis = 1)

out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

geom_length = out_df[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

out_df["distance_to_tap"] = geom_length["length"]
out_df["distance_to_tap"] = np.where(out_df["type"] == "stops", 
                                     out_df["distance_to_tap"], 
                                     99999)

out_df.drop(["tap_X", "tap_Y", "geometry"], axis = 1, inplace = True)

In [34]:
# append tm2 tap distance

out_df = pd.concat([out_df, 
                    existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "distance_to_tap", "type", "stop_source"]]],
                  sort = False,
                  ignore_index = True)

In [35]:
out_df.info()

display(out_df)

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 68291 entries, 0 to 68290
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   X                       68291 non-null  float64
 1   Y                       68291 non-null  float64
 2   stop_id                 43801 non-null  object 
 3   stop_name               21757 non-null  object 
 4   num_trip                21620 non-null  float64
 5   tap_id_location_based   27894 non-null  float64
 6   tap_id_frequency_based  27894 non-null  float64
 7   tap_id_kmeans           27757 non-null  float64
 8   type                    68291 non-null  object 
 9   stop_source             43801 non-null  object 
 10  tap_id_tm2              28260 non-null  float64
 11  tap_source              24490 non-null  object 
 12  distance_to_tap         68291 non-null  float64
dtypes: float64(8), object(5)
memory usage: 6.8+ MB


Unnamed: 0,X,Y,stop_id,stop_name,num_trip,tap_id_location_based,tap_id_frequency_based,tap_id_kmeans,type,stop_source,tap_id_tm2,tap_source,distance_to_tap
0,-122.271604,37.803664,5411,12th St. Oakland City Center,,6000.0,6000.0,2894.0,stops,new,,,66.785762
1,-122.419694,37.765062,5412,16th St. Mission,,6001.0,6001.0,3054.0,stops,new,,,54.875577
2,-122.269029,37.807870,5413,19th St. Oakland,,6002.0,6002.0,2220.0,stops,new,,,71.048299
3,-122.269023,37.807778,5414,19th St. Oakland,,6003.0,6003.0,2220.0,stops,new,,,61.281243
4,-122.418466,37.752254,5415,24th St. Mission,,6004.0,6004.0,2285.0,stops,new,,,53.881331
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68286,-122.517046,37.889899,5032386,,,,,,stops,tm2,5030252.0,,133.105748
68287,-122.504649,37.899227,5032406,,,,,,stops,tm2,5030210.0,,308.748253
68288,-122.529499,37.926422,5032892,,,,,,stops,tm2,5019789.0,,373.033953
68289,-122.529499,37.926422,5032892,,,,,,stops,tm2,5030283.0,,684.956181


In [36]:
%%time
# add county

county_file = root_dir + "/external/county_boundaries/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
county_gdf = gpd.read_file(county_file)
county_gdf = county_gdf.to_crs("EPSG:4326")

out_df["geometry"] = out_df.apply(lambda x: Point(x.X, x.Y), axis = 1)
out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

out_df = gpd.sjoin(out_df, county_gdf[["NAME", "geometry"]], how = "left", op = "intersects")

Wall time: 31.7 s


In [37]:
out_df.NAME.value_counts()

Alameda          17057
Santa Clara      13616
San Francisco    10163
Contra Costa      9042
San Mateo         6709
Sonoma            5396
Solano            2977
Marin             2305
Napa               985
San Joaquin         12
Yolo                 8
Sacramento           4
Name: NAME, dtype: int64

In [38]:
# export for analysis
out_df.to_csv(output_dir + "/stops_and_taps.csv", index = False)

# Use the chosen method to create TAPs and write out 

In [39]:
%%time
tap_nodes_gdf, tap_links_gdf, tap_shapes_gdf = mtc.create_tap_nodes_and_links(
transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    num_taps = 6000,
)

2021-10-25 18:02:06, INFO: Adding centroid and centroid connector to standard network
2021-10-25 18:02:06, INFO: Lasso base directory set as: C:/Users/ywang/Documents/GitHub/Lasso
2021-10-25 18:02:06, INFO: Lasso base directory set as: C:/Users/ywang/Documents/GitHub/Lasso
2021-10-25 18:02:34, INFO: Finished adding centroid and centroid connectors
Wall time: 5min 12s


In [40]:
print("-------write out pickle---------")

tap_links_gdf.to_pickle(output_dir + "/tap_link.pickle")
tap_shapes_gdf.to_pickle(output_dir + "/tap_shape.pickle")
tap_nodes_gdf.to_pickle(output_dir + "/tap_node.pickle")

-------write out pickle---------


# TAP file

In [131]:
tap_node_gdf = pd.read_pickle(output_dir + "/tap_node.pickle")

In [132]:
tap_shape_gdf = pd.read_pickle(output_dir + "/tap_shape.pickle")

In [135]:
mode_crosswalk = pd.read_csv(parameters.mode_crosswalk_file)
display(mode_crosswalk.head())
mode_crosswalk.drop_duplicates(subset = ["agency_raw_name", "route_type", "is_express_bus"], inplace = True)

Unnamed: 0,agency_raw_name,agency_name,agency_id,TM2_operator,route_type,TM2_mode,TM2_line_haul_name,TM2_faresystem,is_express_bus,VEHTYPE
0,ACE_2017_3_20,ACE Altamont Corridor Express,CE,5,2,133,Commuter rail,44,0,Unknown Train
1,ACTransit_2015_8_14,AC Transit,AC Transit,30,3,30,Local bus,6,0,Motor Standard Bus
2,ACTransit_2015_8_14,AC Transit,AC Transit,30,3,84,Express bus,28,1,AC Plus Bus
3,BART_2015_8_3,Bay Area Rapid Transit,BART,26,1,120,Heavy rail,41,0,10 Car BART
4,Blue&Gold_gtfs_10_4_2017,Blue & Gold Fleet,BG,3,4,103,Ferry service,37,0,Ferry small


In [136]:
v_00_scenario.transit_net.feed.routes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701 entries, 0 to 700
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   route_id_original       701 non-null    object
 1   agency_id               526 non-null    object
 2   route_short_name        665 non-null    object
 3   route_long_name         679 non-null    object
 4   route_desc              14 non-null     object
 5   route_type              701 non-null    int64 
 6   route_url               217 non-null    object
 7   route_color             260 non-null    object
 8   route_text_color        243 non-null    object
 9   agency_raw_name         701 non-null    object
 10  route_sort_order        95 non-null     object
 11  min_headway_minutes     32 non-null     object
 12  eligibility_restricted  4 non-null      object
 13  continuous_pickup       4 non-null      object
 14  continuous_drop_off     4 non-null      object
 15  route_

In [137]:
trip_df = v_00_scenario.transit_net.feed.trips.copy()

trip_df = pd.merge(trip_df, v_00_scenario.transit_net.feed.routes.drop("agency_raw_name", axis = 1), how="left", on="route_id")

trip_df = pd.merge(trip_df, v_00_scenario.transit_net.feed.agency[["agency_name", "agency_raw_name", "agency_id"]], 
                   how = "left", on = ["agency_raw_name", "agency_id"])

# identify express bus
trip_df["is_express_bus"] = trip_df.apply(lambda x: mtc._is_express_bus(x), axis = 1)
trip_df.drop("agency_name", axis = 1 , inplace = True)

trip_df = pd.merge(
        trip_df,
        mode_crosswalk.drop("agency_id", axis = 1),
        how = "left",
        on = ["agency_raw_name", "route_type", "is_express_bus"]
    )

In [138]:
stop_times_df = v_00_scenario.transit_net.feed.stop_times.copy()

stops_df = stop_times_df.groupby(["stop_id", "trip_id"])["stop_sequence"].count().reset_index().drop("stop_sequence", axis = 1)

In [139]:
stops_df = pd.merge(stops_df, trip_df[["trip_id", "TM2_line_haul_name"]], how = "left", on = ["trip_id"])

In [140]:
stops_df = pd.merge(stops_df,
                    v_00_scenario.transit_net.feed.stops[["stop_id", "model_node_id"]],
                   how = "left",
                   on = "stop_id")

In [141]:
stops_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 131338 entries, 0 to 131337
Data columns (total 4 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   stop_id             131338 non-null  object
 1   trip_id             131338 non-null  object
 2   TM2_line_haul_name  131338 non-null  object
 3   model_node_id       131338 non-null  object
dtypes: object(4)
memory usage: 5.0+ MB


In [142]:
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

In [143]:
tap_df = pd.merge(tap_node_gdf, 
                  tap_shape_gdf[["tap_id", "model_node_id"]].rename(columns = {"model_node_id" : "stop_model_node_id"}), 
                  how = "left", on = "tap_id")

tap_df = pd.merge(tap_df,
                  stops_df[["model_node_id", "TM2_line_haul_name"]].rename(columns = {"model_node_id" : "stop_model_node_id"}),
                  how = "left",
                  on = "stop_model_node_id")

In [144]:
tap_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 131338 entries, 0 to 131337
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   tap_id                 131338 non-null  int64   
 1   X                      131338 non-null  float64 
 2   Y                      131338 non-null  float64 
 3   geometry               131338 non-null  geometry
 4   index_right            130012 non-null  float64 
 5   county                 131338 non-null  object  
 6   tap_node_county_start  131338 non-null  int64   
 7   model_node_id          131338 non-null  int64   
 8   stop_model_node_id     131338 non-null  int32   
 9   TM2_line_haul_name     131338 non-null  object  
dtypes: float64(3), geometry(1), int32(1), int64(3), object(2)
memory usage: 10.5+ MB


In [145]:
tap_df.TM2_line_haul_name.unique()

array(['Local bus', 'Express bus', 'Light rail', 'Commuter rail',
       'Ferry service', 'Heavy rail'], dtype=object)

In [146]:
line_haul_name_dict = {'Local bus' : 1, 'Express bus' : 2, 'Commuter rail' : 6, 'Light rail' : 4,
       'Heavy rail' : 5, 'Ferry service' : 3}

In [147]:
tap_df["mode"] = tap_df["TM2_line_haul_name"].map(line_haul_name_dict)

In [148]:
out_df = tap_df.groupby(["tap_id", "county", "model_node_id", "X", "Y"])["mode"].apply(lambda x: list(set(x))).reset_index()

In [149]:
out_df = out_df.sort_values(by = "model_node_id")

In [150]:
out_df['county'] = out_df['county'].map(parameters.county_code_dict)

In [151]:
out_df.rename(columns = {"tap_id" : "OBJECTID", "model_node_id" : "N", "X" : "long", "Y" : "lat"},inplace = True)

In [152]:
out_df["mode"] = out_df["mode"].apply(lambda x: ",".join(map(str, x)))

In [153]:
out_df.to_csv(output_dir + "/tap_node.csv", index = False)