In [1]:
from pyproj import CRS
from shapely.geometry import LineString, Point

In [2]:
import os
import sys
import yaml
import pickle
import glob

import pandas as pd
import geopandas as gpd
import numpy as np

from network_wrangler import RoadwayNetwork
from network_wrangler import TransitNetwork
from network_wrangler import ProjectCard
from network_wrangler import Scenario
from network_wrangler import WranglerLogger

from lasso import ModelRoadwayNetwork
from lasso import StandardTransit
from lasso import Parameters
from lasso import mtc

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
root_dir = "D:/github/travel-model-two-networks"
input_dir = os.path.join(root_dir, 'data', 'processed', 'version_03')
output_dir = os.path.join(root_dir, 'data', 'interim', 'step9_taps')

lasso_dir = "Z:/Data/Users/Sijia/MTC/github/Lasso"

In [5]:
parameters = Parameters(lasso_base_dir = lasso_dir)

2021-02-16 09:58:37, INFO: Lasso base directory set as: Z:/Data/Users/Sijia/MTC/github/Lasso
2021-02-16 09:58:37, INFO: Lasso base directory set as: Z:/Data/Users/Sijia/MTC/github/Lasso


In [6]:
version_00_pickle_file_name = os.path.join(input_dir, 'working_scenario_00.pickle')
v_00_scenario = pickle.load(open(version_00_pickle_file_name, 'rb'))

In [7]:
v_00_scenario.transit_net.feed.stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21757 entries, 0 to 21756
Data columns (total 18 columns):
stop_name              21757 non-null object
stop_desc              409 non-null object
stop_lat               21757 non-null float64
stop_lon               21757 non-null float64
zone_id                8232 non-null object
stop_url               261 non-null object
location_type          5475 non-null float64
parent_station         249 non-null object
stop_timezone          126 non-null object
wheelchair_boarding    377 non-null float64
stop_code              15289 non-null object
platform_code          52 non-null object
position               0 non-null object
direction              0 non-null object
stop_id                21757 non-null object
osm_node_id            21093 non-null object
shst_node_id           21093 non-null object
model_node_id          21757 non-null object
dtypes: float64(4), object(14)
memory usage: 3.0+ MB


In [8]:
v_00_scenario.transit_net.feed.stop_times.stop_id.nunique()

21757

# stops that are not walk accessible

In [10]:
stops_df = v_00_scenario.transit_net.feed.stops.copy()
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

stops_df = pd.merge(stops_df, 
                    v_00_scenario.road_net.nodes_df[["model_node_id", "X", "Y", "drive_access", "walk_access"]], 
                    how = "left", 
                    on = "model_node_id")

In [11]:
stops_df[stops_df.walk_access == 0].shape

(0, 22)

stops_df[stops_df.walk_access == 0].to_csv(output_dir + "/stops_walk_inaccessible.csv", index = False)

In [12]:
stops_df[stops_df.walk_access == 0].model_node_id.nunique()

0

v_00_scenario.road_net.nodes_df[v_00_scenario.road_net.nodes_df.model_node_id.isin(
    stops_df[stops_df.walk_access == 0].model_node_id)
].to_file(output_dir + "transit_node_non_walk.geojson", driver = "GeoJSON")

In [13]:
v_00_scenario.road_net.links_df[(v_00_scenario.road_net.links_df.A.isin(stops_df[stops_df.walk_access == 0].model_node_id)) |
                                (v_00_scenario.road_net.links_df.B.isin(stops_df[stops_df.walk_access == 0].model_node_id))
                               ].roadway.value_counts()

Series([], Name: roadway, dtype: int64)

# k means option1

In [12]:
kmeans_loc_taps_gdf, stops_loc_taps_df = mtc.create_taps_kmeans_location_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

In [14]:
kmeans_loc_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-121.903459,37.328670,POINT (-121.90346 37.32867)
1,1,-122.277745,37.824577,POINT (-122.27774 37.82458)
2,2,-122.705219,38.348600,POINT (-122.70522 38.34860)
3,3,-121.992661,37.976040,POINT (-121.99266 37.97604)
4,4,-122.080740,37.596070,POINT (-122.08074 37.59607)
...,...,...,...,...
6132,6132,-122.353896,37.936766,POINT (-122.35390 37.93677)
6133,6133,-121.500699,38.584004,POINT (-121.50070 38.58400)
6134,6134,-121.902491,37.330286,POINT (-121.90249 37.33029)
6135,6135,-122.041192,38.243446,POINT (-122.04119 38.24345)


# k means option 2

In [15]:
kmeans_fre_taps_gdf, stops_fre_taps_df = mtc.create_taps_kmeans_frequency_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

In [16]:
kmeans_fre_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-121.965572,37.369921,POINT (-121.96557 37.36992)
1,1,-122.259849,37.875639,POINT (-122.25985 37.87564)
2,2,-122.667683,38.334933,POINT (-122.66768 38.33493)
3,3,-122.500296,37.747620,POINT (-122.50030 37.74762)
4,4,-122.008654,37.987623,POINT (-122.00865 37.98762)
...,...,...,...,...
6132,6132,-122.353896,37.936766,POINT (-122.35390 37.93677)
6133,6133,-121.500699,38.584004,POINT (-121.50070 38.58400)
6134,6134,-121.902491,37.330286,POINT (-121.90249 37.33029)
6135,6135,-122.041192,38.243446,POINT (-122.04119 38.24345)


# TM2 taps

In [31]:
existing_network_node_gdf = gpd.read_file(root_dir + "/data/external/TM2_nonMarin/tm2_nodes.shp")

existing_network_node_gdf.crs = CRS("ESRI:102646")

existing_network_node_gdf = existing_network_node_gdf.to_crs({'init': 'epsg:4326'})

TAP_N_list = list(range(90001, 99999)) + list(range(190001, 199999)) + list(range(290001, 299999))\
+ list(range(390001, 399999))\
+ list(range(490001, 499999)) + list(range(590001, 599999)) + list(range(690001, 699999)) + list(range(790001, 799999))\
+ list(range(890001, 899999))

  return _prepare_from_string(" ".join(pjargs))


In [33]:
existing_network_node_gdf.FAREZONE.value_counts()

0     529470
4       3294
2       1528
5        498
13       253
       ...  
69         1
70         1
71         1
72         1
68         1
Name: FAREZONE, Length: 105, dtype: int64

In [18]:
existing_network_node_gdf["X"] = existing_network_node_gdf["geometry"].apply(lambda g: g.x)
existing_network_node_gdf["Y"] = existing_network_node_gdf["geometry"].apply(lambda g: g.y)

existing_taps_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(TAP_N_list)].copy()

In [19]:
existing_taps_gdf.rename(columns = {"N" : "tap_id_tm2"}, inplace = True)

In [20]:
existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry"]]

Unnamed: 0,tap_id_tm2,X,Y,geometry
4789,90001,-122.485068,37.785822,POINT (-122.48507 37.78582)
4790,90002,-122.446404,37.775892,POINT (-122.44640 37.77589)
4791,90003,-122.396752,37.795590,POINT (-122.39675 37.79559)
4792,90004,-122.476274,37.754315,POINT (-122.47627 37.75432)
4793,90005,-122.466386,37.751010,POINT (-122.46639 37.75101)
...,...,...,...,...
50672,890193,-122.561657,37.976329,POINT (-122.56166 37.97633)
50673,890194,-122.510898,37.945639,POINT (-122.51090 37.94564)
50674,890195,-122.434522,37.869048,POINT (-122.43452 37.86905)
50675,890196,-122.454779,37.873311,POINT (-122.45478 37.87331)


In [21]:
root_dir = "Z:/Data/Users/Sijia/MTC/github/travel-model-two-networks"

In [22]:
existing_network_link_gdf = gpd.read_file(root_dir + "/data/external/TM2_nonMarin/tm2_links.shp")

existing_network_link_gdf.crs = CRS("ESRI:102646")

existing_network_link_gdf = existing_network_link_gdf.to_crs({'init': 'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


In [23]:
existing_network_link_gdf.columns

Index(['A', 'B', 'NUMLANES', 'F_JNCTID', 'T_JNCTID', 'FRC', 'NAME', 'FREEWAY',
       'TOLLRD', 'ONEWAY', 'KPH', 'MINUTES', 'CARRIAGE', 'LANES', 'RAMP',
       'SPEEDCAT', 'FEET', 'ASSIGNABLE', 'CNTYPE', 'TRANSIT', 'HIGHWAYT',
       'B_CLASS', 'GRADE_CAT', 'PED_FLAG', 'BIKEPEDOK', 'PEMSID', 'PEMSLANES',
       'RTEDIR', 'PEMSDIST', 'REPRIORITIZ', 'TMCLOCREF', 'TAP_DRIVE', 'FT',
       'FFS', 'USECLASS', 'TOLLBOOTH', 'DANGLING', 'HASTRANSIT', 'DELETE',
       'TOLLSEG', 'geometry'],
      dtype='object')

In [25]:
existing_network_link_gdf.ASSIGNABLE.value_counts()

1    1129308
0     275837
Name: ASSIGNABLE, dtype: int64

In [24]:
existing_network_link_gdf.DELETE.value_counts()

0    754513
1    650632
Name: DELETE, dtype: int64

In [30]:
pd.crosstab([existing_network_link_gdf.CNTYPE, existing_network_link_gdf.FT, existing_network_link_gdf.ASSIGNABLE], 
            existing_network_link_gdf.DELETE)

Unnamed: 0_level_0,Unnamed: 1_level_0,DELETE,0,1
CNTYPE,FT,ASSIGNABLE,Unnamed: 3_level_1,Unnamed: 4_level_1
BIKE,0,0,0,6678
CRAIL,0,0,100,44
EXT,6,1,44,0
FERRY,0,0,6,46
HRAIL,0,0,90,0
LRAIL,0,0,846,18
MAZ,6,1,251493,944
PED,0,0,315,221590
TANA,1,1,583,25
TANA,2,0,4,0


In [27]:
pd.crosstab(existing_network_link_gdf.ASSIGNABLE, existing_network_link_gdf.DELETE)

DELETE,0,1
ASSIGNABLE,Unnamed: 1_level_1,Unnamed: 2_level_1
0,44351,231486
1,710162,419146


In [22]:
existing_taps_links_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "TAP"].copy()

In [23]:
geom_length = existing_taps_links_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

existing_taps_links_gdf["distance_to_tap"] = geom_length["length"]

In [24]:
existing_taps_links_gdf["distance_to_tap"].max()

1841.5977926914074

In [27]:
existing_taps_links_gdf["tap_id_tm2"] = np.where(existing_taps_links_gdf.A.isin(TAP_N_list), 
                                                 existing_taps_links_gdf.A, 
                                                 existing_taps_links_gdf.B)
existing_taps_links_gdf["stop_id"] = np.where(existing_taps_links_gdf.A.isin(TAP_N_list), 
                                              existing_taps_links_gdf.B, 
                                              existing_taps_links_gdf.A)

existing_taps_links_gdf = existing_taps_links_gdf.drop_duplicates(subset = ["tap_id_tm2", "stop_id"])

In [28]:
existing_taps_links_gdf = pd.merge(existing_taps_links_gdf,
                                   existing_network_node_gdf[["N", "X", "Y"]].rename(columns = {"N" : "stop_id"}),
                                   how = "left",
                                   on = "stop_id")

In [29]:
existing_taps_links_gdf["type"] = "stops"
existing_taps_links_gdf["stop_source"] = "tm2"

In [30]:
existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "geometry", "distance_to_tap","type", "stop_source"]]

Unnamed: 0,tap_id_tm2,stop_id,X,Y,geometry,distance_to_tap,type,stop_source
0,90001,1007968,-122.485149,37.785750,"LINESTRING (-122.48507 37.78582, -122.48515 37...",11.661132,stops,tm2
1,90001,1015082,-122.484880,37.782011,"LINESTRING (-122.48507 37.78582, -122.48488 37...",459.509913,stops,tm2
2,90001,1029156,-122.485013,37.783883,"LINESTRING (-122.48507 37.78582, -122.48501 37...",233.700060,stops,tm2
3,90002,1010258,-122.444972,37.776956,"LINESTRING (-122.44640 37.77589, -122.44497 37...",187.494816,stops,tm2
4,90002,1031191,-122.446485,37.775820,"LINESTRING (-122.44640 37.77589, -122.44649 37...",11.659087,stops,tm2
...,...,...,...,...,...,...,...,...
22039,5030252,5032386,-122.517046,37.889899,"LINESTRING (-122.51705 37.88990, -122.51648 37...",133.105748,stops,tm2
22040,5030210,5032406,-122.504649,37.899227,"LINESTRING (-122.50465 37.89923, -122.50167 37...",308.748253,stops,tm2
22041,5019789,5032892,-122.529499,37.926422,"LINESTRING (-122.52950 37.92642, -122.52559 37...",373.033953,stops,tm2
22042,5030283,5032892,-122.529499,37.926422,"LINESTRING (-122.52950 37.92642, -122.52232 37...",684.956181,stops,tm2


# kmeans final

In [31]:
kmeans_taps_gdf, stops_taps_df = mtc.create_taps_kmeans(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    clusters = 6000,
)

In [32]:
kmeans_taps_gdf

Unnamed: 0,tap_id,X,Y,geometry
0,0,-122.261246,37.880199,POINT (-122.26125 37.88020)
1,1,-121.984496,37.403095,POINT (-121.98450 37.40309)
2,2,-122.691635,38.443438,POINT (-122.69164 38.44344)
3,3,-122.445237,37.758698,POINT (-122.44524 37.75870)
4,4,-121.778669,37.958568,POINT (-121.77867 37.95857)
...,...,...,...,...
5995,5995,-122.248617,38.008294,POINT (-122.24862 38.00829)
5996,5996,-122.340309,37.581006,POINT (-122.34031 37.58101)
5997,5997,-122.119837,37.643293,POINT (-122.11984 37.64329)
5998,5998,-122.445842,37.767286,POINT (-122.44584 37.76729)


# assemble data

In [33]:
consolidate_stops_taps_df = pd.merge(
    stops_df, 
    stops_loc_taps_df[["stop_id", "tap_id"]].rename(columns = {"tap_id" : "tap_id_location_based"}),
    how = "left",
    on = "stop_id"
)

consolidate_stops_taps_df = pd.merge(
    consolidate_stops_taps_df, 
    stops_fre_taps_df[["stop_id", "num_trip", "tap_id"]].rename(columns = {"tap_id" : "tap_id_frequency_based"}),
    how = "left",
    on = "stop_id"
)

consolidate_stops_taps_df = pd.merge(
    consolidate_stops_taps_df, 
    stops_taps_df[["stop_id", "tap_id"]].rename(columns = {"tap_id" : "tap_id_kmeans"}),
    how = "left",
    on = "stop_id"
)

In [34]:
consolidate_stops_taps_df

Unnamed: 0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,...,shst_node_id,model_node_id,X,Y,drive_access,walk_access,tap_id_location_based,num_trip,tap_id_frequency_based,tap_id_kmeans
0,12th St. Oakland City Center,,37.803664,-122.271604,12TH,http://www.bart.gov/stations/12TH/,0.0,,,1.0,...,,2625945,-122.271604,37.803664,0,1,6000,,6000,1474
1,16th St. Mission,,37.765062,-122.419694,16TH,http://www.bart.gov/stations/16TH/,0.0,,,1.0,...,,1027612,-122.419694,37.765062,0,1,6001,,6001,2470
2,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,,2625944,-122.269029,37.807870,0,1,6002,,6002,502
3,19th St. Oakland,,37.807870,-122.269029,19TH,http://www.bart.gov/stations/19TH/,0.0,,,1.0,...,b40a224e561bcb18b9c1c6ed21b6b6bb,2504171,-122.269023,37.807778,1,1,6003,,6003,502
4,24th St. Mission,,37.752254,-122.418466,24TH,http://www.bart.gov/stations/24TH/,0.0,,,1.0,...,,1027613,-122.418466,37.752254,0,1,6004,,6004,3363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21752,VALLEY AVE & VIA DE LOS MILAGROS NB,,37.674396,-121.898354,,,,,,,...,3c8ed8454fbf722f8c6d22c057360b0f,2550116,-121.898588,37.674179,1,1,3470,18.666667,3663,4013
21753,MARTIN & MOHR,,37.682395,-121.862633,,,,,,,...,f55737df268513de5d5ff9ddda219205,2569722,-121.862624,37.681998,1,1,2292,3.000000,2275,1970
21754,MARTIN & MOHR,,37.682208,-121.862656,,,,,,,...,f55737df268513de5d5ff9ddda219205,2569722,-121.862624,37.681998,1,1,2292,2.000000,2275,1970
21755,WALNUT CREEK BART,,37.905224,-122.066914,,,,,,,...,82fff27ccfa2c3be9ea3ad51c088685c,3046886,-122.066999,37.904910,1,1,1979,15.000000,769,2389


In [35]:
consolidate_stops_taps_df["type"] = "stops"
consolidate_stops_taps_df["stop_source"] = "new"

In [36]:
consolidate_stops_taps_df.columns

Index(['stop_name', 'stop_desc', 'stop_lat', 'stop_lon', 'zone_id', 'stop_url',
       'location_type', 'parent_station', 'stop_timezone',
       'wheelchair_boarding', 'stop_code', 'platform_code', 'position',
       'direction', 'stop_id', 'osm_node_id', 'shst_node_id', 'model_node_id',
       'X', 'Y', 'drive_access', 'walk_access', 'tap_id_location_based',
       'num_trip', 'tap_id_frequency_based', 'tap_id_kmeans', 'type',
       'stop_source'],
      dtype='object')

In [37]:
kmeans_loc_taps_gdf["tap_source"] = "kmeans_location_based"
kmeans_loc_taps_gdf.rename(columns = {"tap_id" :"tap_id_location_based"}, inplace = True)

kmeans_fre_taps_gdf["tap_source"] = "kmeans_frequency_based"
kmeans_fre_taps_gdf.rename(columns = {"tap_id" :"tap_id_frequency_based"}, inplace = True)

kmeans_taps_gdf["tap_source"] = "kmeans"
kmeans_taps_gdf.rename(columns = {"tap_id" :"tap_id_kmeans"}, inplace = True)

existing_taps_gdf["tap_source"] = "tm2"

In [38]:
taps_df = pd.concat([kmeans_loc_taps_gdf, kmeans_fre_taps_gdf, existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry", "tap_source"]], kmeans_taps_gdf],
                   sort = False,
                   ignore_index = True)

In [39]:
taps_df["type"] = "taps"

In [40]:
taps_df[taps_df.tap_source == "tm2"]

Unnamed: 0,tap_id_location_based,X,Y,geometry,tap_source,tap_id_frequency_based,tap_id_tm2,tap_id_kmeans,type
12274,,-122.485068,37.785822,POINT (-122.48507 37.78582),tm2,,90001.0,,taps
12275,,-122.446404,37.775892,POINT (-122.44640 37.77589),tm2,,90002.0,,taps
12276,,-122.396752,37.795590,POINT (-122.39675 37.79559),tm2,,90003.0,,taps
12277,,-122.476274,37.754315,POINT (-122.47627 37.75432),tm2,,90004.0,,taps
12278,,-122.466386,37.751010,POINT (-122.46639 37.75101),tm2,,90005.0,,taps
...,...,...,...,...,...,...,...,...,...
18485,,-122.561657,37.976329,POINT (-122.56166 37.97633),tm2,,890193.0,,taps
18486,,-122.510898,37.945639,POINT (-122.51090 37.94564),tm2,,890194.0,,taps
18487,,-122.434522,37.869048,POINT (-122.43452 37.86905),tm2,,890195.0,,taps
18488,,-122.454779,37.873311,POINT (-122.45478 37.87331),tm2,,890196.0,,taps


In [41]:
out_df = pd.concat(
    [consolidate_stops_taps_df[["X", "Y", "stop_id", "stop_name", "num_trip",
                    "tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans',"type", "stop_source"]],
    taps_df[["tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans', "tap_id_tm2", "X", "Y", "tap_source", "type"]]],
    sort = False,
    ignore_index = True
)

In [42]:
out_df[out_df.type=="stops"]

Unnamed: 0,X,Y,stop_id,stop_name,num_trip,tap_id_location_based,tap_id_frequency_based,tap_id_kmeans,type,stop_source,tap_id_tm2,tap_source
0,-122.271604,37.803664,5411,12th St. Oakland City Center,,6000.0,6000.0,1474.0,stops,new,,
1,-122.419694,37.765062,5412,16th St. Mission,,6001.0,6001.0,2470.0,stops,new,,
2,-122.269029,37.807870,5413,19th St. Oakland,,6002.0,6002.0,502.0,stops,new,,
3,-122.269023,37.807778,5414,19th St. Oakland,,6003.0,6003.0,502.0,stops,new,,
4,-122.418466,37.752254,5415,24th St. Mission,,6004.0,6004.0,3363.0,stops,new,,
...,...,...,...,...,...,...,...,...,...,...,...,...
21752,-121.898588,37.674179,21516,VALLEY AVE & VIA DE LOS MILAGROS NB,18.666667,3470.0,3663.0,4013.0,stops,new,,
21753,-121.862624,37.681998,21517,MARTIN & MOHR,3.000000,2292.0,2275.0,1970.0,stops,new,,
21754,-121.862624,37.681998,21518,MARTIN & MOHR,2.000000,2292.0,2275.0,1970.0,stops,new,,
21755,-122.066999,37.904910,21519,WALNUT CREEK BART,15.000000,1979.0,769.0,2389.0,stops,new,,


In [43]:
out_df.num_trip.max()

828.1066091236149

# stats

In [44]:
# add tap distance

out_df = pd.merge(out_df, 
                  kmeans_taps_gdf[["tap_id_kmeans", "X", "Y"]].rename(columns = {"X" : "tap_X", "Y" : "tap_Y"}),
                  how = 'left',
                  on = ["tap_id_kmeans"])

out_df["geometry"] = out_df.apply(lambda x: LineString([Point(x.X, x.Y), Point(x.tap_X, x.tap_Y)]), axis = 1)

In [45]:
out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

In [46]:
geom_length = out_df[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

out_df["distance_to_tap"] = geom_length["length"]

In [47]:
out_df

Unnamed: 0,X,Y,stop_id,stop_name,num_trip,tap_id_location_based,tap_id_frequency_based,tap_id_kmeans,type,stop_source,tap_id_tm2,tap_source,tap_X,tap_Y,geometry,distance_to_tap
0,-122.271604,37.803664,5411,12th St. Oakland City Center,,6000.0,6000.0,1474.0,stops,new,,,-122.271545,37.804217,"LINESTRING (-122.27160 37.80366, -122.27154 37...",66.785762
1,-122.419694,37.765062,5412,16th St. Mission,,6001.0,6001.0,2470.0,stops,new,,,-122.420130,37.765358,"LINESTRING (-122.41969 37.76506, -122.42013 37...",54.875577
2,-122.269029,37.807870,5413,19th St. Oakland,,6002.0,6002.0,502.0,stops,new,,,-122.269316,37.807325,"LINESTRING (-122.26903 37.80787, -122.26932 37...",71.048299
3,-122.269023,37.807778,5414,19th St. Oakland,,6003.0,6003.0,502.0,stops,new,,,-122.269316,37.807325,"LINESTRING (-122.26902 37.80778, -122.26932 37...",61.281243
4,-122.418466,37.752254,5415,24th St. Mission,,6004.0,6004.0,3363.0,stops,new,,,-122.418507,37.752700,"LINESTRING (-122.41847 37.75225, -122.41851 37...",53.881331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46242,-122.248617,38.008294,,,,,,5995.0,taps,,,kmeans,-122.248617,38.008294,"LINESTRING (-122.24862 38.00829, -122.24862 38...",0.000000
46243,-122.340309,37.581006,,,,,,5996.0,taps,,,kmeans,-122.340309,37.581006,"LINESTRING (-122.34031 37.58101, -122.34031 37...",0.000000
46244,-122.119837,37.643293,,,,,,5997.0,taps,,,kmeans,-122.119837,37.643293,"LINESTRING (-122.11984 37.64329, -122.11984 37...",0.000000
46245,-122.445842,37.767286,,,,,,5998.0,taps,,,kmeans,-122.445842,37.767286,"LINESTRING (-122.44584 37.76729, -122.44584 37...",0.000000


In [48]:
out_df["distance_to_tap"] = np.where(out_df["type"] == "stops", 
                                     out_df["distance_to_tap"], 
                                     99999)

out_df.drop(["tap_X", "tap_Y", "geometry"], axis = 1, inplace = True)

In [49]:
# append tm2 tap distance

out_df = pd.concat([out_df, 
                    existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "distance_to_tap", "type", "stop_source"]]],
                  sort = False,
                  ignore_index = True)

In [50]:
out_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 68291 entries, 0 to 68290
Data columns (total 13 columns):
X                         68291 non-null float64
Y                         68291 non-null float64
stop_id                   43801 non-null object
stop_name                 21757 non-null object
num_trip                  21620 non-null float64
tap_id_location_based     27894 non-null float64
tap_id_frequency_based    27894 non-null float64
tap_id_kmeans             27757 non-null float64
type                      68291 non-null object
stop_source               43801 non-null object
tap_id_tm2                28260 non-null float64
tap_source                24490 non-null object
distance_to_tap           68291 non-null float64
dtypes: float64(8), object(5)
memory usage: 6.8+ MB


In [51]:
out_df

Unnamed: 0,X,Y,stop_id,stop_name,num_trip,tap_id_location_based,tap_id_frequency_based,tap_id_kmeans,type,stop_source,tap_id_tm2,tap_source,distance_to_tap
0,-122.271604,37.803664,5411,12th St. Oakland City Center,,6000.0,6000.0,1474.0,stops,new,,,66.785762
1,-122.419694,37.765062,5412,16th St. Mission,,6001.0,6001.0,2470.0,stops,new,,,54.875577
2,-122.269029,37.807870,5413,19th St. Oakland,,6002.0,6002.0,502.0,stops,new,,,71.048299
3,-122.269023,37.807778,5414,19th St. Oakland,,6003.0,6003.0,502.0,stops,new,,,61.281243
4,-122.418466,37.752254,5415,24th St. Mission,,6004.0,6004.0,3363.0,stops,new,,,53.881331
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68286,-122.517046,37.889899,5032386,,,,,,stops,tm2,5030252.0,,133.105748
68287,-122.504649,37.899227,5032406,,,,,,stops,tm2,5030210.0,,308.748253
68288,-122.529499,37.926422,5032892,,,,,,stops,tm2,5019789.0,,373.033953
68289,-122.529499,37.926422,5032892,,,,,,stops,tm2,5030283.0,,684.956181


In [52]:
county_file = root_dir + "/data/external/county_boundaries/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"

county_gdf = gpd.read_file(county_file)

county_gdf = county_gdf.to_crs("EPSG:4326")

In [53]:
county_gdf

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,21,007,00516850,0500000US21007,21007,Ballard,06,639387454,69473325,"POLYGON ((-89.18137 37.04630, -89.17938 37.053..."
1,21,017,00516855,0500000US21017,21017,Bourbon,06,750439351,4829777,"POLYGON ((-84.44266 38.28324, -84.44114 38.283..."
2,21,031,00516862,0500000US21031,21031,Butler,06,1103571974,13943044,"POLYGON ((-86.94486 37.07341, -86.94346 37.074..."
3,21,065,00516879,0500000US21065,21065,Estill,06,655509930,6516335,"POLYGON ((-84.12662 37.64540, -84.12483 37.646..."
4,21,069,00516881,0500000US21069,21069,Fleming,06,902727151,7182793,"POLYGON ((-83.98428 38.44549, -83.98246 38.450..."
...,...,...,...,...,...,...,...,...,...,...
3228,31,073,00835858,0500000US31073,31073,Gosper,06,1186616237,11831826,"POLYGON ((-100.09510 40.43866, -100.08937 40.4..."
3229,39,075,01074050,0500000US39075,39075,Holmes,06,1094405866,3695230,"POLYGON ((-82.22066 40.66758, -82.19327 40.667..."
3230,48,171,01383871,0500000US48171,48171,Gillespie,06,2740719114,9012764,"POLYGON ((-99.30400 30.49983, -99.28234 30.499..."
3231,55,079,01581100,0500000US55079,55079,Milwaukee,06,625440563,2455383635,"POLYGON ((-88.06958 42.86727, -88.06958 42.872..."


In [54]:
# add county

out_df["geometry"] = out_df.apply(lambda x: Point(x.X, x.Y), axis = 1)

out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

out_df = gpd.sjoin(out_df, county_gdf[["NAME", "geometry"]], how = "left", op = "intersects")

In [55]:
v_00_scenario.road_net.nodes_df.columns

Index(['osm_node_id', 'shst_node_id', 'county', 'drive_access', 'walk_access',
       'bike_access', 'model_node_id', 'rail_only', 'X', 'Y', 'geometry'],
      dtype='object')

In [56]:
v_00_scenario.road_net.links_df.columns

Index(['access', 'bike_access', 'drive_access', 'fromIntersectionId', 'lanes',
       'maxspeed', 'name', 'oneWay', 'ref', 'roadway', 'shstGeometryId',
       'shstReferenceId', 'toIntersectionId', 'u', 'v', 'walk_access', 'wayId',
       'county', 'model_link_id', 'A', 'B', 'rail_traveltime', 'rail_only',
       'locationReferences', 'shape_id', 'geometry', 'ft_cal', 'ft',
       'useclass', 'assignable', 'transit'],
      dtype='object')

In [57]:
v_00_scenario.road_net.shapes_df.columns

Index(['id', 'shape_id', 'fromIntersectionId', 'toIntersectionId',
       'forwardReferenceId', 'backReferenceId', 'geometry'],
      dtype='object')

In [58]:
out_df.NAME.value_counts()

Alameda          17053
Santa Clara      13626
San Francisco    10165
Contra Costa      9025
San Mateo         6717
Sonoma            5380
Solano            2969
Marin             2324
Napa               991
San Joaquin         12
Yolo                 8
Sacramento           4
Name: NAME, dtype: int64

In [59]:
out_df.to_csv(output_dir + "/stops_and_taps.csv", index = False)

# Finalize TAPs and write out 

In [14]:
tap_nodes_gdf, tap_links_gdf, tap_shapes_gdf = mtc.create_tap_nodes_and_links(
transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    num_taps = 6000,
)

2021-01-26 09:59:12, INFO: Adding centroid and centroid connector to standard network
2021-01-26 09:59:12, INFO: Lasso base directory set as: Z:/Data/Users/Sijia/MTC/github/Lasso
2021-01-26 09:59:12, INFO: Lasso base directory set as: Z:/Data/Users/Sijia/MTC/github/Lasso
2021-01-26 09:59:25, INFO: Finished adding centroid and centroid connectors


In [16]:
tap_links_gdf.groupby(["A", "B"])["model_link_id"].count().shape

(35242,)

In [17]:
tap_links_gdf.locationReferences.iloc[0]

[{'sequence': 1,
  'point': [-122.27160400000001, 37.803664000000005],
  'distanceToNextRef': 50.32453516987399,
  'bearing': 0,
  'intersectionId': nan},
 {'sequence': 2,
  'point': [-122.27130628, 37.80400915333333],
  'intersectionId': nan}]

In [18]:
tap_links_gdf["length"].max()

455.70853016995864

In [19]:
print("-------write out pickle---------")

tap_links_gdf.to_pickle(output_dir + "/tap_link.pickle")
tap_shapes_gdf.to_pickle(output_dir + "/tap_shape.pickle")
tap_nodes_gdf.to_pickle(output_dir + "/tap_node.pickle")

-------write out pickle---------


# TAP file

In [124]:
tap_node_gdf = pd.read_pickle(output_dir + "/tap_node.pickle")

In [125]:
tap_shape_gdf = pd.read_pickle(output_dir + "/tap_shape.pickle")

In [126]:
tap_node_gdf

Unnamed: 0,tap_id,X,Y,geometry,index_right,county,tap_node_county_start,model_node_id
0,0,-122.641450,38.235886,POINT (-122.64145 38.23589),2.0,Sonoma,790001,790001
1,1,-122.113528,37.658320,POINT (-122.11353 37.65832),8.0,Alameda,390001,390001
2,2,-122.397458,37.785970,POINT (-122.39746 37.78597),0.0,San Francisco,90001,90001
3,3,-121.833643,37.346326,POINT (-121.83364 37.34633),1.0,Santa Clara,290001,290001
4,4,-122.047619,38.003040,POINT (-122.04762 38.00304),5.0,Contra Costa,490001,490001
...,...,...,...,...,...,...,...,...
5995,5995,-122.264302,37.829265,POINT (-122.26430 37.82926),8.0,Alameda,390001,391422
5996,5996,-122.046516,37.982781,POINT (-122.04652 37.98278),5.0,Contra Costa,490001,490835
5997,5997,-122.231947,37.804882,POINT (-122.23195 37.80488),8.0,Alameda,390001,391423
5998,5998,-121.784958,37.695256,POINT (-121.78496 37.69526),8.0,Alameda,390001,391424


In [153]:
tap_node_gdf
tap_shape_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 17621 entries, 0 to 21756
Data columns (total 26 columns):
stop_name              17621 non-null object
stop_desc              375 non-null object
stop_lat               17621 non-null float64
stop_lon               17621 non-null float64
zone_id                6999 non-null object
stop_url               237 non-null object
location_type          4536 non-null float64
parent_station         161 non-null object
stop_timezone          113 non-null object
wheelchair_boarding    343 non-null float64
stop_code              12495 non-null object
platform_code          52 non-null object
position               0 non-null object
direction              0 non-null object
stop_id                17621 non-null object
osm_node_id            16957 non-null object
shst_node_id           16957 non-null object
model_node_id          17621 non-null int32
X                      17621 non-null float64
Y                      17621 non-null float64
t

In [128]:
v_00_scenario.transit_net.feed.stops.model_node_id.nunique()

17621

In [129]:
mode_crosswalk = pd.read_csv(parameters.mode_crosswalk_file)
mode_crosswalk.drop_duplicates(subset = ["agency_raw_name", "route_type", "is_express_bus"], inplace = True)

In [130]:
v_00_scenario.transit_net.feed.routes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701 entries, 0 to 700
Data columns (total 16 columns):
route_id_original         701 non-null object
agency_id                 526 non-null object
route_short_name          665 non-null object
route_long_name           679 non-null object
route_desc                14 non-null object
route_type                701 non-null int64
route_url                 217 non-null object
route_color               260 non-null object
route_text_color          243 non-null object
agency_raw_name           701 non-null object
route_sort_order          95 non-null object
min_headway_minutes       32 non-null object
eligibility_restricted    4 non-null object
continuous_pickup         4 non-null object
continuous_drop_off       4 non-null object
route_id                  701 non-null object
dtypes: int64(1), object(15)
memory usage: 87.8+ KB


In [131]:
trip_df = v_00_scenario.transit_net.feed.trips.copy()

trip_df = pd.merge(trip_df, v_00_scenario.transit_net.feed.routes.drop("agency_raw_name", axis = 1), how="left", on="route_id")

trip_df = pd.merge(trip_df, v_00_scenario.transit_net.feed.agency[["agency_name", "agency_raw_name", "agency_id"]], 
                   how = "left", on = ["agency_raw_name", "agency_id"])

# identify express bus
trip_df["is_express_bus"] = trip_df.apply(lambda x: mtc._is_express_bus(x), axis = 1)
trip_df.drop("agency_name", axis = 1 , inplace = True)

trip_df = pd.merge(
        trip_df,
        mode_crosswalk.drop("agency_id", axis = 1),
        how = "left",
        on = ["agency_raw_name", "route_type", "is_express_bus"]
    )

In [132]:
stop_times_df = v_00_scenario.transit_net.feed.stop_times.copy()

stops_df = stop_times_df.groupby(["stop_id", "trip_id"])["stop_sequence"].count().reset_index().drop("stop_sequence", axis = 1)

In [133]:
stops_df = pd.merge(stops_df, trip_df[["trip_id", "TM2_line_haul_name"]], how = "left", on = ["trip_id"])

In [134]:
stops_df = pd.merge(stops_df,
                    v_00_scenario.transit_net.feed.stops[["stop_id", "model_node_id"]],
                   how = "left",
                   on = "stop_id")

In [135]:
stops_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 131338 entries, 0 to 131337
Data columns (total 4 columns):
stop_id               131338 non-null object
trip_id               131338 non-null object
TM2_line_haul_name    131338 non-null object
model_node_id         131338 non-null object
dtypes: object(4)
memory usage: 5.0+ MB


In [136]:
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

In [137]:
tap_df = pd.merge(tap_node_gdf, 
                  tap_shape_gdf[["tap_id", "model_node_id"]].rename(columns = {"model_node_id" : "stop_model_node_id"}), 
                  how = "left", on = "tap_id")

In [138]:
tap_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 17621 entries, 0 to 17620
Data columns (total 9 columns):
tap_id                   17621 non-null int64
X                        17621 non-null float64
Y                        17621 non-null float64
geometry                 17621 non-null geometry
index_right              17492 non-null float64
county                   17621 non-null object
tap_node_county_start    17621 non-null int64
model_node_id            17621 non-null int64
stop_model_node_id       17621 non-null int32
dtypes: float64(3), geometry(1), int32(1), int64(3), object(1)
memory usage: 1.3+ MB


In [139]:
tap_df = pd.merge(tap_df,
                  stops_df[["model_node_id", "TM2_line_haul_name"]].rename(columns = {"model_node_id" : "stop_model_node_id"}),
                  how = "left",
                  on = "stop_model_node_id")

In [140]:
tap_df.TM2_line_haul_name.unique()

array(['Local bus', 'Express bus', 'Commuter rail', 'Light rail',
       'Heavy rail', 'Ferry service'], dtype=object)

In [141]:
line_haul_name_dict = {'Local bus' : 1, 'Express bus' : 2, 'Commuter rail' : 6, 'Light rail' : 4,
       'Heavy rail' : 5, 'Ferry service' : 3}

In [142]:
tap_df["mode"] = tap_df["TM2_line_haul_name"].map(line_haul_name_dict)

In [143]:
out_df = tap_df.groupby(["tap_id", "county", "model_node_id", "X", "Y"])["mode"].apply(lambda x: list(set(x))).reset_index()

In [144]:
out_df = out_df.sort_values(by = "model_node_id")

In [145]:
out_df['county'] = out_df['county'].map(parameters.county_code_dict)

In [146]:
out_df.rename(columns = {"tap_id" : "OBJECTID", "model_node_id" : "N", "X" : "long", "Y" : "lat"},inplace = True)

In [147]:
out_df["mode"] = out_df["mode"].apply(lambda x: ",".join(map(str, x)))

In [150]:
out_df.to_csv(output_dir + "/tap_node.csv", index = False)