In [None]:
from pyproj import CRS
from shapely.geometry import LineString, Point

In [None]:
import os
import sys
import yaml
import pickle
import glob

import pandas as pd
import geopandas as gpd
import numpy as np

# https://github.com/BayAreaMetro/network_wrangler/tree/generic_agency
from network_wrangler import RoadwayNetwork
from network_wrangler import TransitNetwork
from network_wrangler import ProjectCard
from network_wrangler import Scenario
from network_wrangler import WranglerLogger

# https://github.com/BayAreaMetro/Lasso/tree/mtc_parameters
from lasso import ModelRoadwayNetwork
from lasso import StandardTransit
from lasso import Parameters
from lasso import mtc

In [None]:
%load_ext autoreload
%autoreload 2

# I/O

In [None]:
# Inputs live in the Box-synced project folder. The Windows user folder is
# filled in from the USERNAME environment variable.
TM2_REBUILD_BOX_DIR = (
    r"C:\Users\{}\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild"
).format(os.getenv('USERNAME'))
TM2_NETWORKS_DATA_DIR = os.path.join(TM2_REBUILD_BOX_DIR, "travel-model-two-networks", "data")

# the three data stages under <repo>/data
input_data_interim_dir = os.path.join(TM2_NETWORKS_DATA_DIR, "interim")
input_data_processed_dir = os.path.join(TM2_NETWORKS_DATA_DIR, "processed")
input_data_external_dir = os.path.join(TM2_NETWORKS_DATA_DIR, "external")

# Folder holding the working-scenario pickle loaded below.
# TODO: document why version_03 is the one to use.
input_dir = os.path.join(input_data_processed_dir, 'version_03')

# Local checkout of Lasso; handed to lasso.Parameters as its base directory.
LASSO_DIR = 'C:/Users/{}/Documents/GitHub/Lasso'.format(os.getenv('USERNAME'))

In [None]:
# output folders - use local, most likely
# The "{}" placeholder has to be filled with the Windows user name (same
# convention as the input paths); left unformatted, makedirs would create a
# folder that is literally named "{}".
output_data_interim_dir = r"C:\Users\{}\Documents\scratch\tm2_network_building\interim\step9_taps".format(os.getenv('USERNAME'))
os.makedirs(output_data_interim_dir, exist_ok=True)

In [None]:
# Lasso parameter object, built with LASSO_DIR as the Lasso base directory.
# It is passed to every mtc.* TAP helper below and also supplies
# mode_crosswalk_file and county_code_dict in the "TAP file" section.
parameters = Parameters(lasso_base_dir = LASSO_DIR)

In [None]:
%%time

# Load the pickled working scenario from input_dir. The rest of the notebook
# uses its .road_net (nodes_df, links_df) and .transit_net (feed) attributes.
# TODO: document which step produced this pickle; note the file is named
# "working_scenario_01" while the variables say "version_00" / "v_00".
# NOTE(review): pickle.load can execute arbitrary code -- only load files from
# a trusted source.
version_00_pickle_file_name = os.path.join(input_dir, 'working_scenario_01.pickle')
with open(version_00_pickle_file_name, 'rb') as pickle_file:  # close the handle once loaded
    v_00_scenario = pickle.load(pickle_file)

In [None]:
# Quick look at what came out of the pickle.
# Uncomment to list the roadway table columns:
# print(v_00_scenario.road_net.nodes_df.columns)
# print(v_00_scenario.road_net.links_df.columns)
# print(v_00_scenario.road_net.shapes_df.columns)

transit_feed = v_00_scenario.transit_net.feed

# schema / non-null summary of the stops table
transit_feed.stops.info()

# number of distinct stops referenced by stop_times
unique_stop_count = transit_feed.stop_times.stop_id.nunique()
print('\n number of unique stop_id: {}'.format(unique_stop_count))

In [None]:
# Sanity check: stops whose roadway node is not walk accessible.
# The count printed first is expected to be 0.

stops_df = v_00_scenario.transit_net.feed.stops.copy()
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

# bring the node coordinates and access flags onto each stop
node_access_df = v_00_scenario.road_net.nodes_df[["model_node_id", "X", "Y", "drive_access", "walk_access"]]
stops_df = stops_df.merge(node_access_df, how = "left", on = "model_node_id")

# node ids of the stops flagged walk_access == 0
no_walk_stop_nodes = stops_df.loc[stops_df.walk_access == 0, "model_node_id"]
print(len(no_walk_stop_nodes))

# roadway types of the links that start or end at one of those nodes
road_links_df = v_00_scenario.road_net.links_df
touches_no_walk_stop = road_links_df.A.isin(no_walk_stop_nodes) | road_links_df.B.isin(no_walk_stop_nodes)
print(road_links_df[touches_no_walk_stop].roadway.value_counts())

# Explore TAP creation options (not required for creating the network)

This section tries three options for creating TAPs and TAP links (k-means option 1, k-means option 2, and the kmeans final option) and compares the results with the TAPs and TAP links of the legacy TM2 network (non-Marin version).

### k-means option 1

In [None]:
%%time
# Option 1: location-based k-means, run through Lasso's mtc helper on the
# scenario's transit and roadway networks with bus_clusters = 6000.
# Returns two objects: the TAPs and the stop-to-TAP mapping (the latter is
# read later through its "stop_id" and "tap_id" columns).
kmeans_loc_taps_gdf, stops_loc_taps_df = mtc.create_taps_kmeans_location_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

In [None]:
# show the TAPs returned by option 1
kmeans_loc_taps_gdf

In [None]:
# show the stop-to-TAP mapping returned by option 1
stops_loc_taps_df

### k-means option 2

In [None]:
%%time
# Option 2: frequency-based k-means, same inputs and bus_clusters = 6000 as
# option 1. The returned stop mapping is read later through its "stop_id",
# "num_trip" and "tap_id" columns.
kmeans_fre_taps_gdf, stops_fre_taps_df = mtc.create_taps_kmeans_frequency_based(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    bus_clusters = 6000,
)

In [None]:
# show the TAPs returned by option 2
kmeans_fre_taps_gdf

In [None]:
# show the stop-to-TAP mapping returned by option 2
stops_fre_taps_df

### kmeans final option

In [None]:
%%time
# Final option: mtc.create_taps_kmeans with clusters = 6000 (note the keyword
# is `clusters` here, not `bus_clusters` as in options 1 and 2).
# The returned TAPs are also used later to measure stop-to-TAP distances.
kmeans_taps_gdf, stops_taps_df = mtc.create_taps_kmeans(
    transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    clusters = 6000,
)

In [None]:
# show the TAPs returned by the final option
kmeans_taps_gdf

In [None]:
# show the stop-to-TAP mapping returned by the final option
stops_taps_df

### TM2 legacy network taps and tap links

In [None]:
# read legacy TM2 network nodes nonMarin version
existing_network_node_gdf = gpd.read_file(os.path.join(input_data_external_dir,"TM2_nonMarin","tm2_nodes.shp"))

# Label the coordinates as ESRI:102646 (assigning .crs does not move any
# coordinates), then reproject to lon/lat (EPSG:4326).
# NOTE(review): newer geopandas prefers set_crs(..., allow_override=True) when
# the file already carries a CRS -- confirm against the installed version.
existing_network_node_gdf.crs = CRS("ESRI:102646")
existing_network_node_gdf = existing_network_node_gdf.to_crs(CRS("EPSG:4326"))

In [None]:
# get TAPS from the legacy network
#
# Legacy TAP node numbers sit in one block per county: 90,001-99,999,
# 190,001-199,999, ..., 890,001-899,999 (nine blocks, 100,000 apart).
# range() excludes its stop value, so each block has to stop at
# block_start + 9999 (= x00,000) to keep the last id x99,999 in the list.
TAP_N_list = [
    node_id
    for block_start in range(90001, 890002, 100000)
    for node_id in range(block_start, block_start + 9999)
]

# expose the point coordinates as plain X / Y columns
existing_network_node_gdf["X"] = existing_network_node_gdf["geometry"].x
existing_network_node_gdf["Y"] = existing_network_node_gdf["geometry"].y

# keep only the nodes whose number N is in TAP_N_list, and call that id tap_id_tm2
existing_taps_gdf = (
    existing_network_node_gdf
    .loc[existing_network_node_gdf.N.isin(TAP_N_list)]
    .rename(columns = {"N" : "tap_id_tm2"})
)

existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry"]]

In [None]:
%%time
# read legacy TM2 network links nonMarin version
existing_network_link_gdf = gpd.read_file(os.path.join(input_data_external_dir,"TM2_nonMarin","tm2_links.shp"))

# Same CRS handling as for the legacy nodes: label the coordinates as
# ESRI:102646 (no coordinates are moved), then reproject to EPSG:4326.
existing_network_link_gdf.crs = CRS("ESRI:102646")
existing_network_link_gdf = existing_network_link_gdf.to_crs(CRS("EPSG:4326"))

In [None]:
# existing_network_link_gdf.ASSIGNABLE.value_counts()

In [None]:
# existing_network_link_gdf.DELETE.value_counts()

In [None]:
# pd.crosstab([existing_network_link_gdf.CNTYPE, existing_network_link_gdf.FT, existing_network_link_gdf.ASSIGNABLE], 
#             existing_network_link_gdf.DELETE)

In [None]:
# pd.crosstab(existing_network_link_gdf.ASSIGNABLE, existing_network_link_gdf.DELETE)

In [None]:
# get TAP links from the legacy network

existing_taps_links_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "TAP"].copy()

# calculate link length, which represents distance to TAPs
# Lengths must be measured in a projected CRS (meters) that fits the data.
# The Bay Area lies in UTM zone 10N (EPSG:26910); the previously used
# EPSG:26915 is UTM zone 15N (central US), which distorts lengths by several
# percent this far from its central meridian.
existing_taps_links_gdf["distance_to_tap"] = existing_taps_links_gdf["geometry"].to_crs(epsg = 26910).length

# 0.000621371 converts meters to miles
print('max distance to tap: {} meters, or {} miles'.format(
    existing_taps_links_gdf["distance_to_tap"].max(),
    existing_taps_links_gdf["distance_to_tap"].max() * 0.000621371))

In [None]:
# Tag each TAP link with its two ends: whichever of A / B is a TAP node number
# becomes tap_id_tm2, the opposite end becomes stop_id.

a_end_is_tap = existing_taps_links_gdf.A.isin(TAP_N_list)

existing_taps_links_gdf["tap_id_tm2"] = np.where(
    a_end_is_tap, existing_taps_links_gdf.A, existing_taps_links_gdf.B
)
existing_taps_links_gdf["stop_id"] = np.where(
    a_end_is_tap, existing_taps_links_gdf.B, existing_taps_links_gdf.A
)

# keep a single row per (TAP, stop) pair
existing_taps_links_gdf = existing_taps_links_gdf.drop_duplicates(subset = ["tap_id_tm2", "stop_id"])

In [None]:
# Attach the coordinates of the "stop" end of each TAP link, looked up in the
# legacy node table by node number.
stop_end_xy_df = existing_network_node_gdf[["N", "X", "Y"]].rename(columns = {"N" : "stop_id"})

existing_taps_links_gdf = existing_taps_links_gdf.merge(stop_end_xy_df, how = "left", on = "stop_id")

In [None]:
# Constant tags used later to tell these rows apart in the combined table:
# every row here is a stop record that comes from the legacy tm2 network.
existing_taps_links_gdf = existing_taps_links_gdf.assign(type = "stops", stop_source = "tm2")

In [None]:
# show the legacy TAP-link columns that are appended to the comparison table later
existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "geometry", "distance_to_tap","type", "stop_source"]]

### assemble data

In [None]:
# One row per stop from the working scenario (stops_df), with the TAP each of
# the three k-means runs assigned to it in its own column.

stop_to_tap_location_based = stops_loc_taps_df[["stop_id", "tap_id"]].rename(
    columns = {"tap_id" : "tap_id_location_based"})     # k-mean option 1

stop_to_tap_frequency_based = stops_fre_taps_df[["stop_id", "num_trip", "tap_id"]].rename(
    columns = {"tap_id" : "tap_id_frequency_based"})    # k-mean option 2

stop_to_tap_kmeans = stops_taps_df[["stop_id", "tap_id"]].rename(
    columns = {"tap_id" : "tap_id_kmeans"})             # kmeans final option

consolidate_stops_taps_df = (
    stops_df
    .merge(stop_to_tap_location_based, how = "left", on = "stop_id")
    .merge(stop_to_tap_frequency_based, how = "left", on = "stop_id")
    .merge(stop_to_tap_kmeans, how = "left", on = "stop_id")
)

# tags that distinguish these rows once stops and taps are stacked together
consolidate_stops_taps_df["type"] = "stops"
consolidate_stops_taps_df["stop_source"] = "new"

In [None]:
# show the consolidated stop-to-TAP table
consolidate_stops_taps_df

In [None]:
# list its columns (a subset is selected when stops and taps are stacked)
consolidate_stops_taps_df.columns

In [None]:
# Stack the TAPs from the three k-means runs and the legacy TM2 TAPs into one
# table. Each source is labelled in tap_source and keeps its TAP id in its own
# column (tap_id_location_based / tap_id_frequency_based / tap_id_kmeans / tap_id_tm2).

kmeans_loc_taps_gdf = (
    kmeans_loc_taps_gdf
    .assign(tap_source = "kmeans_location_based")
    .rename(columns = {"tap_id" :"tap_id_location_based"})
)

kmeans_fre_taps_gdf = (
    kmeans_fre_taps_gdf
    .assign(tap_source = "kmeans_frequency_based")
    .rename(columns = {"tap_id" :"tap_id_frequency_based"})
)

kmeans_taps_gdf = (
    kmeans_taps_gdf
    .assign(tap_source = "kmeans")
    .rename(columns = {"tap_id" :"tap_id_kmeans"})
)

existing_taps_gdf["tap_source"] = "tm2"

tap_frames = [
    kmeans_loc_taps_gdf,
    kmeans_fre_taps_gdf,
    existing_taps_gdf[["tap_id_tm2", "X", "Y", "geometry", "tap_source"]],
    kmeans_taps_gdf,
]
taps_df = pd.concat(tap_frames, sort = False, ignore_index = True)

taps_df["type"] = "taps"

In [None]:
# Stack stops and taps into a single table:
#   - stop rows carry the TAP ids assigned to the stop by each k-means run
#   - tap rows carry only the TAP's own id, coordinates and source
stop_columns = ["X", "Y", "stop_id", "stop_name", "num_trip",
                "tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans',"type", "stop_source"]
tap_columns = ["tap_id_location_based", "tap_id_frequency_based", 'tap_id_kmeans', "tap_id_tm2",
               "X", "Y", "tap_source", "type"]

out_df = pd.concat(
    [consolidate_stops_taps_df[stop_columns], taps_df[tap_columns]],
    sort = False,
    ignore_index = True,
)

In [None]:
# some stats:
# number of rows per record type ("stops" vs "taps")
print(out_df['type'].value_counts())

# Row count per (type, tap_source). Stop rows have no tap_source value and
# groupby drops missing keys by default, so only tap rows are counted here.
display(out_df.groupby(['type', 'tap_source'])['X'].count().reset_index())

# largest num_trip value (the column comes from the frequency-based option)
print(out_df.num_trip.max())

print(out_df.columns)

In [None]:
# add tap link distance - based on the straight-line length of a LineString
# from each point to its "kmeans final option" TAP, not distance along the network

out_df = pd.merge(out_df, 
                  kmeans_taps_gdf[["tap_id_kmeans", "X", "Y"]].rename(columns = {"X" : "tap_X", "Y" : "tap_Y"}),
                  how = 'left',
                  on = ["tap_id_kmeans"])

out_df["geometry"] = out_df.apply(lambda x: LineString([Point(x.X, x.Y), Point(x.tap_X, x.tap_Y)]), axis = 1)

out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

# Measure in meters in UTM zone 10N (EPSG:26910), the zone covering the Bay Area.
# The previously used EPSG:26915 is UTM zone 15N (central US) and distorts
# lengths by several percent this far from its central meridian.
out_df["distance_to_tap"] = out_df["geometry"].to_crs(epsg = 26910).length

# only stop rows have a meaningful stop-to-TAP distance; every other row gets
# the placeholder 99999
out_df["distance_to_tap"] = np.where(out_df["type"] == "stops", 
                                     out_df["distance_to_tap"], 
                                     99999)

# the helper columns were only needed to build and measure the lines
out_df.drop(["tap_X", "tap_Y", "geometry"], axis = 1, inplace = True)

In [None]:
# append tm2 tap distance
# Legacy TAP links are added as extra "stops" rows (stop_source == "tm2"),
# carrying their own distance_to_tap; columns they lack are left empty.

out_df = pd.concat([out_df, 
                    existing_taps_links_gdf[["tap_id_tm2", "stop_id", "X", "Y", "distance_to_tap", "type", "stop_source"]]],
                  sort = False,
                  ignore_index = True)

In [None]:
# column dtypes and non-null counts of the combined stops/taps table
out_df.info()

display(out_df)

In [None]:
%%time
# add county: tag every row with the NAME of the county polygon its X/Y point falls in

county_file = os.path.join(input_data_external_dir, "county_boundaries", "cb_2018_us_county_500k",
                           "cb_2018_us_county_500k.shp")
county_gdf = gpd.read_file(county_file)
county_gdf = county_gdf.to_crs("EPSG:4326")

out_df["geometry"] = out_df.apply(lambda x: Point(x.X, x.Y), axis = 1)
out_df = gpd.GeoDataFrame(out_df, geometry = out_df["geometry"], crs = CRS("EPSG:4326"))

# "intersects" is sjoin's default test, so it is not passed explicitly: the
# `op=` keyword is deprecated in newer geopandas in favour of `predicate=`,
# and relying on the default runs unchanged on both old and new releases.
out_df = gpd.sjoin(out_df, county_gdf[["NAME", "geometry"]], how = "left")

In [None]:
# rows per county NAME; rows that matched no county polygon are not counted
out_df.NAME.value_counts()

In [None]:
# export for analysis
# writes the combined stops/taps comparison table to the local interim output folder
out_df.to_csv(os.path.join(output_data_interim_dir,"stops_and_taps.csv"), index = False)

# Use the chosen method to create TAPs and write them out

In [None]:
%%time
# Build the TAP outputs from the working scenario with Lasso's mtc helper,
# using num_taps = 6000. Three objects come back: TAP nodes, TAP links and
# TAP shapes; all three are pickled in the next cell.
tap_nodes_gdf, tap_links_gdf, tap_shapes_gdf = mtc.create_tap_nodes_and_links(
transit_network = v_00_scenario.transit_net,
    roadway_network = v_00_scenario.road_net,
    parameters = parameters,
    num_taps = 6000,
)

In [None]:
print("-------write out pickle---------")

# Persist the three TAP outputs in the interim output folder; tap_node and
# tap_shape are read back in the "TAP file" section below.
tap_pickles = (
    ("tap_link.pickle", tap_links_gdf),
    ("tap_shape.pickle", tap_shapes_gdf),
    ("tap_node.pickle", tap_nodes_gdf),
)
for pickle_name, tap_gdf in tap_pickles:
    tap_gdf.to_pickle(os.path.join(output_data_interim_dir, pickle_name))

# TAP file

In [None]:
# Reload the TAP nodes written above. Note the singular name: tap_node_gdf
# here vs. tap_nodes_gdf for the in-memory result of the previous section.
tap_node_gdf = pd.read_pickle(os.path.join(output_data_interim_dir, "tap_node.pickle"))

In [None]:
# Reload the TAP shapes written above; their "tap_id" and "model_node_id"
# columns are used below to link each TAP to stop nodes.
tap_shape_gdf = pd.read_pickle(os.path.join(output_data_interim_dir, "tap_shape.pickle"))

In [None]:
# Mode crosswalk from the Lasso parameters. The preview shows the file as read;
# afterwards it is reduced to one row per merge key so the join onto trips
# cannot multiply rows.
mode_crosswalk = pd.read_csv(parameters.mode_crosswalk_file)
display(mode_crosswalk.head())

crosswalk_key = ["agency_raw_name", "route_type", "is_express_bus"]
mode_crosswalk = mode_crosswalk.drop_duplicates(subset = crosswalk_key)

In [None]:
# columns / dtypes of the routes table that is merged onto trips next
v_00_scenario.transit_net.feed.routes.info()

In [None]:
# Build one row per trip with route and agency attributes, flag express buses,
# then attach the mode crosswalk columns.
transit_feed = v_00_scenario.transit_net.feed

# routes' own agency_raw_name is dropped so the trips' column is the one kept
routes_df = transit_feed.routes.drop("agency_raw_name", axis = 1)
agency_df = transit_feed.agency[["agency_name", "agency_raw_name", "agency_id"]]

trip_df = (
    transit_feed.trips.copy()
    .merge(routes_df, how = "left", on = "route_id")
    .merge(agency_df, how = "left", on = ["agency_raw_name", "agency_id"])
)

# identify express bus (row-wise helper from lasso.mtc); agency_name is only
# kept on the frame until this flag has been computed
trip_df["is_express_bus"] = trip_df.apply(mtc._is_express_bus, axis = 1)
trip_df = trip_df.drop("agency_name", axis = 1)

trip_df = trip_df.merge(
    mode_crosswalk.drop("agency_id", axis = 1),
    how = "left",
    on = ["agency_raw_name", "route_type", "is_express_bus"],
)

In [None]:
stop_times_df = v_00_scenario.transit_net.feed.stop_times.copy()

# One row per (stop_id, trip_id) pair present in stop_times; the count is only
# a vehicle for the grouping and is discarded straight away.
# NOTE: this rebinds stops_df, which earlier in the notebook held the stops
# table joined to roadway node attributes.
stops_df = (
    stop_times_df
    .groupby(["stop_id", "trip_id"])["stop_sequence"]
    .count()
    .reset_index()
    .drop(columns = "stop_sequence")
)

In [None]:
# label every (stop, trip) pair with the trip's TM2 line-haul name
trip_line_haul_df = trip_df[["trip_id", "TM2_line_haul_name"]]
stops_df = stops_df.merge(trip_line_haul_df, how = "left", on = "trip_id")

In [None]:
# look up the roadway node (model_node_id) each stop sits on
stop_node_lookup_df = v_00_scenario.transit_net.feed.stops[["stop_id", "model_node_id"]]
stops_df = stops_df.merge(stop_node_lookup_df, how = "left", on = "stop_id")

In [None]:
# check dtypes and non-null counts before model_node_id is cast to int
stops_df.info()

In [None]:
# Cast the node id to int so it can be matched to the TAP tables below.
# astype(int) raises if the left merge above left any model_node_id missing.
stops_df["model_node_id"] = stops_df["model_node_id"].astype(int)

In [None]:
# TAP node -> stop node(s) linked to it by the TAP shapes -> line-haul names of
# the trips serving those stop nodes. A TAP therefore appears on several rows.
tap_to_stop_node_df = tap_shape_gdf[["tap_id", "model_node_id"]].rename(
    columns = {"model_node_id" : "stop_model_node_id"})

stop_node_line_haul_df = stops_df[["model_node_id", "TM2_line_haul_name"]].rename(
    columns = {"model_node_id" : "stop_model_node_id"})

tap_df = (
    tap_node_gdf
    .merge(tap_to_stop_node_df, how = "left", on = "tap_id")
    .merge(stop_node_line_haul_df, how = "left", on = "stop_model_node_id")
)

In [None]:
# dtypes and non-null counts after the two left merges
tap_df.info()

In [None]:
# distinct line-haul names present; each one needs an entry in the
# name-to-code dictionary defined in the next cell
tap_df.TM2_line_haul_name.unique()

In [None]:
# numeric code written to the "mode" column for each TM2_line_haul_name
line_haul_name_dict = {'Local bus' : 1, 'Express bus' : 2, 'Commuter rail' : 6, 'Light rail' : 4,
       'Heavy rail' : 5, 'Ferry service' : 3}

tap_df["mode"] = tap_df["TM2_line_haul_name"].map(line_haul_name_dict)

# One row per TAP with the distinct mode codes serving it.
# Missing codes (no matching trip, or a name absent from the dictionary) are
# dropped and the rest are cast to int, so the output never contains "nan" or
# float text such as "1.0"; sorting makes the order stable between runs.
out_df = (
    tap_df
    .groupby(["tap_id", "county", "model_node_id", "X", "Y"])["mode"]
    .apply(lambda codes: sorted(set(codes.dropna().astype(int))))
    .reset_index()
)

out_df = out_df.sort_values(by = "model_node_id")

# replace the county value with its code from the Lasso parameters
out_df['county'] = out_df['county'].map(parameters.county_code_dict)

# column names used in the exported TAP file
out_df.rename(columns = {"tap_id" : "OBJECTID", "model_node_id" : "N", "X" : "long", "Y" : "lat"},inplace = True)

# comma-separated list of mode codes, e.g. "1,2"
out_df["mode"] = out_df["mode"].apply(lambda x: ",".join(map(str, x)))

In [None]:
# write the TAP file (one row per TAP: OBJECTID, county, N, long, lat, mode)
out_df.to_csv(os.path.join(output_data_interim_dir, "tap_node.csv"), index = False)