# This notebook goes through building drive, walk, and bike centroids and centroid connectors

1. centroid nodes come from existing network
2. drive centroid connector built by finding the new non-freeway drive node closest to the existing network's drive loading point
3. bike and walk centroid connector built by finding the closest new walk and bike node to the centroid

In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, shape, LineString
from scipy.spatial import cKDTree
import json

In [261]:
# Read the inputs: the SF county network standard (links, nodes, shapes)
# and the existing network (nodes and links shapefiles).

# link attributes are stored as a JSON array of records
link_file = "../tests/networkstandard/step2_transit/sf_link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

# node points
node_file = "../tests/networkstandard/step2_transit/sf_node.geojson"
node_gdf = gpd.read_file(node_file)

# link geometries; joined to link_df via shstGeometryId == id in a later cell
shape_gdf = gpd.read_file("../tests/networkstandard/step2_transit/sf_shape.geojson")

# NOTE(review): the directory name is spelled "exisiting_network" - confirm it matches the folder on disk
existing_network_node_gdf = gpd.read_file("../exisiting_network/network_nodes.shp")
existing_network_link_gdf = gpd.read_file("../exisiting_network/network_links.shp")

In [88]:
# build centroid connectors for SF county as an example

sf_boundry = gpd.read_file("../external_data/county/boundary_4.geojson")

# bring the existing network into the boundary's CRS so the spatial test compares like with like
existing_network_link_gdf = existing_network_link_gdf.to_crs(sf_boundry.crs)
existing_network_node_gdf = existing_network_node_gdf.to_crs(sf_boundry.crs)

# dissolve the boundary once and reuse it for both filters
sf_boundary_polygon = sf_boundry.geometry.unary_union

# keep only links / nodes that lie entirely within the county boundary;
# .copy() so later column assignments never touch a slice of the full network
sf_existing_network_link_gdf = existing_network_link_gdf[
                                        existing_network_link_gdf.within(sf_boundary_polygon)
                                        ].copy()
sf_existing_network_node_gdf = existing_network_node_gdf[
                                        existing_network_node_gdf.within(sf_boundary_polygon)
                                        ].copy()

In [90]:
print(sf_existing_network_node_gdf.columns)
print(sf_existing_network_node_gdf.crs)

Index(['N', 'FAREZONE', 'X', 'Y', 'COUNTY', 'MODE', 'TYPE', 'ID', 'PNR_CAP',
       'PNR1', 'PNR_FEE1', 'PNR2', 'PNR_FEE2', 'PNR3', 'PNR_FEE3', 'PNR4',
       'PNR_FEE4', 'PNR5', 'PNR_FEE5', 'STOP', 'RTDMODE', 'TAZSEQ', 'MAZSEQ',
       'TAPSEQ', 'EXTSEQ', 'NEW_NODE', 'OLD_NODE', 'TEMP', 'TEMP2',
       'geometry'],
      dtype='object')
{'init': 'epsg:4326'}


In [91]:
sf_existing_network_link_gdf.CNTYPE.value_counts()

TANA     40411
PED      27161
MAZ      20008
TAZ       8938
TAP       7568
LRAIL      616
BIKE       112
FERRY       14
HRAIL       14
USE          4
CRAIL        4
Name: CNTYPE, dtype: int64

In [92]:
def reproject(link, node, epsg):
    """
    Project the link and node geodataframes to the CRS given by ``epsg``.

    The projected node frame also gets its X and Y columns rewritten from
    the projected point geometry.

    return:
    (projected link frame, projected node frame)
    """
    projected_link = link.to_crs(epsg = epsg)
    projected_node = node.to_crs(epsg = epsg)

    # refresh the coordinate columns from the projected points
    points = projected_node['geometry']
    for column, attribute in (('X', 'x'), ('Y', 'y')):
        projected_node[column] = points.apply(lambda pt, name = attribute: getattr(pt, name))

    return projected_link, projected_node

In [185]:
# Convert both networks to EPSG:26915 so the nearest-node search can run on planar X/Y.
# NOTE(review): EPSG:26915 is NAD83 / UTM zone 15N; the centroid X values printed
# further down are around -2.1e6, i.e. far outside that zone - confirm this CRS is
# intended for San Francisco.

# attach the shape geometry to every link record; columns present in both frames
# come back with _x / _y suffixes (see id_x in the info() output below)
link_gdf = pd.merge(link_df,
                   shape_gdf,
                   how = "left",
                   left_on = "shstGeometryId",
                   right_on = "id")
link_gdf = gpd.GeoDataFrame(link_gdf)
link_gdf.crs = shape_gdf.crs
print(link_gdf.info())

# node_gdf is rebound to its projected version here; cells that run after this one
# see projected node geometry plus the X / Y columns written by reproject()
link_gdf, node_gdf = reproject(link_gdf, 
                                node_gdf, 
                                26915)

# same projection for the SF subset of the existing network
sf_existing_network_link_gdf, sf_existing_network_node_gdf = reproject(sf_existing_network_link_gdf,
                                                                        sf_existing_network_node_gdf,
                                                                        26915)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 74882 entries, 0 to 74881
Data columns (total 44 columns):
LANES                   74882 non-null int64
access                  74882 non-null object
area                    74882 non-null object
bike_access             74882 non-null int64
bridge                  74882 non-null object
drive_access            74882 non-null int64
est_width               74882 non-null object
fromIntersectionId_x    74882 non-null object
highway                 74882 non-null object
id_x                    74882 non-null object
junction                74882 non-null object
key                     74882 non-null object
landuse                 74882 non-null object
lanes                   74882 non-null object
length                  74882 non-null float64
link                    74882 non-null object
maxspeed                74882 non-null object
name                    74882 non-null object
nodeIds                 74882 non-null object
oneWay     

In [94]:
# existing taz level centroids
taz_node_gdf = sf_existing_network_node_gdf[sf_existing_network_node_gdf.TAZSEQ > 0].copy()

# existing taz level centroid connectors
taz_connectors_gdf = sf_existing_network_link_gdf[sf_existing_network_link_gdf.CNTYPE == "TAZ"].copy()

# centroid node ids, materialised once (the row-wise version rebuilt this list for every link)
taz_centroid_ids = set(taz_node_gdf.N.tolist())

# keep connectors that touch a centroid at either end
taz_connectors_gdf = taz_connectors_gdf[taz_connectors_gdf.A.isin(taz_centroid_ids) |
                                       taz_connectors_gdf.B.isin(taz_centroid_ids)].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector:
# when A is a centroid, c = A and non_c = B; otherwise c = B and non_c = A
a_is_centroid = taz_connectors_gdf.A.isin(taz_centroid_ids)
taz_connectors_gdf["c"] = taz_connectors_gdf.A.where(a_is_centroid, taz_connectors_gdf.B)
taz_connectors_gdf["non_c"] = taz_connectors_gdf.B.where(a_is_centroid, taz_connectors_gdf.A)

In [96]:
taz_connectors_gdf

Unnamed: 0,A,B,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,KPH,...,NUMLANES,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry,c,non_c
0,1,1002463,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1002463
1,1,1002702,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1002702
2,1,1003078,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1003078
3,1,1004400,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1004400
4,1,1009833,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1009833
5,1,1011503,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1011503
6,1,1012865,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1012865
7,2,1007742,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1007742
8,2,1008103,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1008103
9,2,1011332,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1011332


In [113]:
taz_node_gdf

Unnamed: 0,N,FAREZONE,X,Y,COUNTY,MODE,TYPE,ID,PNR_CAP,PNR1,...,RTDMODE,TAZSEQ,MAZSEQ,TAPSEQ,EXTSEQ,NEW_NODE,OLD_NODE,TEMP,TEMP2,geometry
0,1,0,-2.116430e+06,4.613514e+06,1,0,0,0,0,0,...,0,1,0,0,0,1,1,0,0,POINT (-2116430.484597242 4613514.384098412)
1,2,0,-2.116217e+06,4.612656e+06,1,0,0,0,0,0,...,0,2,0,0,0,2,2,0,0,POINT (-2116216.80485146 4612656.451692463)
2,3,0,-2.116611e+06,4.613233e+06,1,0,0,0,0,0,...,0,3,0,0,0,3,3,0,0,POINT (-2116610.709787361 4613233.449967336)
3,4,0,-2.109646e+06,4.618177e+06,1,0,0,0,0,0,...,0,4,0,0,0,4,4,0,0,POINT (-2109646.490261332 4618177.028216498)
4,5,0,-2.116950e+06,4.613194e+06,1,0,0,0,0,0,...,0,5,0,0,0,5,5,0,0,POINT (-2116949.780112024 4613194.362518021)
5,6,0,-2.117070e+06,4.612880e+06,1,0,0,0,0,0,...,0,6,0,0,0,6,6,0,0,POINT (-2117069.746275438 4612880.405780473)
6,7,0,-2.117248e+06,4.613152e+06,1,0,0,0,0,0,...,0,7,0,0,0,7,7,0,0,POINT (-2117247.897178257 4613152.340489607)
7,8,0,-2.117917e+06,4.612903e+06,1,0,0,0,0,0,...,0,8,0,0,0,8,8,0,0,POINT (-2117916.577099858 4612902.9825645)
8,9,0,-2.117990e+06,4.612622e+06,1,0,0,0,0,0,...,0,9,0,0,0,9,9,0,0,POINT (-2117989.68301905 4612621.748287487)
9,10,0,-2.118248e+06,4.612468e+06,1,0,0,0,0,0,...,0,10,0,0,0,10,10,0,0,POINT (-2118247.658274216 4612467.744353986)


In [116]:
def num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_gdf):
    """
    Build the loading point reference table for drive access.

    logic: every existing (centroid, loading point) pair becomes one search
    location, placed at the coordinates of the existing loading point

    return:
    dataframe with one row per existing pair of centroid 'c' and loading point
    'non_c', carrying
      - abm_num_load: number of distinct existing loading points of that centroid
      - osm_num_load: number of new loading points to generate, currently set to 1
      - N, X, Y: the existing loading point looked up in existing_node_gdf
    plus the per-pair row counts of the remaining connector columns.
    """
    # per-pair row counts; both tables below are derived from this one grouping
    pair_counts_df = existing_drive_cc_df.groupby(['c', 'non_c']).count()

    centroid_loadpoint_pairs_df = pair_counts_df.reset_index().drop(['A','B'], axis = 1)

    # number of distinct loading points each centroid has in the existing network
    loadpoints_per_centroid_df = (
        pair_counts_df.groupby('c').count()[['A']]
                      .rename(columns = {'A':'abm_num_load'})
                      .reset_index()
    )

    load_ref_df = centroid_loadpoint_pairs_df.merge(loadpoints_per_centroid_df,
                                                    how = 'left',
                                                    on = 'c')
    load_ref_df['osm_num_load'] = 1

    # coordinates of the existing loading point are the search origin
    return load_ref_df.merge(existing_node_gdf[['N', 'X', 'Y']],
                             how = 'left',
                             left_on = 'non_c',
                             right_on = 'N')


def num_of_walk_bike_loadpoint_per_centroid(existing_centroid_df):
    """
    decide number of loading point for walk and bike access per centroid

    logic: search from the centroid itself; one loading point per centroid
    (osm_num_load is currently set to 1)

    input:
    existing_centroid_df: centroid table with at least the columns N, X, Y

    return:
    new dataframe with columns c (the centroid id, taken from N), X, Y and
    osm_num_load; the input frame is not modified.
    """
    # plain Python int: the np.int alias no longer exists in current NumPy
    return existing_centroid_df[['N', 'X', 'Y']]\
                .rename(columns = {'N':'c'})\
                .assign(osm_num_load = 1)

In [19]:
def find_new_load_point(abm_load_ref_df, all_node):
    """
    find the loading points in osm nodes

    input:
    abm_load_ref_df: loading point reference table with columns
        X, Y (search origin), osm_num_load (number of nearest nodes wanted)
        and c (centroid id)
    all_node: candidate osm nodes with columns
        X, Y, osm_node_id, shst_node_id and geometry

    output: geodataframe of pairs of centroid and loading point with columns
    osm_node_id, shst_node_id, geometry_ld (point geometry of the loading
    point) and c. An empty reference table gives an empty frame with those
    columns.

    X/Y of both inputs must be in the same projected CRS
    (the notebook works in epsg = 26915)
    """
    node_columns = ['osm_node_id', 'shst_node_id', 'geometry']

    # candidate rows are addressed by position, matching the row order fed to the tree
    candidate_df = pd.DataFrame(all_node[node_columns])
    num_candidates = len(candidate_df)

    tree = cKDTree(all_node[["X", "Y"]].values)

    ref_xy = abm_load_ref_df[['X', 'Y']].values
    ref_k = abm_load_ref_df['osm_num_load'].values
    ref_c = abm_load_ref_df['c'].values

    pieces = []
    for xy, k, c in zip(ref_xy, ref_k, ref_c):
        # k has to be a real int for the tree query
        _, neighbor_idx = tree.query(xy, k = int(k))
        # the query returns a scalar for k == 1 and an array otherwise
        neighbor_idx = np.atleast_1d(neighbor_idx)
        # neighbours the tree could not supply (k larger than the node count) are
        # reported with an index equal to the number of nodes; discard them
        neighbor_idx = neighbor_idx[neighbor_idx < num_candidates]
        pieces.append(candidate_df.iloc[neighbor_idx].assign(c = int(c)))

    # stack once at the end instead of growing the frame inside the loop
    if pieces:
        new_load_point_df = pd.concat(pieces, ignore_index = True, sort = False)
    else:
        new_load_point_df = pd.DataFrame(columns = node_columns + ['c'])

    return gpd.GeoDataFrame(new_load_point_df.rename(columns = {'geometry' : 'geometry_ld'}))

In [186]:
def generate_centroid_connectors(run_type, existing_drive_cc_df, node_gdf, existing_node_df, epsg = 26915):
    """
    calls function to generate loading point reference table,
    and calls function to find loading points

    build linestring based on pairs of centroid and loading point

    input:
    run_type: 'drive', 'walk' or 'bike'
    existing_drive_cc_df: existing centroid connectors with columns A, B, c, non_c;
        only read when run_type is 'drive'
    node_gdf: candidate nodes for the loading points (X, Y, osm_node_id,
        shst_node_id, geometry)
    existing_node_df: existing network nodes with columns N, X, Y; for walk and
        bike every row of this frame is treated as a centroid
    epsg: EPSG code of the projected CRS the X/Y coordinates are in;
        defaults to 26915, the value this notebook has always used

    return centroid connectors and centroids, both converted to epsg 4326

    raises ValueError for an unknown run_type

    side effect: prints the shape of the centroid / loading point pair table
    """

    if run_type == 'drive':
        abm_load_ref_df = num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_df)
    elif run_type in ('walk', 'bike'):
        abm_load_ref_df = num_of_walk_bike_loadpoint_per_centroid(existing_node_df)
    else:
        raise ValueError("run_type must be 'drive', 'walk' or 'bike', got {!r}".format(run_type))

    # CRS of the incoming coordinates, in the dict form used elsewhere in this notebook
    working_crs = {'init' : 'epsg:{}'.format(epsg)}

    new_load_point_gdf = find_new_load_point(abm_load_ref_df, node_gdf)

    # look up the centroid coordinates and turn them into point geometry
    new_load_point_gdf = pd.merge(new_load_point_gdf,
                                 existing_node_df[['N', 'X', 'Y']],
                                 how = 'left',
                                 left_on = 'c',
                                 right_on = 'N')
    new_load_point_gdf['geometry_c'] = [Point(xy) for xy in zip(new_load_point_gdf['X'], new_load_point_gdf['Y'])]
    new_load_point_gdf = new_load_point_gdf.drop(['N', 'X', 'Y'], axis = 1)
    print(new_load_point_gdf.shape)

    #centroid coordinates: one row per centroid, id stored as osm_node_id
    new_centroid_gdf = new_load_point_gdf[['c', 'geometry_c']]\
                            .rename(columns = {'c' : 'osm_node_id', 'geometry_c' : 'geometry'})\
                            .drop_duplicates(['osm_node_id'])
    new_centroid_gdf = gpd.GeoDataFrame(new_centroid_gdf)

    #inbound cc: straight line from the loading point to the centroid
    new_cc_gdf = new_load_point_gdf.copy()
    new_cc_gdf['geometry'] = [LineString(xy) for xy in zip(new_cc_gdf['geometry_ld'], new_cc_gdf['geometry_c'])]

    new_cc_gdf["fromIntersectionId"] = new_cc_gdf['shst_node_id']
    # ids are numbered before duplicates are removed, as in the original workflow
    new_cc_gdf["shstGeometryId"] = ["cc" + str(x) for x in range(1, 1 + len(new_cc_gdf))]
    new_cc_gdf["id"] = new_cc_gdf["shstGeometryId"]

    new_cc_gdf = new_cc_gdf.rename(columns = {'osm_node_id' : 'u', 'c' : 'v'})

    #remove duplicates
    new_cc_gdf = new_cc_gdf.drop_duplicates(['u', 'v'])

    new_cc_gdf.crs = working_crs
    new_cc_gdf = new_cc_gdf.to_crs(epsg = 4326)
    new_centroid_gdf.crs = working_crs
    new_centroid_gdf = new_centroid_gdf.to_crs(epsg = 4326)

    return new_cc_gdf, new_centroid_gdf

In [98]:
link_df.roadway.unique()

array(['residential', 'primary', 'service', 'footway', 'cycleway',
       'secondary', 'tertiary', 'motorway_link', 'trunk',
       'secondary_link', 'trunk_link', 'primary_link', 'motorway',
       'tertiary_link', ''], dtype=object)

In [187]:
# freeway-type links: motorways, trunks and their ramps
# ("trunk_link" is the roadway value present in link_df.roadway)
highway_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

# drive loading points must be drivable nodes that are not an end point of a freeway-type link
# NOTE(review): isin() matches on value and type - confirm link u/v and node osm_node_id share a dtype
highway_node_ids = highway_links_df.u.tolist() + highway_links_df.v.tolist()
drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & ~(node_gdf.osm_node_id.isin(highway_node_ids))]

new_drive_cc_gdf, new_drive_centroid_gdf = generate_centroid_connectors('drive',
                                                                        taz_connectors_gdf,
                                                                        drive_node_gdf, 
                                                                        sf_existing_network_node_gdf)

(4457, 5)


In [188]:
# a cell only renders its last expression, so print the centroid count explicitly
print(taz_connectors_gdf.c.nunique())
new_drive_cc_gdf

Unnamed: 0,u,shst_node_id,geometry_ld,v,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,65292682,451ee457939515a4ccddc83c971b3b14,POINT (-2116720.085494713 4613508.869571591),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4263994 37.76963230000002, -1...",451ee457939515a4ccddc83c971b3b14,cc1,cc1
1,5437876960,0166709338db50c787ff1f0cbde03108,POINT (-2116335.318325921 4613451.919851268),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4224009 37.7702293, -122.4235...",0166709338db50c787ff1f0cbde03108,cc2,cc2
2,65292019,47607b5b93cfe3112c30ca77905ed17c,POINT (-2116776.151573973 4613447.413254976),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4267436 37.76899790000001, -1...",47607b5b93cfe3112c30ca77905ed17c,cc3,cc3
3,65292030,703354740cb1c8b47b18a7692081b3de,POINT (-2116565.645231673 4613394.907671681),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4244831 37.76915699999999, -1...",703354740cb1c8b47b18a7692081b3de,cc4,cc4
4,4087590310,f59e05dcd867cb660a9dfd24bedaf2ad,POINT (-2116735.490195741 4613483.039310105),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4264634 37.76938780000001, -1...",f59e05dcd867cb660a9dfd24bedaf2ad,cc5,cc5
5,378431868,e7e0b5fba6a4d11dd2407eb28bf4e90f,POINT (-2116405.388054292 4613642.497189956),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.423745 37.7715348, -122.42355...",e7e0b5fba6a4d11dd2407eb28bf4e90f,cc6,cc6
6,295083423,0ed7b96215de420ad456a1355e3350d8,POINT (-2116574.605524845 4613393.596458437),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4245672 37.7691224, -122.4235...",0ed7b96215de420ad456a1355e3350d8,cc7,cc7
7,65317380,9129829c684c100b57453fe8890019f2,POINT (-2116140.768206474 4612536.371583758),2,POINT (-2116216.80485146 4612656.451692463),"LINESTRING (-122.4173491 37.76357180000001, -1...",9129829c684c100b57453fe8890019f2,cc8,cc8
8,65317391,02e90282870c5ce878d24aa2fba84dd5,POINT (-2116548.02061476 4612643.149954448),2,POINT (-2116216.80485146 4612656.451692463),"LINESTRING (-122.4217399 37.7633052, -122.4185...",02e90282870c5ce878d24aa2fba84dd5,cc9,cc9
9,65319662,50afb80dfbd5248549c8ba4d24f043ad,POINT (-2116448.231241219 4613020.846910714),2,POINT (-2116216.80485146 4612656.451692463),"LINESTRING (-122.422044 37.76654, -122.4185110...",50afb80dfbd5248549c8ba4d24f043ad,cc10,cc10


In [189]:
new_drive_centroid_gdf

Unnamed: 0,osm_node_id,geometry
0,1,POINT (-122.4235552319322 37.77046131552276)
7,2,POINT (-122.4185110422301 37.76430798000693)
15,3,POINT (-122.4243767913186 37.76776767416839)
23,4,POINT (-122.3723843343677 37.82546378519586)
26,5,POINT (-122.427595112977 37.76654100560773)
34,6,POINT (-122.4277080104422 37.76375177817376)
42,7,POINT (-122.4303984160497 37.76540237343509)
50,8,POINT (-122.4361556014382 37.76163121723891)
58,9,POINT (-122.4359169188192 37.75922606378389)
66,10,POINT (-122.4379402266788 37.75731763327692)


In [190]:
# walk loading point candidates: nodes flagged walk_access == 1 whose transit_access is not 1
walk_node_gdf = node_gdf[(node_gdf.walk_access == 1) & (node_gdf.transit_access != 1)]

# taz_connectors_gdf is passed for signature compatibility; generate_centroid_connectors
# only reads it when run_type is 'drive'. Every row of taz_node_gdf is used as a centroid.
new_walk_cc_gdf, new_walk_centroid_gdf = generate_centroid_connectors('walk',
                                                                        taz_connectors_gdf,
                                                                        walk_node_gdf, 
                                                                        taz_node_gdf)

(629, 5)


In [191]:
# a cell only renders its last expression, so display the centroid table explicitly
display(new_walk_centroid_gdf)
new_walk_cc_gdf

Unnamed: 0,u,shst_node_id,geometry_ld,v,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,65292400,d75a8bddcc8ec179834e375b25efb5d3,POINT (-2116454.234076546 4613548.052463458),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4239051 37.77066110000001, -1...",d75a8bddcc8ec179834e375b25efb5d3,cc1,cc1
1,5438558049,98baedcca52b7987071a37b43c7f7675,POINT (-2116293.069873171 4612614.629245179),2,POINT (-2116216.80485146 4612656.451692463),"LINESTRING (-122.4191221 37.7637729, -122.4185...",98baedcca52b7987071a37b43c7f7675,cc2,cc2
2,258968240,277bfcb2225d7dbfe9d55357c7b944fb,POINT (-2116599.839772934 4613261.50107494),3,POINT (-2116610.709787361 4613233.449967336),"LINESTRING (-122.4243652 37.7680173, -122.4243...",277bfcb2225d7dbfe9d55357c7b944fb,cc3,cc3
3,4904249926,62329e3f673a7b37f27cd1794f4e5cba,POINT (-2109656.463611458 4618186.282842863),4,POINT (-2109646.490261332 4618177.028216498),"LINESTRING (-122.3725147 37.8255094, -122.3723...",62329e3f673a7b37f27cd1794f4e5cba,cc4,cc4
4,65366224,7f1c6b4b5f88a6b34d1da588a22a4579,POINT (-2116973.21991975 4613159.215980857),5,POINT (-2116949.780112024 4613194.362518021),"LINESTRING (-122.4277067 37.7662016, -122.4275...",7f1c6b4b5f88a6b34d1da588a22a4579,cc5,cc5
5,65329643,33a16272b7a91eee7d7b8c9422024715,POINT (-2117025.943614534 4612872.235544078),6,POINT (-2117069.746275438 4612880.405780473),"LINESTRING (-122.4272471 37.76380649999999, -1...",33a16272b7a91eee7d7b8c9422024715,cc6,cc6
6,1563088532,bfc1d33cb5605403befce7113bd5160f,POINT (-2117307.161061312 4613195.162663283),7,POINT (-2117247.897178257 4613152.340489607),"LINESTRING (-122.4311306 37.76557759999999, -1...",bfc1d33cb5605403befce7113bd5160f,cc7,cc7
7,5438557788,1199a3be4118a5f17ca7c0164c6ff446,POINT (-2117934.076462286 4612850.829645032),8,POINT (-2117916.577099858 4612902.9825645),"LINESTRING (-122.4361503 37.76117450000001, -1...",1199a3be4118a5f17ca7c0164c6ff446,cc8,cc8
8,65293657,b0a66c68eaf6ced4ed0b7c690bd97493,POINT (-2117994.09663468 4612624.80844157),9,POINT (-2117989.68301905 4612621.748287487),"LINESTRING (-122.435971 37.7592381, -122.43591...",b0a66c68eaf6ced4ed0b7c690bd97493,cc9,cc9
9,65312123,ad69d54806fef32d5c4378d8ee1b4803,POINT (-2118243.603348401 4612488.678682479),10,POINT (-2118247.658274216 4612467.744353986),"LINESTRING (-122.4379717 37.7574929, -122.4379...",ad69d54806fef32d5c4378d8ee1b4803,cc10,cc10


In [192]:
# bike loading point candidates: nodes flagged bike_access == 1
bike_node_gdf = node_gdf[(node_gdf.bike_access == 1)]

# taz_connectors_gdf is passed for signature compatibility; generate_centroid_connectors
# only reads it when run_type is 'drive'. Every row of taz_node_gdf is used as a centroid.
new_bike_cc_gdf, new_bike_centroid_gdf = generate_centroid_connectors('bike',
                                                                        taz_connectors_gdf,
                                                                        bike_node_gdf, 
                                                                        taz_node_gdf)

(629, 5)


In [193]:
new_bike_centroid_gdf

Unnamed: 0,osm_node_id,geometry
0,1,POINT (-122.4235552319322 37.77046131552276)
1,2,POINT (-122.4185110422301 37.76430798000693)
2,3,POINT (-122.4243767913186 37.76776767416839)
3,4,POINT (-122.3723843343677 37.82546378519586)
4,5,POINT (-122.427595112977 37.76654100560773)
5,6,POINT (-122.4277080104422 37.76375177817376)
6,7,POINT (-122.4303984160497 37.76540237343509)
7,8,POINT (-122.4361556014382 37.76163121723891)
8,9,POINT (-122.4359169188192 37.75922606378389)
9,10,POINT (-122.4379402266788 37.75731763327692)


In [194]:
new_bike_cc_gdf

Unnamed: 0,u,shst_node_id,geometry_ld,v,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,65292400,d75a8bddcc8ec179834e375b25efb5d3,POINT (-2116454.234076546 4613548.052463458),1,POINT (-2116430.484597242 4613514.384098412),"LINESTRING (-122.4239051 37.77066110000001, -1...",d75a8bddcc8ec179834e375b25efb5d3,cc1,cc1
1,5438558049,98baedcca52b7987071a37b43c7f7675,POINT (-2116293.069873171 4612614.629245179),2,POINT (-2116216.80485146 4612656.451692463),"LINESTRING (-122.4191221 37.7637729, -122.4185...",98baedcca52b7987071a37b43c7f7675,cc2,cc2
2,258968240,277bfcb2225d7dbfe9d55357c7b944fb,POINT (-2116599.839772934 4613261.50107494),3,POINT (-2116610.709787361 4613233.449967336),"LINESTRING (-122.4243652 37.7680173, -122.4243...",277bfcb2225d7dbfe9d55357c7b944fb,cc3,cc3
3,4904249926,62329e3f673a7b37f27cd1794f4e5cba,POINT (-2109656.463611458 4618186.282842863),4,POINT (-2109646.490261332 4618177.028216498),"LINESTRING (-122.3725147 37.8255094, -122.3723...",62329e3f673a7b37f27cd1794f4e5cba,cc4,cc4
4,65366224,7f1c6b4b5f88a6b34d1da588a22a4579,POINT (-2116973.21991975 4613159.215980857),5,POINT (-2116949.780112024 4613194.362518021),"LINESTRING (-122.4277067 37.7662016, -122.4275...",7f1c6b4b5f88a6b34d1da588a22a4579,cc5,cc5
5,65329643,33a16272b7a91eee7d7b8c9422024715,POINT (-2117025.943614534 4612872.235544078),6,POINT (-2117069.746275438 4612880.405780473),"LINESTRING (-122.4272471 37.76380649999999, -1...",33a16272b7a91eee7d7b8c9422024715,cc6,cc6
6,260494510,ccc6248ea42448c487a8eba61b6cc2c5,POINT (-2117290.648524378 4613214.098785088),7,POINT (-2117247.897178257 4613152.340489607),"LINESTRING (-122.4310321 37.765771, -122.43039...",ccc6248ea42448c487a8eba61b6cc2c5,cc7,cc7
7,5438557788,1199a3be4118a5f17ca7c0164c6ff446,POINT (-2117934.076462286 4612850.829645032),8,POINT (-2117916.577099858 4612902.9825645),"LINESTRING (-122.4361503 37.76117450000001, -1...",1199a3be4118a5f17ca7c0164c6ff446,cc8,cc8
8,65293657,b0a66c68eaf6ced4ed0b7c690bd97493,POINT (-2117994.09663468 4612624.80844157),9,POINT (-2117989.68301905 4612621.748287487),"LINESTRING (-122.435971 37.7592381, -122.43591...",b0a66c68eaf6ced4ed0b7c690bd97493,cc9,cc9
9,65312123,ad69d54806fef32d5c4378d8ee1b4803,POINT (-2118243.603348401 4612488.678682479),10,POINT (-2118247.658274216 4612467.744353986),"LINESTRING (-122.4379717 37.7574929, -122.4379...",ad69d54806fef32d5c4378d8ee1b4803,cc10,cc10


In [276]:
def consolidate_cc(new_drive_cc, new_walk_cc, new_bike_cc, link, drive_centroid, node):
    """
    Combine the drive, walk and bike centroid connectors into one two-way
    link table and one shape table.

    input:
    new_drive_cc, new_walk_cc, new_bike_cc: connector frames with at least the
        columns u, v, geometry and fromIntersectionId; they are not modified
    link, drive_centroid, node: accepted so existing calls keep working;
        not used by the consolidation

    return:
    cc_link_df: one row per direction of every unique (u, v) connector with
        drive_access / walk_access / bike_access (NaN where a mode does not
        use the connector), shstGeometryId and id
    cc_shape_gdf: one row per connector geometry (id, geometry,
        fromIntersectionId); both directions of a connector share it
    """
    # flag each mode's connectors; assign() works on a new frame
    new_cc_gdf = pd.concat([new_drive_cc.assign(drive_access = 1),
                           new_walk_cc.assign(walk_access = 1),
                           new_bike_cc.assign(bike_access = 1)],
                          sort = False,
                          ignore_index = True)

    # one geometry per (u, v) pair, renumbered so ids are unique across modes
    new_cc_geometry_gdf = new_cc_gdf[["u", "v", "geometry", "fromIntersectionId"]]\
                                .drop_duplicates(subset = ["u", "v"]).copy()
    new_cc_geometry_gdf["shstGeometryId"] = ["cc" + str(x) for x in range(1, 1 + len(new_cc_geometry_gdf))]
    new_cc_geometry_gdf["id"] = new_cc_geometry_gdf["shstGeometryId"]

    # a connector found for several modes collapses to one row carrying every flag
    unique_cc_gdf = new_cc_gdf.groupby(["u", "v"]).agg({"drive_access" : "max",
                                                    "walk_access" : "max",
                                                    "bike_access" : "max"}).reset_index()

    unique_cc_gdf = pd.merge(unique_cc_gdf,
                            new_cc_geometry_gdf,
                            how = "left",
                            on = ["u", "v"])

    # add the other direction: same shape and ids, u and v swapped
    cc_gdf = pd.concat([unique_cc_gdf,
                       unique_cc_gdf.rename(columns = {
                                            "u" : "v",
                                            "v" : "u"})],
                      ignore_index = True,
                      sort = False)

    cc_link_columns_list = ["u", "v", "drive_access", "walk_access", "bike_access", "shstGeometryId", "id"]
    cc_link_df = cc_gdf[cc_link_columns_list].copy()

    cc_shape_columns_list = ["id", "geometry", "fromIntersectionId"]
    cc_shape_gdf = cc_gdf[cc_shape_columns_list].drop_duplicates(subset = ["id"]).copy()

    return cc_link_df, cc_shape_gdf

In [277]:
# merge the three mode-specific connector sets into one link table and one shape table
cc_link_df, cc_shape_gdf = consolidate_cc(new_drive_cc_gdf, 
                                             new_walk_cc_gdf, 
                                             new_bike_cc_gdf, 
                                             link_gdf,
                                             new_drive_centroid_gdf,
                                             node_gdf)

In [278]:
cc_link_df.info()
cc_shape_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10104 entries, 0 to 10103
Data columns (total 7 columns):
u                 10104 non-null object
v                 10104 non-null object
drive_access      8856 non-null float64
walk_access       1258 non-null float64
bike_access       1258 non-null float64
shstGeometryId    10104 non-null object
id                10104 non-null object
dtypes: float64(3), object(4)
memory usage: 552.6+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5052 entries, 0 to 5051
Data columns (total 3 columns):
id                    5052 non-null object
geometry              5052 non-null object
fromIntersectionId    5052 non-null object
dtypes: object(3)
memory usage: 157.9+ KB


In [280]:
# append the new centroid connectors and centroids to the original network tables

all_link_df = pd.concat([link_df,
                        cc_link_df],
                       sort = False,
                       ignore_index = True)

all_shape_gdf = pd.concat([shape_gdf,
                          cc_shape_gdf],
                         sort = False,
                         ignore_index = True)

# node_gdf is left projected (with X / Y helper columns) by the reproject cell,
# while the new centroids are in EPSG:4326; put the nodes back in EPSG:4326 and
# drop the helper columns so both parts of the node table agree when the
# notebook is run top to bottom
# NOTE(review): assumes X / Y are only the helper columns written by reproject()
node_wgs84_gdf = node_gdf.to_crs(epsg = 4326).drop(columns = ["X", "Y"], errors = "ignore")

all_node_gdf = pd.concat([node_wgs84_gdf,
                         new_drive_centroid_gdf],
                        sort = False,
                        ignore_index = True)

In [236]:
def link_df_to_geojson(df, properties):
    """
    Turn a frame of LineString rows into a GeoJSON FeatureCollection dict.

    Each row becomes one Feature: its coordinates are the (x, y) pairs of
    row["geometry"].coords and its properties are the listed columns.

    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    features = []
    for _, record in df.iterrows():
        vertices = [[x, y] for (x, y) in list(record["geometry"].coords)]
        features.append({"type":"Feature",
                         "properties":{name: record[name] for name in properties},
                         "geometry":{"type":"LineString",
                                     "coordinates":vertices}})
    return {"type":"FeatureCollection", "features":features}

def point_df_to_geojson(df: pd.DataFrame, properties: list):
    """
    Turn a frame of Point rows into a GeoJSON FeatureCollection dict.

    Each row becomes one Feature: its coordinates are the x and y of
    row["geometry"] and its properties are the listed columns.

    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    features = []
    for _, record in df.iterrows():
        location = [record["geometry"].x, record["geometry"].y]
        features.append({
            "type": "Feature",
            "properties": {name: record[name] for name in properties},
            "geometry": {"type": "Point", "coordinates": location},
        })
    return {"type": "FeatureCollection", "features": features}

def fill_na(df_na):
    """
    Return a copy of the frame with missing values replaced.

    fill str (object dtype) NaN with ""
    fill numeric NaN with 0
    columns of any other dtype are left as they are

    The input frame is not modified. The numeric and str column lists are
    printed as a side effect.
    """
    df = df_na.copy()
    num_col = list(df.select_dtypes([np.number]).columns)
    print("numeric columns: ", num_col)
    object_col = list(df.select_dtypes(['object']).columns)
    print("str columns: ", object_col)

    # assign the filled column back instead of calling fillna(inplace = True) on
    # df[x]: the in-place form acts on an intermediate Series and does not write
    # through to df under pandas Copy-on-Write
    for x in num_col:
        df[x] = df[x].fillna(0)
    for x in object_col:
        df[x] = df[x].fillna("")

    return df

In [281]:
# replace missing values in the combined tables (fill_na is defined in an earlier cell)
all_shape_gdf = fill_na(all_shape_gdf)

# access flags and LANES are cast to int64 first; missing values become 0
# (the connector rows carry NaN for the access modes they were not built for)
int_col = ["bike_access", "walk_access", "drive_access", "transit_access", "LANES"]
for c in int_col:
    all_link_df[c] = all_link_df[c].fillna(0).astype(np.int64)
    
all_link_df = fill_na(all_link_df)

# same int64 cast for the access flags on nodes
int_col = ["bike_access", "walk_access", "drive_access", "transit_access"]
for c in int_col:
    all_node_gdf[c] = all_node_gdf[c].fillna(0).astype(np.int64)
    
all_node_gdf = fill_na(all_node_gdf)

numeric columns:  []
str columns:  ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId', 'geometry']
numeric columns:  ['LANES', 'bike_access', 'drive_access', 'length', 'rail_traveltime', 'tomtom_f_jnctid', 'tomtom_id', 'tomtom_t_jnctid', 'transit_access', 'walk_access']
str columns:  ['access', 'area', 'bridge', 'est_width', 'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse', 'lanes', 'link', 'maxspeed', 'name', 'nodeIds', 'oneWay', 'ref', 'roadway', 'roundabout', 'service', 'shstGeometryId', 'shstReferenceId', 'toIntersectionId', 'tunnel', 'u', 'v', 'wayId', 'width']
numeric columns:  ['osm_node_id', 'drive_access', 'walk_access', 'bike_access', 'transit_access']
str columns:  ['shst_node_id', 'geometry']


In [282]:
all_link_df.info()
all_node_gdf.info()
all_shape_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84986 entries, 0 to 84985
Data columns (total 38 columns):
LANES                 84986 non-null int64
access                84986 non-null object
area                  84986 non-null object
bike_access           84986 non-null int64
bridge                84986 non-null object
drive_access          84986 non-null int64
est_width             84986 non-null object
fromIntersectionId    84986 non-null object
highway               84986 non-null object
id                    84986 non-null object
junction              84986 non-null object
key                   84986 non-null object
landuse               84986 non-null object
lanes                 84986 non-null object
length                84986 non-null float64
link                  84986 non-null object
maxspeed              84986 non-null object
name                  84986 non-null object
nodeIds               84986 non-null object
oneWay                84986 non-null object
rail_travelti

In [239]:
%%time

# Write the combined link shapes (original shapes plus centroid connector shapes) to GeoJSON.
print("-------write out link shape geojson---------")

# columns exported as feature properties; the geometry becomes the LineString coordinates
shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(all_shape_gdf, shape_prop)

with open("../tests/networkstandard/step3_centroid_connector/sf_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 18.5 s


In [283]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

# every column of the combined link table is exported
link_prop = all_link_df.columns.tolist()

# one JSON object per link row
out = all_link_df[link_prop].to_json(orient = "records")

with open('../tests/networkstandard/step3_centroid_connector/sf_link.json', 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 2.25 s


In [241]:
%%time

# Write the combined nodes (original nodes plus drive centroids) to GeoJSON.
print("-------write out node geojson---------")

# every column except the geometry is exported as a feature property
node_prop = all_node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(all_node_gdf, node_prop)

with open("../tests/networkstandard/step3_centroid_connector/sf_node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 7.02 s


46815

In [284]:
# write out QA/QC link shape: link attributes joined with their geometry in one file

# attach the geometry to every link row through the shared "id" column
all_link_gdf = gpd.GeoDataFrame(pd.merge(all_link_df,
                       all_shape_gdf[["id", "geometry"]],
                       how = "left",
                       on = "id"))
# the merged frame takes its CRS from the combined node table
all_link_gdf.crs = all_node_gdf.crs

print("-------write out link shape geojson---------")

# every column except the geometry is exported as a feature property
link_shape_prop = all_link_gdf.drop("geometry", axis = 1).columns.tolist()
link_shape_geojson = link_df_to_geojson(all_link_gdf, link_shape_prop)

with open("../tests/networkstandard/step3_centroid_connector/sf_link_with_shape.geojson", "w") as f:
    json.dump(link_shape_geojson, f)

-------write out link shape geojson---------


In [286]:
shape_gdf.id.nunique()

41763

In [287]:
link_df.id.nunique()

41763

In [288]:
all_link_df.id.nunique()

46815

In [289]:
all_shape_gdf.id.nunique()

46815

In [291]:
all_link_gdf[all_link_gdf.id == "cc3030"]

Unnamed: 0,LANES,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,highway,id,...,tomtom_id,tomtom_t_jnctid,transit_access,tunnel,u,v,walk_access,wayId,width,geometry
77186,0,,,0,,1,,,,cc3030,...,0.0,0.0,0,,65319956,425,0,,,"LINESTRING (-122.4153867 37.7893258, -122.4135..."
82238,0,,,0,,1,,,,cc3030,...,0.0,0.0,0,,425,65319956,0,,,"LINESTRING (-122.4153867 37.7893258, -122.4135..."
