# This notebook goes through building drive, walk, and bike centroids and centorid connectors

1. centroid nodes come from existing network
2. drive centroid connector built by finding the new non-freeway drive node closest to the existing network's drive loading point
3. bike and walk centroid connector built by finding the closest new walk and bike node to the centroid

In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, shape, LineString
from scipy.spatial import cKDTree
import json

In [None]:
step2_output_folder = "../data/processed/step2_transit/"
existing_network_folder = "../data/external/existing_network/"
county_shape_folder = "../data/external/county/"
output_folder = "../data/processed/step3_centroid_connector/"

In [457]:
# read input sf county network standard and existing network

link_file = step2_output_folder + "sf_link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

node_file = step2_output_folder + "sf_node.geojson"
node_gdf = gpd.read_file(node_file)

shape_gdf = gpd.read_file(step2_output_folder + "sf_shape.geojson")

existing_network_node_gdf = gpd.read_file(existing_network_folder + "network_nodes.shp")
existing_network_link_gdf = gpd.read_file(existing_network_folder + "network_links.shp")

In [88]:
# build centroid conenctors for SF county as an example

sf_boundry = gpd.read_file(county_shape_folder + "boundary_4.geojson")
sf_boundry.crs

existing_network_link_gdf = existing_network_link_gdf.to_crs(sf_boundry.crs)
existing_network_node_gdf = existing_network_node_gdf.to_crs(sf_boundry.crs)

sf_existing_network_link_gdf = existing_network_link_gdf[
                                        existing_network_link_gdf.within(sf_boundry.geometry.unary_union)
                                        ].copy()
sf_existing_network_node_gdf = existing_network_node_gdf[
                                        existing_network_node_gdf.within(sf_boundry.geometry.unary_union)
                                        ]

In [305]:
print(sf_existing_network_node_gdf.columns)
print(sf_existing_network_node_gdf.crs)

Index(['N', 'FAREZONE', 'X', 'Y', 'COUNTY', 'MODE', 'TYPE', 'ID', 'PNR_CAP',
       'PNR1', 'PNR_FEE1', 'PNR2', 'PNR_FEE2', 'PNR3', 'PNR_FEE3', 'PNR4',
       'PNR_FEE4', 'PNR5', 'PNR_FEE5', 'STOP', 'RTDMODE', 'TAZSEQ', 'MAZSEQ',
       'TAPSEQ', 'EXTSEQ', 'NEW_NODE', 'OLD_NODE', 'TEMP', 'TEMP2',
       'geometry'],
      dtype='object')
{'init': 'epsg:26915', 'no_defs': True}


In [91]:
sf_existing_network_link_gdf.CNTYPE.value_counts()

TANA     40411
PED      27161
MAZ      20008
TAZ       8938
TAP       7568
LRAIL      616
BIKE       112
FERRY       14
HRAIL       14
USE          4
CRAIL        4
Name: CNTYPE, dtype: int64

In [92]:
def reproject(link, node, epsg):
    """
    reporoject link and node geodataframes
    
    for nodes, update X and Y columns
    
    """
    
    link = link.to_crs(epsg = epsg)
    node = node.to_crs(epsg = epsg)
    
    node['X'] = node['geometry'].apply(lambda p: p.x)
    node['Y'] = node['geometry'].apply(lambda p: p.y)

    return link, node

In [458]:
# covert networks to espg 26915 for nearest node operation

link_gdf = pd.merge(link_df,
                   shape_gdf,
                   how = "left",
                   left_on = "shstGeometryId",
                   right_on = "id")
link_gdf = gpd.GeoDataFrame(link_gdf)
link_gdf.crs = shape_gdf.crs
print(link_gdf.info())

link_gdf, node_gdf = reproject(link_gdf, 
                                node_gdf, 
                                26915)

sf_existing_network_link_gdf, sf_existing_network_node_gdf = reproject(sf_existing_network_link_gdf,
                                                                        sf_existing_network_node_gdf,
                                                                        26915)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 75497 entries, 0 to 75496
Data columns (total 46 columns):
A                       75497 non-null int64
B                       75497 non-null int64
LANES                   75497 non-null int64
access                  75497 non-null object
area                    75497 non-null object
bike_access             75497 non-null int64
bridge                  75497 non-null object
drive_access            75497 non-null int64
est_width               75497 non-null object
fromIntersectionId_x    75497 non-null object
highway                 75497 non-null object
id_x                    75497 non-null object
junction                75497 non-null object
key                     75497 non-null object
landuse                 75497 non-null object
lanes                   75497 non-null object
length                  75497 non-null float64
link                    75497 non-null object
maxspeed                75497 non-null object
name         

In [94]:
# existing taz level centroids
taz_node_gdf = sf_existing_network_node_gdf[sf_existing_network_node_gdf.TAZSEQ > 0].copy()

# existing taz level centroid connectors
taz_connectors_gdf = sf_existing_network_link_gdf[sf_existing_network_link_gdf.CNTYPE == "TAZ"].copy()

taz_connectors_gdf = taz_connectors_gdf[(taz_connectors_gdf.A.isin(taz_node_gdf.N.tolist())) | 
                                       (taz_connectors_gdf.B.isin(taz_node_gdf.N.tolist()))].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector
taz_connectors_gdf["c"] = taz_connectors_gdf.apply(lambda x: x.A if x.A in taz_node_gdf.N.tolist() else x.B,
                                                  axis = 1)
taz_connectors_gdf["non_c"] = taz_connectors_gdf.apply(lambda x: x.B if x.A in taz_node_gdf.N.tolist() else x.A,
                                                      axis = 1)

In [309]:
taz_connectors_gdf

Unnamed: 0,A,B,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,KPH,...,NUMLANES,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry,c,non_c
0,1,1002463,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1002463
1,1,1002702,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1002702
2,1,1003078,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1003078
3,1,1004400,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1004400
4,1,1009833,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1009833
5,1,1011503,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1011503
6,1,1012865,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116430.484597242 4613514.3840984...,1,1012865
7,2,1007742,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1007742
8,2,1008103,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1008103
9,2,1011332,0,0,0,,0,,,0,...,0,0,0,0,0,0,0,LINESTRING (-2116216.80485146 4612656.45169246...,2,1011332


In [310]:
taz_node_gdf

Unnamed: 0,N,FAREZONE,X,Y,COUNTY,MODE,TYPE,ID,PNR_CAP,PNR1,...,RTDMODE,TAZSEQ,MAZSEQ,TAPSEQ,EXTSEQ,NEW_NODE,OLD_NODE,TEMP,TEMP2,geometry
0,1,0,-2.116430e+06,4.613514e+06,1,0,0,0,0,0,...,0,1,0,0,0,1,1,0,0,POINT (-2116430.484597242 4613514.384098412)
1,2,0,-2.116217e+06,4.612656e+06,1,0,0,0,0,0,...,0,2,0,0,0,2,2,0,0,POINT (-2116216.80485146 4612656.451692463)
2,3,0,-2.116611e+06,4.613233e+06,1,0,0,0,0,0,...,0,3,0,0,0,3,3,0,0,POINT (-2116610.709787361 4613233.449967336)
3,4,0,-2.109646e+06,4.618177e+06,1,0,0,0,0,0,...,0,4,0,0,0,4,4,0,0,POINT (-2109646.490261332 4618177.028216498)
4,5,0,-2.116950e+06,4.613194e+06,1,0,0,0,0,0,...,0,5,0,0,0,5,5,0,0,POINT (-2116949.780112024 4613194.362518021)
5,6,0,-2.117070e+06,4.612880e+06,1,0,0,0,0,0,...,0,6,0,0,0,6,6,0,0,POINT (-2117069.746275438 4612880.405780473)
6,7,0,-2.117248e+06,4.613152e+06,1,0,0,0,0,0,...,0,7,0,0,0,7,7,0,0,POINT (-2117247.897178257 4613152.340489607)
7,8,0,-2.117917e+06,4.612903e+06,1,0,0,0,0,0,...,0,8,0,0,0,8,8,0,0,POINT (-2117916.577099858 4612902.9825645)
8,9,0,-2.117990e+06,4.612622e+06,1,0,0,0,0,0,...,0,9,0,0,0,9,9,0,0,POINT (-2117989.68301905 4612621.748287487)
9,10,0,-2.118248e+06,4.612468e+06,1,0,0,0,0,0,...,0,10,0,0,0,10,10,0,0,POINT (-2118247.658274216 4612467.744353986)


In [371]:
def num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_gdf):
    """
    decide number of loading point for drive access per centroid
    
    logic: for drive, find the closest points to the existing loading point
    
    return: 
    dataframe
    for each existing drive loading point, number of new loading point needs to be generated. currently set to 1.
    
    """
    existing_pairs_of_centroid_loadpoint_df = existing_drive_cc_df.groupby(['c', 'non_c']).count().reset_index().drop(['A','B'], axis = 1)
    
    existing_num_of_loadpoint_per_c_df = existing_drive_cc_df.groupby(['c', 'non_c']).count().groupby('c').count()[['A']].rename(columns = {'A':'abm_num_load'}).reset_index()
    
    num_drive_loadpoint_new_near_old = pd.merge(existing_pairs_of_centroid_loadpoint_df,
                                                        existing_num_of_loadpoint_per_c_df,
                                                        how = 'left',
                                                        on = 'c')
    
    num_drive_loadpoint_new_near_old['osm_num_load'] = 1
    
    num_drive_loadpoint_new_near_old = pd.merge(num_drive_loadpoint_new_near_old,
                                                        existing_node_gdf[['N', 'X', 'Y']],
                                                        how = 'left',
                                                        left_on = 'non_c',
                                                        right_on = 'N')
    return num_drive_loadpoint_new_near_old


def num_of_walk_bike_loadpoint_per_centroid(existing_centroid_df):
    """
    decide number of loading point for walk and bike access per centroid
    
    logic: find 5 closest points to centroid
    
    return: 
    dataframe
    for each centroid, number of loading point needs to be generated.
    
    """
    
    num_loadpoint = existing_centroid_df[['N', 'X', 'Y']].copy()
    num_loadpoint['osm_num_load'] = np.int(1)
    num_loadpoint.rename(columns = {'N':'c'}, inplace = True)
    
    return num_loadpoint

In [375]:
def find_new_load_point(abm_load_ref_df, all_node):
    """
    find the loading points in osm nodes
    
    input: osm node, loading point reference input
    
    output:  dataframe of pairs of centroid and loading point, with point geometry of loading point
    
    works in epsg = 26915
    
    """
    
    all_node_gdf = all_node.copy()
    
    inventory_node_ref = all_node_gdf[["X", "Y"]].values
    tree = cKDTree(inventory_node_ref)
    
    new_load_point_gdf = gpd.GeoDataFrame()
    
    for i in range(len(abm_load_ref_df)):
        point = abm_load_ref_df.iloc[i][['X', 'Y']].values
        n_neigh = abm_load_ref_df.iloc[i]['osm_num_load']
        dd, ii = tree.query(point, k = n_neigh)
        if n_neigh == 1:
            add_gdf = gpd.GeoDataFrame(all_node_gdf[['osm_node_id', "shst_node_id", "model_node_id", 'geometry']].iloc[ii])\
                            .transpose().reset_index(drop = True)
        else:
            add_gdf = gpd.GeoDataFrame(all_node_gdf[['osm_node_id', "shst_node_id", "model_node_id", 'geometry']].iloc[ii])\
                            .reset_index(drop = True)
        add_gdf['c'] = int(abm_load_ref_df.iloc[i]['c'])
        if i == 0:
            new_load_point_gdf = add_gdf.copy()
             
        else:
            new_load_point_gdf = new_load_point_gdf.append(add_gdf, ignore_index=True, sort=False)
    
    return new_load_point_gdf.rename(columns = {'geometry' : 'geometry_ld'})

In [376]:
def generate_centroid_connectors(run_type, existing_drive_cc_df, node_gdf, existing_node_df):
    """
    calls function to generate loading point reference table, 
    and calls function to find loading points
    
    build linestring based on pairs of centroid and loading point
    
    return centroid connectors and centroids
    """
    
    if run_type == 'drive':
        abm_load_ref_df = num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_df)
    if (run_type == 'walk')|(run_type == 'bike'):
        abm_load_ref_df = num_of_walk_bike_loadpoint_per_centroid(existing_node_df)

    new_load_point_gdf = find_new_load_point(abm_load_ref_df, node_gdf)
    new_load_point_gdf = pd.merge(new_load_point_gdf,
                                 existing_node_df[['N', 'X', 'Y']],
                                 how = 'left', 
                                 left_on = 'c',
                                 right_on = 'N')
    new_load_point_gdf['geometry_c'] = [Point(xy) for xy in zip(new_load_point_gdf['X'], new_load_point_gdf['Y'])]
    new_load_point_gdf.drop(['N', 'X', 'Y'], axis = 1, inplace = True)
    
    #centroid coordinates
    new_centroid_gdf = new_load_point_gdf.copy()[['c', 'geometry_c']]
    new_centroid_gdf.rename(columns = {'c' : 'model_node_id', 'geometry_c' : 'geometry'}, inplace = True)
    new_centroid_gdf.drop_duplicates(['model_node_id'], inplace = True)

    new_centroid_gdf = gpd.GeoDataFrame(new_centroid_gdf)
    
    #inbound cc
    new_cc_gdf = new_load_point_gdf.copy()
    new_cc_gdf['geometry'] = [LineString(xy) for xy in zip(new_cc_gdf['geometry_ld'], new_cc_gdf['geometry_c'])]

    new_cc_gdf["fromIntersectionId"] = new_cc_gdf['shst_node_id']
    new_cc_gdf["shstGeometryId"] = range(1, 1+len(new_cc_gdf))
    new_cc_gdf["shstGeometryId"] = new_cc_gdf["shstGeometryId"].apply(lambda x: "cc" + str(x))
    new_cc_gdf["id"] = new_cc_gdf["shstGeometryId"]
    
    new_cc_gdf = new_cc_gdf.rename(columns = {'model_node_id' : 'A', 
                                              'c' : 'B',
                                             "osm_node_id" : "u"})
    
    #remove duplicates
    new_cc_gdf.drop_duplicates(['A', 'B'], inplace = True)
    
    new_cc_gdf.crs = {'init' : 'epsg:26915'}
    new_cc_gdf = new_cc_gdf.to_crs(epsg = 4326)
    new_centroid_gdf.crs = {'init' : 'epsg:26915'}
    new_centroid_gdf = new_centroid_gdf.to_crs(epsg = 4326)
    
    return new_cc_gdf, new_centroid_gdf

In [459]:
highway_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "truck_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & ~(node_gdf.osm_node_id.isin(highway_links_df.u.tolist() + 
                                                                                 highway_links_df.v.tolist()))]

new_drive_cc_gdf, new_drive_centroid_gdf = generate_centroid_connectors('drive',
                                                                        taz_connectors_gdf,
                                                                        drive_node_gdf, 
                                                                        sf_existing_network_node_gdf)

In [460]:
drive_node_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 20725 entries, 0 to 27699
Data columns (total 10 columns):
osm_node_id       20725 non-null int64
shst_node_id      20725 non-null object
drive_access      20725 non-null int64
walk_access       20725 non-null int64
bike_access       20725 non-null int64
model_node_id     20725 non-null int64
transit_access    20725 non-null int64
geometry          20725 non-null object
X                 20725 non-null float64
Y                 20725 non-null float64
dtypes: float64(2), int64(6), object(2)
memory usage: 1.7+ MB


In [461]:
new_drive_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 4428 entries, 0 to 4456
Data columns (total 10 columns):
u                     4428 non-null object
shst_node_id          4428 non-null object
A                     4428 non-null object
geometry_ld           4428 non-null object
B                     4428 non-null int64
geometry_c            4428 non-null object
geometry              4428 non-null object
fromIntersectionId    4428 non-null object
shstGeometryId        4428 non-null object
id                    4428 non-null object
dtypes: int64(1), object(9)
memory usage: 380.5+ KB


In [462]:
new_drive_centroid_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 629 entries, 0 to 4453
Data columns (total 2 columns):
model_node_id    629 non-null int64
geometry         629 non-null object
dtypes: int64(1), object(1)
memory usage: 14.7+ KB


In [463]:
walk_node_gdf = node_gdf[(node_gdf.walk_access == 1) & (node_gdf.transit_access != 1)]

new_walk_cc_gdf, new_walk_centroid_gdf = generate_centroid_connectors('walk',
                                                                        taz_connectors_gdf,
                                                                        walk_node_gdf, 
                                                                        taz_node_gdf)

In [464]:
new_walk_centroid_gdf
new_walk_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 629 entries, 0 to 628
Data columns (total 10 columns):
u                     629 non-null object
shst_node_id          629 non-null object
A                     629 non-null object
geometry_ld           629 non-null object
B                     629 non-null int64
geometry_c            629 non-null object
geometry              629 non-null object
fromIntersectionId    629 non-null object
shstGeometryId        629 non-null object
id                    629 non-null object
dtypes: int64(1), object(9)
memory usage: 54.1+ KB


In [465]:
bike_node_gdf = node_gdf[(node_gdf.bike_access == 1)]

new_bike_cc_gdf, new_bike_centroid_gdf = generate_centroid_connectors('bike',
                                                                        taz_connectors_gdf,
                                                                        bike_node_gdf, 
                                                                        taz_node_gdf)

In [466]:
new_bike_centroid_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 629 entries, 0 to 628
Data columns (total 2 columns):
model_node_id    629 non-null int64
geometry         629 non-null object
dtypes: int64(1), object(1)
memory usage: 14.7+ KB


In [467]:
new_bike_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 629 entries, 0 to 628
Data columns (total 10 columns):
u                     629 non-null object
shst_node_id          629 non-null object
A                     629 non-null object
geometry_ld           629 non-null object
B                     629 non-null int64
geometry_c            629 non-null object
geometry              629 non-null object
fromIntersectionId    629 non-null object
shstGeometryId        629 non-null object
id                    629 non-null object
dtypes: int64(1), object(9)
memory usage: 54.1+ KB


In [468]:
def consolidate_cc(new_drive_cc, new_walk_cc, new_bike_cc, link, drive_centroid, node):
    
    link_gdf = link.copy()
    node_gdf = node.copy()
    drive_centroid_gdf = drive_centroid.copy()
    new_drive_cc_gdf = new_drive_cc.copy()
    new_walk_cc_gdf = new_walk_cc.copy()
    new_bike_cc_gdf = new_bike_cc.copy()
    
    new_drive_cc_gdf["drive_access"] = int(1)
    new_walk_cc_gdf["walk_access"] = int(1)
    new_bike_cc_gdf["bike_access"] = int(1)
    
    new_cc_gdf = pd.concat([new_drive_cc_gdf,
                           new_walk_cc_gdf,
                           new_bike_cc_gdf],
                          sort = False,
                          ignore_index = True)
    
    new_cc_gdf["u"] = new_cc_gdf["u"].astype(np.int64)
    new_cc_gdf["A"] = new_cc_gdf["A"].astype(np.int64)
    
    new_cc_geometry_gdf = new_cc_gdf[["A", "B", "geometry", "fromIntersectionId", "u"]]\
                                .drop_duplicates(subset = ["A", "B"]).copy()
    
    new_cc_geometry_gdf["shstGeometryId"] = range(1, 1 + len(new_cc_geometry_gdf))
    new_cc_geometry_gdf["shstGeometryId"] = new_cc_geometry_gdf["shstGeometryId"].apply(lambda x: "cc" + str(x))
    new_cc_geometry_gdf["id"] = new_cc_geometry_gdf["shstGeometryId"]
    
    unique_cc_gdf = new_cc_gdf.groupby(["A", "B"]).agg({"drive_access" : "max",
                                                    "walk_access" : "max",
                                                    "bike_access" : "max"}).reset_index()
    
    unique_cc_gdf = pd.merge(unique_cc_gdf,
                            new_cc_geometry_gdf,
                            how = "left",
                            on = ["A", "B"])
    
    # add the other direction
    cc_gdf = pd.concat([unique_cc_gdf,
                       unique_cc_gdf.rename(columns = {
                                            "A" : "B",
                                            "B" : "A",
                                            "u" : "v",
                                            "fromIntersectionId" : "toIntersectionId"})],
                      ignore_index = True,
                      sort = False)
    
    cc_link_columns_list = ["A", "B", "drive_access", "walk_access", "bike_access", 
                            "shstGeometryId", "id", "u", "v", "fromIntersectionId", "toIntersectionId"]
    cc_link_df = cc_gdf[cc_link_columns_list].copy()
    
    cc_shape_columns_list = ["id", "geometry", "fromIntersectionId", "toIntersectionId"]
    cc_shape_gdf = cc_gdf[cc_shape_columns_list].drop_duplicates(subset = ["id"]).copy()
            
    return cc_link_df, cc_shape_gdf

In [469]:
cc_link_df, cc_shape_gdf = consolidate_cc(new_drive_cc_gdf, 
                                             new_walk_cc_gdf, 
                                             new_bike_cc_gdf, 
                                             link_gdf,
                                             new_drive_centroid_gdf,
                                             node_gdf)

In [470]:
cc_link_df.info()
cc_shape_gdf.info()
new_drive_centroid_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10104 entries, 0 to 10103
Data columns (total 11 columns):
A                     10104 non-null int64
B                     10104 non-null int64
drive_access          8856 non-null float64
walk_access           1258 non-null float64
bike_access           1258 non-null float64
shstGeometryId        10104 non-null object
id                    10104 non-null object
u                     5052 non-null float64
v                     5052 non-null float64
fromIntersectionId    5052 non-null object
toIntersectionId      5052 non-null object
dtypes: float64(5), int64(2), object(4)
memory usage: 868.4+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5052 entries, 0 to 5051
Data columns (total 4 columns):
id                    5052 non-null object
geometry              5052 non-null object
fromIntersectionId    5052 non-null object
toIntersectionId      0 non-null object
dtypes: object(4)
memory usage: 197.3+ KB
<class 'geopandas.geodataframe

In [471]:
# concat centroid and centroid connectors to network

all_link_df = pd.concat([link_df,
                        cc_link_df],
                       sort = False,
                       ignore_index = True)

all_shape_gdf = pd.concat([shape_gdf,
                          cc_shape_gdf],
                         sort = False,
                         ignore_index = True)

new_drive_centroid_gdf["drive_access"] = 1
new_drive_centroid_gdf['walk_access'] = 1
new_drive_centroid_gdf['bike_access'] = 1

node_gdf = node_gdf.to_crs(epsg = 4326)

all_node_gdf = pd.concat([node_gdf,
                         new_drive_centroid_gdf],
                        sort = False,
                        ignore_index = True)

all_node_gdf.drop(["X", "Y"], axis = 1, inplace = True)

In [472]:
def link_df_to_geojson(df, properties):
    """
    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    geojson = {"type":"FeatureCollection", "features":[]}
    for _, row in df.iterrows():
        feature = {"type":"Feature",
                   "properties":{},
                   "geometry":{"type":"LineString",
                               "coordinates":[]}}
        feature["geometry"]["coordinates"] = [[x, y] for (x,y) in list(row["geometry"].coords)]
        for prop in properties:
            feature["properties"][prop] = row[prop]
        geojson["features"].append(feature)
    return geojson

def point_df_to_geojson(df: pd.DataFrame, properties: list):
    """
    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    
    geojson = {"type": "FeatureCollection", "features": []}
    for _, row in df.iterrows():
        feature = {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Point", "coordinates": []},
        }
        feature["geometry"]["coordinates"] = [row["geometry"].x, row["geometry"].y]
        for prop in properties:
            feature["properties"][prop] = row[prop]
        geojson["features"].append(feature)
    return geojson

def fill_na(df_na):
    """
    fill str NaN with ""
    fill numeric NaN with 0
    """
    df = df_na.copy()
    num_col = list(df.select_dtypes([np.number]).columns)
    print("numeric columns: ", num_col)
    object_col = list(df.select_dtypes(['object']).columns)
    print("str columns: ", object_col)
    
    for x in list(df.columns):
        if x in num_col:
            df[x].fillna(0, inplace = True)
        elif x in object_col:
            df[x].fillna("", inplace = True)
    
    return df

In [473]:
node_gdf[node_gdf.osm_node_id==5475039752]

Unnamed: 0,osm_node_id,shst_node_id,drive_access,walk_access,bike_access,model_node_id,transit_access,geometry,X,Y
26310,5475039752,4a0b7c9b7166aa9d642dc4821d2b6d47,1,1,1,1026310,0,POINT (-122.3662269 37.82226110000001),-2109217.0,4617621.0


In [474]:
all_shape_gdf = fill_na(all_shape_gdf)

int_col = ["bike_access", "walk_access", "drive_access", "transit_access", "LANES", "A", "B", "u", "v"]
for c in int_col:
    all_link_df[c] = all_link_df[c].fillna(0).astype(np.int64)
    
all_link_df = fill_na(all_link_df)

int_col = ["bike_access", "walk_access", "drive_access", "transit_access", "osm_node_id"]
for c in int_col:
    all_node_gdf[c] = all_node_gdf[c].fillna(0).astype(np.int64)
    
all_node_gdf = fill_na(all_node_gdf)

numeric columns:  []
str columns:  ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId', 'geometry']
numeric columns:  ['A', 'B', 'LANES', 'bike_access', 'drive_access', 'length', 'rail_traveltime', 'tomtom_f_jnctid', 'tomtom_id', 'tomtom_t_jnctid', 'transit_access', 'u', 'v', 'walk_access']
str columns:  ['access', 'area', 'bridge', 'est_width', 'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse', 'lanes', 'link', 'maxspeed', 'name', 'nodeIds', 'oneWay', 'ref', 'roadway', 'roundabout', 'service', 'shstGeometryId', 'shstReferenceId', 'toIntersectionId', 'tunnel', 'wayId', 'width']
numeric columns:  ['osm_node_id', 'drive_access', 'walk_access', 'bike_access', 'model_node_id', 'transit_access']
str columns:  ['shst_node_id', 'geometry']


In [475]:
all_link_df.info()
all_node_gdf.info()
all_shape_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85601 entries, 0 to 85600
Data columns (total 40 columns):
A                     85601 non-null int64
B                     85601 non-null int64
LANES                 85601 non-null int64
access                85601 non-null object
area                  85601 non-null object
bike_access           85601 non-null int64
bridge                85601 non-null object
drive_access          85601 non-null int64
est_width             85601 non-null object
fromIntersectionId    85601 non-null object
highway               85601 non-null object
id                    85601 non-null object
junction              85601 non-null object
key                   85601 non-null object
landuse               85601 non-null object
lanes                 85601 non-null object
length                85601 non-null float64
link                  85601 non-null object
maxspeed              85601 non-null object
name                  85601 non-null object
nodeIds        

In [476]:
%%time

print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(all_shape_gdf, shape_prop)

with open(output_folder + "sf_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 11.6 s


In [477]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

link_prop = all_link_df.columns.tolist()

out = all_link_df[link_prop].to_json(orient = "records")

with open(output_folder + "sf_link.json", 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 2.4 s


In [478]:
%%time

print("-------write out node geojson---------")

node_prop = all_node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(all_node_gdf, node_prop)

with open(output_folder + "sf_node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 6.67 s


In [479]:
all_node_gdf

Unnamed: 0,osm_node_id,shst_node_id,drive_access,walk_access,bike_access,model_node_id,transit_access,geometry
0,65308539,55d370526a55a5d348e23751aad86ac0,1,1,1,1000000,0,POINT (-122.4888978 37.77797810000001)
1,65344363,5c7c469988248e3572b82b39c894dfd6,1,1,1,1000001,0,POINT (-122.4035822 37.73440549999999)
2,4034014436,228e65c325522cb3d826c5afd828abcf,1,1,1,1000002,0,POINT (-122.4644317 37.76607470000001)
3,65288594,7eaf8516bbeb338c7fc1a89ec32accd2,1,1,1,1000003,0,POINT (-122.4231171 37.73735660000001)
4,5760414283,130a1e4c1d3a355511c7b01e24100273,1,1,1,1000004,0,POINT (-122.4495758 37.7983547)
5,5443128405,798cae7a1d2ac00f3460b6f1452ee01d,1,1,1,1000005,0,POINT (-122.3999294 37.72571270000001)
6,3065529300,2ceb892d1ee28b7a727941abe880a46f,0,1,1,1000006,0,POINT (-122.4718894 37.75600090000002)
7,315414051,efcddacf1e270c9f18e6142c588d66d9,0,1,1,1000007,0,POINT (-122.4910875 37.72551959999999)
8,295218284,da5017cf77a0ef3513b0a1dbfe6d0534,1,1,1,1000008,0,POINT (-122.4225309 37.74794210000001)
9,65305990,d86f4ace331185501ff11866cb3349df,1,1,1,1000009,0,POINT (-122.4089677 37.74563609999999)


In [480]:
# write out QA/QC link shape

all_link_gdf = gpd.GeoDataFrame(pd.merge(all_link_df,
                       all_shape_gdf[["id", "geometry"]],
                       how = "left",
                       on = "id"))
all_link_gdf.crs = all_node_gdf.crs

print("-------write out link shape geojson---------")

link_shape_prop = all_link_gdf.drop("geometry", axis = 1).columns.tolist()
link_shape_geojson = link_df_to_geojson(all_link_gdf, link_shape_prop)

with open(output_folder + "sf_link_with_shape.geojson", "w") as f:
    json.dump(link_shape_geojson, f)

-------write out link shape geojson---------


In [481]:
shape_gdf.id.nunique()

42149

In [482]:
link_df.id.nunique()

42149

In [483]:
all_link_df.id.nunique()

47201

In [484]:
all_shape_gdf.id.nunique()

47201

In [485]:
all_link_gdf[all_link_gdf.id == "cc3030"]

Unnamed: 0,A,B,LANES,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,...,tomtom_id,tomtom_t_jnctid,transit_access,tunnel,u,v,walk_access,wayId,width,geometry
79387,1018475,425,0,,,0,,1,,5725cf4b71d807924f5397519dbb2c62,...,0.0,0.0,0,,65319956,0,0,,,"LINESTRING (-122.4153867 37.7893258, -122.4135..."
84439,425,1018475,0,,,0,,1,,,...,0.0,0.0,0,,0,65319956,0,,,"LINESTRING (-122.4153867 37.7893258, -122.4135..."


In [486]:
all_node_gdf[all_node_gdf.osm_node_id == 65362154]

Unnamed: 0,osm_node_id,shst_node_id,drive_access,walk_access,bike_access,model_node_id,transit_access,geometry
3369,65362154,bfba34d5b946b131ba470c4a2f5d9e5c,1,1,1,1003369,0,POINT (-122.4140702 37.7828026)
