# This notebook processes SHST extraction data to network standard, and conflates with OSM, TomTom

In [398]:
import pandas as pd
import geopandas as gpd
import numpy as np
import glob
import json
import geojson
from shapely.geometry import Point
import osmnx as ox
import networkx as nx

# methods

In [641]:
def extract_osm_link_from_shst_shape(x):
    """
    if len(x.get("metadata").get("osmMetadata").get("waySections")) > 1:
        link_df = pd.DataFrame()
        all_link_df = pd.DataFrame(x.get("metadata").get("osmMetadata").get("waySections"))
        '''
        link_df = pd.Series(data = {"nodeIds" : all_link_df.nodeIds.tolist(),
                               "wayId" : all_link_df.wayId.tolist(),
                               "roadClass" : all_link_df.roadClass.tolist(),
                               "oneWay" : all_link_df.oneWay.tolist(),
                               "name" : all_link_df.name.tolist()})
        link_df = pd.DataFrame(data = link_df)
        print(link_df)
        '''
        for c in all_link_df.columns.tolist():
            attr_list = all_link_df[c].tolist()
            final = []
            if c == "nodeIds":
                attr_list = [item for sublist in attr_list for item in sublist]
            link_df[c] = [attr_list] * 1
                
    else:    
        link_df = pd.DataFrame(x.get("metadata").get("osmMetadata").get("waySections"))
    """
    link_df = pd.DataFrame(x.get("metadata").get("osmMetadata").get("waySections"))
    link_df["geometryId"] = x.get("metadata").get("geometryId")
    
    shst_link_df_list.append(link_df)

"""
def extract_osm_link_from_shst_shape_pdconcat(x):
    
    link_df = pd.DataFrame(x.get("metadata").get("osmMetadata").get("waySections"))
    link_df["geometryId"] = x.get("metadata").get("geometryId")
    
    shst_link_df = pd.concat([shst_link_df, link_df], sort = False, ignore_index = True)
""" 
    
def osm_link_with_shst_info(link_df, shst_gdf):
    """
    get complete osm links with shst info
    1. two way shst to two osm links
    2. add u, v node
    
    Parameters
    --------------
    osm link from shst extraction
    
    return
    --------------
    complete osm link with shst info
    """
    osm_link_gdf = pd.merge(link_df, 
                            shst_gdf.drop(["roadClass", "metadata", "source"], axis = 1),
                           how = "left",
                           left_on = "geometryId",
                           right_on = "id")
    
    return osm_link_gdf
    
    
def add_two_way_osm(link_gdf, osmnx_link):
    """
    for osm with oneway = False, add the reverse direction to complete
    
    Parameters
    ------------
    osm link from shst extraction, plus shst info
    
    return
    ------------
    complete osm link
    """
    osm_link_gdf = link_gdf.copy()
    osm_link_gdf["wayId"] = osm_link_gdf["wayId"].astype(int)
    osm_link_gdf.drop("name", axis = 1, inplace = True)
    
    osmnx_link_gdf = osmnx_link.copy()
    
    """
    osmnx_link_gdf.rename(columns = {"u" : "u_for_osm_join",
                                     "v" : "v_for_osm_join"},
                         inplace = True)
    """
    osmnx_link_gdf.drop_duplicates(subset = ["osmid"], inplace = True)
    osmnx_link_gdf.drop(["length", "u", "v", "geometry"], axis = 1, inplace = True)
    
    print("shst extraction has geometry: ", osm_link_gdf.id.nunique())
    print("osm links from shst extraction: ", osm_link_gdf.shape[0])
    
    osm_link_gdf["u"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[0]))
    osm_link_gdf["v"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[-1]))
    
    print("---joining osm shst with osmnx data---")
    osm_link_gdf = pd.merge(osm_link_gdf,
                            osmnx_link_gdf,
                            left_on = ["wayId"],
                            right_on = ["osmid"],
                            how = "left")
    
    """
    #join on osmid, u, v
    osm_link_gdf["u_for_osm_join"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[0]))
    osm_link_gdf["v_for_osm_join"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[1]))
    
    """
    
    #osm_link_gdf["oneWay"] = osm_link_gdf.apply(lambda x: True if True in [x.oneWay, x.oneway] else x.oneWay,
     #                                          axis = 1)
    
    reverse_osm_link_gdf = osm_link_gdf[(osm_link_gdf.oneWay == False) & 
                                        (osm_link_gdf.forwardReferenceId != osm_link_gdf.backReferenceId) & 
                                        (osm_link_gdf.u != osm_link_gdf.v)].copy()
    
    print("which includes two way links:", reverse_osm_link_gdf.shape[0])
    print("and they are geometrys: ", reverse_osm_link_gdf.id.nunique())
    
    reverse_osm_link_gdf.rename(columns = {"u" : "v",
                                          "v" : "u",
                                           #"u_for_osm_join" : "v_for_osm_join",
                                           #"v_for_osm_join" : "u_for_osm_join",
                                          "forwardReferenceId" : "backReferenceId",
                                          "backReferenceId" : "forwardReferenceId",
                                          "fromIntersectionId" : "toIntersectionId",
                                          "toIntersectionId" : "fromIntersectionId"},
                               inplace = True)
    
    reverse_osm_link_gdf["reverse_out"] = 1
    
    osm_link_gdf = pd.concat([osm_link_gdf, reverse_osm_link_gdf],
                            sort = False,
                            ignore_index = True)
    
    osm_link_gdf.rename(columns = {"forwardReferenceId" : "shstReferenceId",
                                 "geometryId" : "shstGeometryId"},
                      inplace = True)
    
    osm_link_gdf.drop("backReferenceId",
                     axis = 1,
                     inplace = True)
    """
    # join with osmnx
    print("---joining osm shst with osmnx data---")
    #col_before_join = osm_link_gdf.columns.tolist()
    osm_link_gdf = pd.merge(osm_link_gdf,
                            osmnx_link_gdf,
                            left_on = ["wayId"],#, "u_for_osm_join", "v_for_osm_join"],
                            right_on = ["osmid"],#, "u_for_osm_join", "v_for_osm_join"],
                           how = "left")
    """
    """
    succ_osm_link_gdf = osm_link_gdf[osm_link_gdf.osmid.notnull()].copy()
    print("-----number of matched osm------- :", succ_osm_link_gdf.shape[0])
    
    fail_osm_link_gdf = osm_link_gdf[osm_link_gdf.osmid.isnull()].copy()
    fail_osm_link_gdf = fail_osm_link_gdf[col_before_join].copy()
    print("-----number of un-matched osm-------:", fail_osm_link_gdf.shape[0])
    
    fail_osm_link_gdf["u_for_osm_join"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[-1]))
    fail_osm_link_gdf["v_for_osm_join"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[-2]))
    
    fail_osm_link_gdf = pd.merge(fail_osm_link_gdf,
                            osmnx_link_gdf.drop("geometry", axis = 1),
                            left_on = ["wayId", "u_for_osm_join", "v_for_osm_join"],
                            right_on = ["osmid", "u_for_osm_join", "v_for_osm_join"],
                           how = "left")
    
    print("-----number of un-matched osm after rejoining-------:", fail_osm_link_gdf.shape[0])
    
    osm_link_gdf = pd.concat([succ_osm_link_gdf, fail_osm_link_gdf], ignore_index = True, sort = False)
    """
    
    
    print("after join, osm links from shst extraction: ", 
          len(osm_link_gdf), 
          " out of which there are ", 
          len(osm_link_gdf[osm_link_gdf.osmid.isnull()]), 
          " links that do not have osm info, due to shst extraction (default tile 181224) contains ", 
          osm_link_gdf[osm_link_gdf.osmid.isnull()].wayId.nunique(), 
          " osm ids that are not included in latest OSM extraction, e.g. private streets, closed streets.")
    print("after join, there are shst geometry # : ", osm_link_gdf.groupby(["shstReferenceId", "shstGeometryId"]).count().shape[0])
    
    return osm_link_gdf


def consolidate_osm_way_to_shst_link(osm_link):
    """
    if a shst link has more than one osm ways, aggregate info into one, e.g. series([1,2,3]) to cell value [1,2,3]
    
    Parameters
    ----------
    osm link with shst info
    
    return
    ----------
    shst link with osm info
    
    """
    osm_link_gdf = osm_link.copy()

    agg_dict = {"geometry" : lambda x: x.iloc[0],
                "u" : lambda x: x.iloc[0],
                "v" : lambda x: x.iloc[-1]}
    
    for c in ['link', 'nodeIds', 'oneWay', 'roadClass', 'roundabout', 'wayId', 'access', 'area', 'bridge',
              'est_width', 'highway', 'junction', 'key', 'landuse', 'lanes', 'maxspeed', 'name', 'oneway', 'ref', 'service', 
              'tunnel', 'width']:
        agg_dict.update({c : lambda x: list(x) if len(list(x)) > 1 else list(x)[0]})
    
    print("-----start aggregating osm segments to one shst link for forward links----------")
    forward_link_gdf = osm_link_gdf[osm_link_gdf.reverse_out == 0].copy()
    
    if len(forward_link_gdf) > 0:
        forward_link_gdf = forward_link_gdf.groupby(
                                        ["shstReferenceId", "id", "shstGeometryId", "fromIntersectionId", "toIntersectionId"]
                                        ).agg(agg_dict).reset_index()
        forward_link_gdf["forward"] = 1
    else:
        forward_link_gdf = None
    
    print("-----start aggregating osm segments to one shst link for backward links----------")
    
    backward_link_gdf = osm_link_gdf[osm_link_gdf.reverse_out==1].copy()
    
    if len(backward_link_gdf) > 0:
        agg_dict.update({"u" : lambda x: x.iloc[-1],
                     "v" : lambda x: x.iloc[0]})    

        backward_link_gdf = backward_link_gdf.groupby(
                                        ["shstReferenceId", "id", "shstGeometryId", "fromIntersectionId", "toIntersectionId"]
                                        ).agg(agg_dict).reset_index()
    else:
        backward_link_gdf = None
    
    shst_link_gdf = None
    
    if (forward_link_gdf is None):
        print("back")
        shst_link_gdf = backward_link_gdf
        
    if (backward_link_gdf is None):
        print("for")
        shst_link_gdf = forward_link_gdf
        
    if (forward_link_gdf is not None) and (backward_link_gdf is not None):
        print("all")
        shst_link_gdf = pd.concat([forward_link_gdf, backward_link_gdf],
                                  sort = False,
                                  ignore_index = True)
        
    shst_link_gdf = gpd.GeoDataFrame(shst_link_gdf,
                                    crs = {'init': 'epsg:4326'})
    
    return shst_link_gdf


def create_node_gdf(link_gdf):
    """
    create shst node gdf from shst geometry
    
    Paramters
    ---------
    link_gdf:  shst links with osm info
    
    return
    ---------
    shst nodes with osm info
    
    """
    print("-------start creating shst nodes--------")
    # geometry only matches for forward direction
    forward_link_gdf = link_gdf[link_gdf.forward == 1].copy()
    
    # create point geometry from shst linestring
    forward_link_gdf["u_point"] = forward_link_gdf.apply(lambda x: Point(list(x.geometry.coords)[0]), axis = 1)
    forward_link_gdf["v_point"] = forward_link_gdf.apply(lambda x: Point(list(x.geometry.coords)[-1]), axis = 1)
    
    # get from points
    point_gdf = forward_link_gdf[["u", "fromIntersectionId", "u_point"]].copy()
    
    point_gdf.rename(columns = {"u" : "osm_node_id",
                      "fromIntersectionId" : "shst_node_id",
                      "u_point" : "geometry"},
                    inplace = True)
    
    # append to points
    point_gdf = pd.concat([point_gdf, forward_link_gdf[["v", "toIntersectionId", "v_point"]].rename(columns = 
                     {"v" : "osm_node_id",
                      "toIntersectionId" : "shst_node_id",
                      "v_point" : "geometry"})],
                     sort = False,
                     ignore_index = True)
    
    # drop duplicates
    point_gdf.drop_duplicates(subset = ["osm_node_id", "shst_node_id"], inplace = True)
    
    point_gdf = gpd.GeoDataFrame(point_gdf,
                                 crs = {'init': 'epsg:4326'})
    
    return point_gdf



def link_df_to_geojson(df, properties):
    """
    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    geojson = {"type":"FeatureCollection", "features":[]}
    for _, row in df.iterrows():
        feature = {"type":"Feature",
                   "properties":{},
                   "geometry":{"type":"LineString",
                               "coordinates":[]}}
        feature["geometry"]["coordinates"] = [[x, y] for (x,y) in list(row["geometry"].coords)]
        for prop in properties:
            feature["properties"][prop] = row[prop]
        geojson["features"].append(feature)
    return geojson


def point_df_to_geojson(df: pd.DataFrame, properties: list):
    """
    Author: Geoff Boeing:
    https://geoffboeing.com/2015/10/exporting-python-data-geojson/
    """
    
    geojson = {"type": "FeatureCollection", "features": []}
    for _, row in df.iterrows():
        feature = {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Point", "coordinates": []},
        }
        feature["geometry"]["coordinates"] = [row["geometry"].x, row["geometry"].y]
        for prop in properties:
            feature["properties"][prop] = row[prop]
        geojson["features"].append(feature)
    return geojson

def fill_na(df_na):
    """
    fill str NaN with ""
    fill numeric NaN with 0
    """
    df = df_na.copy()
    num_col = list(df.select_dtypes([np.number]).columns)
    print("numeric columns: ", num_col)
    object_col = list(df.select_dtypes(['object']).columns)
    print("str columns: ", object_col)
    
    for x in list(df.columns):
        if x in num_col:
            df[x].fillna(0, inplace = True)
        elif x in object_col:
            df[x].fillna("", inplace = True)
    
    return df

def ox_graph(nodes_df, links_df):
    """
        create an osmnx-flavored network graph
        osmnx doesn't like values that are arrays, so remove the variables
        that have arrays.  osmnx also requires that certain variables
        be filled in, so do that too.
        Parameters
        ----------
        nodes_df : GeoDataFrame
        link_df : GeoDataFrame
        Returns
        -------
        networkx multidigraph
    """
    try:
        graph_nodes = nodes_df.drop(
                ["inboundReferenceId", "outboundReferenceId"], axis=1
            )
    except:
        graph_nodes = nodes_df

    graph_nodes.gdf_name = "network_nodes"
    graph_nodes['id'] = graph_nodes['osm_node_id']

    graph_links = links_df.copy()
    graph_links['id'] = graph_links['osm_link_id']
    graph_links['key'] = str(graph_links['osm_link_id'])+"_"+str(graph_links['model_link_id'])

    G = ox.gdfs_to_graph(graph_nodes, graph_links)

    return G

In [None]:
pd.options.display.max_columns = None

In [None]:
osmnx_data_folder = "../data/interim/"
shst_node_js_extraction_folder = "../data/interim/"
data_interim_folder = "../data/interim/"
data_process_param_folder  = "../data/processed/"
roadway_output_folder = "../data/processed/step1_roadway/"
county_shape_folder = "../data/external/county/"

# read OSM data

In [2]:
# input osm data
print("-------reading osmnx data---------")

osmnx_link_gdf = gpd.read_file(osmnx_data_folder + "link.geojson")
osmnx_node_gdf = gpd.read_file(osmnx_data_folder + "node.geojson")

print("-------finished reading osmnx data---------")

# read SHST extraction output, and process it to standard network

In [3]:
def read_shst_extract(path, suffix):
    shst_gdf = pd.DataFrame()
    
    shst_file = glob.glob(path + suffix)
    print("----------start reading shst extraction data-------------")
    for i in shst_file:
        print("reading shst extraction data : ", i)
        new = gpd.read_file(i)
        new['source'] = i
        shst_gdf = pd.concat([shst_gdf, new],
                             ignore_index = True,
                             sort = False)
    print("----------finished reading shst extraction data-------------")
    return shst_gdf

In [4]:
%%time
shst_link_gdf = read_shst_extract(shst_node_js_extraction_folder, "*.out.geojson")

----------start reading shst extraction data-------------
reading shst extraction data :  ../shst_node_js_extraction\mtc_14.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_1.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_2.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_3.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_4.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_5.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_6.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_7.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_8.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_9.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_10.out.geojson
reading shst extraction data :  ../shst_node_js_extraction\mtc_11.out.geojson
reading shst ex

In [491]:
print(shst_link_gdf.columns)

Index(['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId',
       'backReferenceId', 'roadClass', 'metadata', 'geometry', 'source'],
      dtype='object')


In [490]:
shst_link_gdf.head(3)

Unnamed: 0,id,fromIntersectionId,toIntersectionId,forwardReferenceId,backReferenceId,roadClass,metadata,geometry,source
0,09c04e14c22e115e6084c5d1eb44a7ca,05139c99d0877460767855ad7c958852,615fb80065cf56da5de8fc6b68739245,6f1c74831ae656412c6eb750567d2959,4c95b29b77679fb5d1609138e8fee0a0,Other,"{'gisMetadata': [], 'geometryId': '09c04e14c22...","LINESTRING (-121.970487 37.7731422, -121.97087...",../shst_node_js_extraction\mtc_14.out.geojson
1,af9bad20a2d47a87cde46fe3eb6bc2c4,dd1b055f0d6c8b0f74a5d817f19ea398,f2806097ae2371272f3af600edd91020,da0506bc58587ba7617e996be6c2aa26,a740ae9482ba94b0012ac4809461d816,Service,"{'gisMetadata': [], 'geometryId': 'af9bad20a2d...","LINESTRING (-121.9702245 37.7732102, -121.9706...",../shst_node_js_extraction\mtc_14.out.geojson
2,3507c17d42e8a478c534323a14df0166,b3ab693ef31a101b5ff750e84066939d,615fb80065cf56da5de8fc6b68739245,bb48b8a820688dc0402a4b6d3670502b,5fdca9d681d309860bee407674c80dcb,Other,"{'gisMetadata': [], 'geometryId': '3507c17d42e...","LINESTRING (-121.9710766 37.7736652, -121.9708...",../shst_node_js_extraction\mtc_14.out.geojson


In [505]:
shst_link_gdf.metadata.iloc[0]

{'gisMetadata': [],
 'geometryId': '09c04e14c22e115e6084c5d1eb44a7ca',
 'osmMetadata': {'waySections': [{'nodeIds': ['2386056426', '2386056457'],
    'wayId': '230076799',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''}],
  'name': ''}}

In [504]:
shst_link_gdf.metadata.iloc[9]

{'gisMetadata': [],
 'geometryId': 'b22136a7c3de651ac15b7ea95d1e2084',
 'osmMetadata': {'waySections': [{'nodeIds': ['2386068319',
     '2386068316',
     '57956405'],
    'wayId': '230078224',
    'roadClass': 'Service',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''},
   {'nodeIds': ['57956405', '2386068312'],
    'wayId': '7880463',
    'roadClass': 'Residential',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': 'Annabel Lane'}],
  'name': ''}}

In [9]:
# shst geometry file has duplicates, due to the buffer area along polygon boundries
# drop duplicates

print("--------removing duplicated shst extraction data---------")
print("before removing duplicates, shst extraction has geometry # : ", shst_link_gdf.shape[0])

shst_link_non_dup_gdf = shst_link_gdf.drop_duplicates(
    subset = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId'])

print("after removing duplicates, shst extraction has geometry # : ", shst_link_non_dup_gdf.shape[0])

--------removing duplicated shst extraction data---------
before removing duplicates, shst extraction has geometry # :  1237008
after removing duplicates, shst extraction has geometry # :  908281


In [20]:
%%time

# this step is needed for obtaining OSM data for SHST links

print("-------extracting single osm ways by every shst geometry----------")

shst_link_df_list = []

temp = shst_link_non_dup_gdf.apply(lambda x: extract_osm_link_from_shst_shape(x),
                            axis = 1)

osm_link_df = pd.concat(shst_link_df_list)

-------extracting single osm ways by every shst geometry----------
Wall time: 33min 15s


In [625]:
osm_link_gdf = osm_link_with_shst_info(osm_link_df,
                                      shst_link_non_dup_gdf)

In [631]:
osmnx_link_gdf[osmnx_link_gdf.osmid == 635006789]

Unnamed: 0,access,area,bridge,est_width,highway,junction,key,landuse,lanes,length,maxspeed,name,oneway,osmid,ref,service,tunnel,u,v,width,geometry
174257,,,,,tertiary,,0,,,4.637,30 mph,Hanover Street,True,635006789,,,,7055083815,4177955847,,"LINESTRING (-122.1457079 37.4109288, -122.1457..."
174258,,,,,tertiary,,0,,,60.788,30 mph,Hanover Street,True,635006789,,,,7055083816,6732375482,,"LINESTRING (-122.1452791 37.4114483, -122.1456..."
590706,,,,,tertiary,,0,,,5.853,30 mph,Hanover Street,True,635006789,,,,6208518527,7055083815,,"LINESTRING (-122.1456725 37.4109733, -122.1457..."
1105984,,,,,tertiary,,0,,,3.737,30 mph,Hanover Street,True,635006789,,,,5991260810,7055083816,,"LINESTRING (-122.1452373 37.4114535, -122.1452..."
3059663,,,,,tertiary,,0,,,2.434,30 mph,Hanover Street,True,635006789,,,,6732375482,6208518527,,"LINESTRING (-122.145657 37.4109914, -122.14567..."


In [628]:
osm_link_gdf[osm_link_gdf.geometryId == "959d4c59605650229d66d14423d971d0"]

Unnamed: 0,link,name,nodeIds,oneWay,roadClass,roundabout,wayId,geometryId,id,fromIntersectionId,toIntersectionId,forwardReferenceId,backReferenceId,geometry
484616,False,,"[1750865944, 5991260810]",False,Tertiary,False,417198045,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,1c03d41f12ec25a8d6667c30302c9eb5,f9f6b56abd6f30436d529bfcb8d68f4d,74f674b6d6ee32ef61d99d0cdb6afa56,043ac564db0dd88710d588714f68fcba,"LINESTRING (-122.1450835 37.4116109, -122.1451..."
484617,False,,"[5991260810, 4177955847]",False,Tertiary,False,635006789,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,1c03d41f12ec25a8d6667c30302c9eb5,f9f6b56abd6f30436d529bfcb8d68f4d,74f674b6d6ee32ef61d99d0cdb6afa56,043ac564db0dd88710d588714f68fcba,"LINESTRING (-122.1450835 37.4116109, -122.1451..."


In [633]:
osm_link_df[osm_link_df.wayId == "635006789"]

Unnamed: 0,link,name,nodeIds,oneWay,roadClass,roundabout,wayId,geometryId
1,False,,"[5991260810, 4177955847]",False,Tertiary,False,635006789,959d4c59605650229d66d14423d971d0


In [637]:
# note, the sharedstreets extraction using default tile osm/planet 181224

# 1. join SHST with OSM
# 2. add two way links

osm_link_gdf = osm_link_with_shst_info(osm_link_df,
                                      shst_link_non_dup_gdf)

osm_link_gdf = add_two_way_osm(osm_link_gdf,osmnx_link_gdf)

shst extraction has geometry:  908281
osm links from shst extraction:  974897
---joining osm shst with osmnx data---
which includes two way links: 843159
and they are geometrys:  798093
after join, osm links from shst extraction:  1818056  out of which there are  135953  links that do not have osm info, due to shst extraction (default tile 181224) contains  38386  osm ids that are not included in latest OSM extraction, e.g. private streets, closed streets.
after join, there are shst geometry # :  1706374


In [642]:
# fill NAs
# for shst links that do not have complete osm info 

osm_link_non_na_gdf = fill_na(osm_link_gdf)

numeric columns:  ['wayId', 'u', 'v', 'key', 'osmid', 'reverse_out']
str columns:  ['nodeIds', 'roadClass', 'shstGeometryId', 'id', 'fromIntersectionId', 'toIntersectionId', 'shstReferenceId', 'geometry', 'access', 'area', 'bridge', 'est_width', 'highway', 'junction', 'landuse', 'lanes', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'tunnel', 'width']


In [791]:
%%time

# aggregate osm data back to shst geometry based links

link_gdf = consolidate_osm_way_to_shst_link(osm_link_non_na_gdf)

print("after joining back to shst geometry, network has ", len(link_gdf), " links, which are based on ", 
      link_gdf.shstGeometryId.nunique(), " geometries")

-----start aggregating osm segments to one shst link for forward links----------
-----start aggregating osm segments to one shst link for backward links----------
all
after joinging back to shst geometry, network has  1706374  links, which are based on  908281  geometries
Wall time: 14min 38s


In [969]:
# simplify highway
highway_to_roadway_df = pd.read_csv(data_process_param_folder + "highway_to_roadway.csv").fillna("")
highway_to_roadway_dict = pd.Series(highway_to_roadway_df.roadway.values, index = highway_to_roadway_df.highway).to_dict()
roadway_hierarchy_dict = pd.Series(highway_to_roadway_df.hierarchy.values, index = highway_to_roadway_df.roadway).to_dict()

def highway_attribute_list_to_value(x):
    if type(x.highway) == list:
        value_list = list(set([highway_to_roadway_dict[c] for c in x.highway]))
        if len(value_list) == 1:
            if value_list[0] != "":
                return value_list[0]
            else:
                if type(x.roadClass) == list:
                    return highway_to_roadway_dict[x.roadClass[0].lower()]
                else:
                    return highway_to_roadway_dict[x.roadClass.lower()]

        else:
            ret_val = value_list[0]
            ret_val_level = roadway_hierarchy_dict[ret_val]
            for c in value_list:
                val_level = roadway_hierarchy_dict[c]
                if val_level < ret_val_level:
                    ret_val = c
                    ret_val_level = val_level
                else:
                    continue
            return ret_val
    else:
        if x.highway == "":
            return highway_to_roadway_dict[x.roadClass.lower()]
        else:
            return highway_to_roadway_dict[x.highway]
    
link_gdf["roadway"] = link_gdf.apply(lambda x: highway_attribute_list_to_value(x),
                                    axis = 1)

In [1015]:
print(link_gdf.roadway.value_counts())
print(link_gdf[link_gdf.highway == ""].roadway.value_counts())

service           569334
residential       559518
footway           252362
cycleway          115008
tertiary           94058
secondary          69265
primary            29173
motorway_link       4980
trunk               4197
motorway            2847
primary_link        1567
secondary_link      1511
trunk_link          1344
tertiary_link        608
Name: roadway, dtype: int64
cycleway       51562
service        32738
residential    29462
tertiary        3130
secondary       2033
primary         1374
motorway         128
trunk            106
Name: roadway, dtype: int64


In [1016]:
link_gdf.roadway.value_counts()

service           569334
residential       559518
footway           252362
cycleway          115008
tertiary           94058
secondary          69265
primary            29173
motorway_link       4980
trunk               4197
motorway            2847
primary_link        1567
secondary_link      1511
trunk_link          1344
tertiary_link        608
Name: roadway, dtype: int64

In [1017]:
# there are links with different shstgeomid, but same shstrefid, to/from nodes

shst_refid_counts_df = pd.DataFrame(link_gdf.shstReferenceId.value_counts())
shst_refid_counts_df = shst_refid_counts_df[shst_refid_counts_df.shstReferenceId == 2]

links_with_diff_geomid_same_refid_df = link_gdf[link_gdf.shstReferenceId.isin(shst_refid_counts_df.index.values)]

links_with_diff_geomid_same_refid_df.sort_values(by = ["shstReferenceId"])

links_with_diff_geomid_same_refid_df.highway.value_counts()

links_with_diff_geomid_same_refid_df[links_with_diff_geomid_same_refid_df.highway == "tertiary"].sort_values(by = ["shstReferenceId"])

links_with_diff_geomid_same_refid_df[links_with_diff_geomid_same_refid_df.shstReferenceId == "18b6ce8eeb59e96413ede8e115ac2aa1"]

links_with_diff_geomid_same_refid_df.groupby("shstReferenceId")['roadway'].apply(list).value_counts()

# drop one of the links that have two shstGeomId

link_gdf.drop_duplicates(subset = ["shstReferenceId"],
                        inplace = True)

link_gdf.shape

(1705772, 35)

In [1018]:
print("In the end, network has ", len(link_gdf), " links, which are based on ", 
      link_gdf.shstGeometryId.nunique(), " geometries")

In the end, network has  1705772  links, which are based on  908267  geometries


In [977]:
# add network type variables

network_type_df = pd.read_csv(data_process_param_folder + "network_type_indicator.csv")

link_gdf = pd.merge(link_gdf,
                    network_type_df,
                    how = "left",
                    on = "roadway")

In [979]:
%%time

# create node gdf

node_gdf = create_node_gdf(link_gdf)

print("In the end, network has ", len(node_gdf), " nodes")

-------start creating shst nodes--------
In the end, network has  661159  nodes
Wall time: 1min 42s


In [980]:
# add network type variable for node

A_B_df = pd.concat([link_gdf[["u", "drive_access", "walk_access", "bike_access"]].rename(columns = {"u":"osm_node_id"}),
                  link_gdf[["v", "drive_access", "walk_access", "bike_access"]].rename(columns = {"v":"osm_node_id"})],
                  sort = False,
                  ignore_index = True)

A_B_df.drop_duplicates(inplace = True)

A_B_df = A_B_df.groupby("osm_node_id").max().reset_index()

node_gdf = pd.merge(node_gdf,
                      A_B_df,
                      how = "left",
                      on = "osm_node_id")

In [1019]:
osm_link_gdf[osm_link_gdf.shstGeometryId == "959d4c59605650229d66d14423d971d0"]

Unnamed: 0,link,nodeIds,oneWay,roadClass,roundabout,wayId,shstGeometryId,id,fromIntersectionId,toIntersectionId,shstReferenceId,geometry,u,v,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,osmid,ref,service,tunnel,width,reverse_out
484616,False,"[1750865944, 5991260810]",False,Tertiary,False,417198045,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,1c03d41f12ec25a8d6667c30302c9eb5,f9f6b56abd6f30436d529bfcb8d68f4d,74f674b6d6ee32ef61d99d0cdb6afa56,"LINESTRING (-122.1450835 37.4116109, -122.1451...",1750865944,5991260810,,,,,,,,,,,,,,,,,,
484617,False,"[5991260810, 4177955847]",False,Tertiary,False,635006789,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,1c03d41f12ec25a8d6667c30302c9eb5,f9f6b56abd6f30436d529bfcb8d68f4d,74f674b6d6ee32ef61d99d0cdb6afa56,"LINESTRING (-122.1450835 37.4116109, -122.1451...",5991260810,4177955847,,,,,tertiary,,0.0,,,30 mph,Hanover Street,True,635006789.0,,,,,
1397295,False,"[1750865944, 5991260810]",False,Tertiary,False,417198045,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,f9f6b56abd6f30436d529bfcb8d68f4d,1c03d41f12ec25a8d6667c30302c9eb5,043ac564db0dd88710d588714f68fcba,"LINESTRING (-122.1450835 37.4116109, -122.1451...",5991260810,1750865944,,,,,,,,,,,,,,,,,,1.0
1397296,False,"[5991260810, 4177955847]",False,Tertiary,False,635006789,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,f9f6b56abd6f30436d529bfcb8d68f4d,1c03d41f12ec25a8d6667c30302c9eb5,043ac564db0dd88710d588714f68fcba,"LINESTRING (-122.1450835 37.4116109, -122.1451...",4177955847,5991260810,,,,,tertiary,,0.0,,,30 mph,Hanover Street,True,635006789.0,,,,,1.0


In [1021]:
link_gdf[link_gdf.shstGeometryId == "959d4c59605650229d66d14423d971d0"]

Unnamed: 0,shstReferenceId,id,shstGeometryId,fromIntersectionId,toIntersectionId,geometry,u,v,link,nodeIds,oneWay,roadClass,roundabout,wayId,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,ref,service,tunnel,width,forward,roadway,drive_access,walk_access,bike_access
415003,74f674b6d6ee32ef61d99d0cdb6afa56,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,1c03d41f12ec25a8d6667c30302c9eb5,f9f6b56abd6f30436d529bfcb8d68f4d,"LINESTRING (-122.1450835 37.4116109, -122.1451...",1750865944,4177955847,"[False, False]","[[1750865944, 5991260810], [5991260810, 417795...","[False, False]","[Tertiary, Tertiary]","[False, False]","[417198045, 635006789]","[, nan]","[, nan]","[, nan]","[, nan]","[, tertiary]","[, nan]","[0.0, 0.0]","[, nan]","[, nan]","[, 30 mph]","[, Hanover Street]","[, True]","[, nan]","[, nan]","[, nan]","[, nan]",1.0,tertiary,1,1,1
921376,043ac564db0dd88710d588714f68fcba,959d4c59605650229d66d14423d971d0,959d4c59605650229d66d14423d971d0,f9f6b56abd6f30436d529bfcb8d68f4d,1c03d41f12ec25a8d6667c30302c9eb5,"LINESTRING (-122.1450835 37.4116109, -122.1451...",4177955847,1750865944,"[False, False]","[[1750865944, 5991260810], [5991260810, 417795...","[False, False]","[Tertiary, Tertiary]","[False, False]","[417198045, 635006789]","[, nan]","[, nan]","[, nan]","[, nan]","[, tertiary]","[, nan]","[0.0, 0.0]","[, nan]","[, nan]","[, 30 mph]","[, Hanover Street]","[, True]","[, nan]","[, nan]","[, nan]","[, nan]",,tertiary,1,1,1


In [1022]:
node_gdf.osm_node_id.nunique()

661159

In [1023]:
len(set(link_gdf.u.tolist() + link_gdf.v.tolist()))

661159

In [1024]:
link_gdf[~link_gdf.v.isin(node_gdf.osm_node_id.tolist())]

Unnamed: 0,shstReferenceId,id,shstGeometryId,fromIntersectionId,toIntersectionId,geometry,u,v,link,nodeIds,oneWay,roadClass,roundabout,wayId,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,ref,service,tunnel,width,forward,roadway,drive_access,walk_access,bike_access


In [1025]:
print(shst_link_non_dup_gdf.columns)
print(shst_link_non_dup_gdf.shape)
print(shst_link_non_dup_gdf.id.nunique())

Index(['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId',
       'backReferenceId', 'roadClass', 'metadata', 'geometry', 'source'],
      dtype='object')
(908281, 9)
908281


In [987]:
shape_gdf = shst_link_non_dup_gdf[shst_link_non_dup_gdf.id.isin(link_gdf.shstGeometryId.tolist())].copy()
print(" In the end, there are " + str(len(shape_gdf)) + " geometries.")

 In the end, there are 908267 geometries.


In [988]:
%%time

print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(shape_gdf, shape_prop)

with open(roadway_output_folder + "shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 6min 11s


In [989]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

link_prop = link_gdf.drop(["geometry", "forward", "roadClass", "oneway"], axis = 1).columns.tolist()

out = link_gdf[link_prop].to_json(orient = "records")

with open(roadway_output_folder + "link.json', 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 40.8 s


In [990]:
%%time

print("-------write out node geojson---------")

node_prop = node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(node_gdf, node_prop)

with open(roadway_output_folder + "node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 2min 52s


# SF county TomTom conflation

In [991]:
# write out SF links only
sf_boundry = gpd.read_file(county_shape_folder + "boundary_4.geojson")
sf_boundry.crs

sf_shape_gdf = shape_gdf[shape_gdf.intersects(sf_boundry.geometry.unary_union)].copy()

sf_link_gdf = link_gdf[link_gdf.intersects(sf_boundry.geometry.unary_union)].copy()

sf_node_gdf = node_gdf[node_gdf.osm_node_id.isin(sf_link_gdf.u.tolist() + sf_link_gdf.v.tolist())].copy()


print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(sf_shape_gdf, shape_prop)

with open(roadway_output_folder + "sf_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

    
print("-------write out link json---------")

link_prop = sf_link_gdf.drop(["geometry", "forward"], axis = 1).columns.tolist()

out = sf_link_gdf[link_prop].to_json(orient = "records")

with open(roadway_output_folder + "sf_link.json', 'w') as f:
    f.write(out)

    
print("-------write out node geojson---------")

node_prop = sf_node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(sf_node_gdf, node_prop)

with open(roadway_output_folder + "sf_node.geojson", "w") as f:
    json.dump(node_geojson, f)    

-------write out link shape geojson---------
-------write out link json---------
-------write out node geojson---------


In [689]:
# read in tomtom conflation

tomtom_raw_gdf = gpd.read_file(data_interim_folder + "tomtom_sf.geojson")
tomtom_match_gdf = gpd.read_file(data_interim_folder + "tomtom_sf.out.matched.geojson")

tomtom_match_gdf.drop_duplicates(subset = ['shstReferenceId', 'shstGeometryId', 
                                           'shstFromIntersectionId', 'shstToIntersectionId'],
                                inplace = True)

tomtom_match_gdf.rename(columns = {"shstFromIntersectionId" : "fromIntersectionId",
                                   "shstToIntersectionId" : "toIntersectionId"},
                       inplace = True)

In [690]:
print(tomtom_raw_gdf.shape)
print(tomtom_match_gdf.shape)

print(tomtom_raw_gdf.columns)
print(tomtom_match_gdf.columns)

(31805, 54)
(38709, 19)
Index(['ID', 'FEATTYP', 'FT', 'F_JNCTID', 'F_JNCTTYP', 'T_JNCTID', 'T_JNCTTYP',
       'PJ', 'METERS', 'FRC', 'NETCLASS', 'NETBCLASS', 'NET2CLASS', 'NAME',
       'NAMELC', 'SOL', 'NAMETYP', 'CHARGE', 'SHIELDNUM', 'RTETYP', 'RTEDIR',
       'RTEDIRVD', 'PROCSTAT', 'FOW', 'SLIPRD', 'FREEWAY', 'BACKRD', 'TOLLRD',
       'RDCOND', 'STUBBLE', 'PRIVATERD', 'CONSTATUS', 'ONEWAY', 'F_BP', 'T_BP',
       'F_ELEV', 'T_ELEV', 'KPH', 'MINUTES', 'POSACCUR', 'CARRIAGE', 'LANES',
       'RAMP', 'ADA', 'TRANS', 'DYNSPEED', 'SPEEDCAT', 'NTHRUTRAF', 'ROUGHRD',
       'PARTSTRUC', 'ORDER08', 'Shape_Length', 'tomtom_link_id', 'geometry'],
      dtype='object')
Index(['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'gisReferenceId', 'gisGeometryId',
       'gisTotalSegments', 'gisSegmentIndex', 'gisFromIntersectionId',
       'gisToIntersectionId', 'startSideOfStreet', 'endSideOfStreet',
       'sideOfStreet', 'score', 'matchType', 'pp_id', 'pp

In [691]:
tomtom_gdf = pd.merge(tomtom_match_gdf, 
                      tomtom_raw_gdf[['ID', 'F_JNCTID', 'T_JNCTID', 'LANES']],
                     how = "left",
                     left_on = ['pp_id', 'pp_f_jnctid', 'pp_t_jnctid'],
                     right_on = ['ID', 'F_JNCTID', 'T_JNCTID'])

print(tomtom_gdf.shape)

(38709, 23)


In [992]:
print(sf_link_gdf.shape)
print(sf_node_gdf.shape)
print(sf_link_gdf.columns)
print(sf_node_gdf.columns)

(74352, 35)
(27700, 6)
Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'geometry', 'u', 'v', 'link', 'nodeIds', 'oneWay',
       'roadClass', 'roundabout', 'wayId', 'access', 'area', 'bridge',
       'est_width', 'highway', 'junction', 'key', 'landuse', 'lanes',
       'maxspeed', 'name', 'oneway', 'ref', 'service', 'tunnel', 'width',
       'forward', 'roadway', 'drive_access', 'walk_access', 'bike_access'],
      dtype='object')
Index(['osm_node_id', 'shst_node_id', 'geometry', 'drive_access',
       'walk_access', 'bike_access'],
      dtype='object')


In [993]:
# join tomtom with network

sf_link_with_tomtom_gdf = pd.merge(sf_link_gdf,
                                  tomtom_gdf[['shstReferenceId', 'shstGeometryId', 'fromIntersectionId','toIntersectionId',
                                              'pp_id', 'pp_f_jnctid', 'pp_t_jnctid', "LANES"]],
                                  how = "left",
                                  on = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId'],
                                  )

In [994]:
# rename tomtom id

sf_link_with_tomtom_gdf.rename(columns = {"pp_id" : "tomtom_id",
                                         "pp_f_jnctid" : "tomtom_f_jnctid",
                                         "pp_t_jnctid" : "tomtom_t_jnctid"},
                              inplace = True)

#for quick visual check use

#write out link geojson Dave requested
link_prop = [c for c in sf_link_with_tomtom_gdf.columns if c != "geometry"]
link_geojson = link_df_to_geojson(sf_link_with_tomtom_gdf, link_prop)

with open("../tests/networkstandard/sf_link_with_tomtom_lanes_for_Dave.geojson", "w") as f:
    json.dump(link_geojson, f)

In [1026]:
print(sf_link_with_tomtom_gdf.columns)
print(sf_link_with_tomtom_gdf.shape)

Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'geometry', 'u', 'v', 'link', 'nodeIds', 'oneWay',
       'roadClass', 'roundabout', 'wayId', 'access', 'area', 'bridge',
       'est_width', 'highway', 'junction', 'key', 'landuse', 'lanes',
       'maxspeed', 'name', 'oneway', 'ref', 'service', 'tunnel', 'width',
       'forward', 'roadway', 'drive_access', 'walk_access', 'bike_access',
       'tomtom_id', 'tomtom_f_jnctid', 'tomtom_t_jnctid', 'LANES', 'length'],
      dtype='object')
(74352, 40)


In [1027]:
sf_link_with_tomtom_gdf.lanes.value_counts()

nan                          53590
2                             5405
                              3913
[nan, nan]                    3874
3                             1795
4                             1687
[nan, nan, nan]               1006
1                              581
[2, 2]                         217
[2, 3]                         198
5                              193
[, ]                           185
[3, 3]                         182
[4, 4]                         126
[nan, nan, nan, nan]           117
[nan, ]                        107
[3, 4]                          95
[, nan]                         85
[3, 2]                          70
[nan, nan, nan, nan, nan]       61
[, , ]                          45
6                               44
[4, 5]                          40
[2, 2, 2]                       37
[5, 5]                          29
[3, 3, 3]                       28
[nan, 2]                        25
[1, 1]                          24
[5, 4]              

In [1028]:
sf_link_with_tomtom_gdf.LANES.value_counts()

0.0    26515
2.0     6118
4.0     2265
3.0     2185
1.0     1040
5.0      410
6.0      107
7.0       13
Name: LANES, dtype: int64

In [1029]:
sf_link_with_tomtom_gdf.roadway.value_counts().sum()

74352

In [1030]:
sf_link_with_tomtom_gdf.roadway.value_counts()

residential       24799
footway           19613
service           12859
tertiary           5869
cycleway           4334
secondary          3977
primary            2256
motorway_link       148
primary_link        131
motorway            114
trunk                99
secondary_link       89
trunk_link           40
tertiary_link        24
Name: roadway, dtype: int64

In [1031]:
sf_link_with_tomtom_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 74352 entries, 0 to 74351
Data columns (total 40 columns):
shstReferenceId       74352 non-null object
id                    74352 non-null object
shstGeometryId        74352 non-null object
fromIntersectionId    74352 non-null object
toIntersectionId      74352 non-null object
geometry              74352 non-null object
u                     74352 non-null int64
v                     74352 non-null int64
link                  74352 non-null object
nodeIds               74352 non-null object
oneWay                74352 non-null object
roadClass             74352 non-null object
roundabout            74352 non-null object
wayId                 74352 non-null object
access                74352 non-null object
area                  74352 non-null object
bridge                74352 non-null object
est_width             74352 non-null object
highway               74352 non-null object
junction              74352 non-null object
key  

In [1032]:
sf_link_with_tomtom_gdf.u.nunique()

27674

In [1033]:
sf_link_with_tomtom_gdf.fromIntersectionId.nunique()

27674

In [1034]:
sf_node_gdf.shape

(27700, 6)

In [1035]:
len(set(sf_link_with_tomtom_gdf.u.tolist() + sf_link_with_tomtom_gdf.v.tolist()))

27700

In [1006]:
geom_length = sf_link_with_tomtom_gdf[['geometry']]
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

sf_link_with_tomtom_gdf["length"] = geom_length["length"]

In [1065]:
sf_node_gdf

Unnamed: 0,osm_node_id,shst_node_id,geometry,drive_access,walk_access,bike_access
9,65308539,55d370526a55a5d348e23751aad86ac0,POINT (-122.4888978 37.77797810000001),1,1,1
13,65344363,5c7c469988248e3572b82b39c894dfd6,POINT (-122.4035822 37.7344055),1,1,1
43,4034014436,228e65c325522cb3d826c5afd828abcf,POINT (-122.4644317 37.7660747),1,1,1
48,65288594,7eaf8516bbeb338c7fc1a89ec32accd2,POINT (-122.4231171 37.73735660000001),1,1,1
49,5760414283,130a1e4c1d3a355511c7b01e24100273,POINT (-122.4495758 37.7983547),1,1,1
95,5443128405,798cae7a1d2ac00f3460b6f1452ee01d,POINT (-122.3999294 37.7257127),1,1,1
128,3065529300,2ceb892d1ee28b7a727941abe880a46f,POINT (-122.4718894 37.7560009),0,1,1
159,315414051,efcddacf1e270c9f18e6142c588d66d9,POINT (-122.4910875 37.72551960000001),0,1,1
174,295218284,da5017cf77a0ef3513b0a1dbfe6d0534,POINT (-122.4225309 37.7479421),1,1,1
177,65305990,d86f4ace331185501ff11866cb3349df,POINT (-122.4089677 37.74563610000001),1,1,1


In [1067]:
# add model node id
# http://bayareametro.github.io/travel-model-two/input/#roadway-network

network_node_start = 1000000

sf_node_gdf["model_node_id"] = range(network_node_start, network_node_start + len(sf_node_gdf))

sf_node_gdf

Unnamed: 0,osm_node_id,shst_node_id,geometry,drive_access,walk_access,bike_access,model_node_id
9,65308539,55d370526a55a5d348e23751aad86ac0,POINT (-122.4888978 37.77797810000001),1,1,1,1000000
13,65344363,5c7c469988248e3572b82b39c894dfd6,POINT (-122.4035822 37.7344055),1,1,1,1000001
43,4034014436,228e65c325522cb3d826c5afd828abcf,POINT (-122.4644317 37.7660747),1,1,1,1000002
48,65288594,7eaf8516bbeb338c7fc1a89ec32accd2,POINT (-122.4231171 37.73735660000001),1,1,1,1000003
49,5760414283,130a1e4c1d3a355511c7b01e24100273,POINT (-122.4495758 37.7983547),1,1,1,1000004
95,5443128405,798cae7a1d2ac00f3460b6f1452ee01d,POINT (-122.3999294 37.7257127),1,1,1,1000005
128,3065529300,2ceb892d1ee28b7a727941abe880a46f,POINT (-122.4718894 37.7560009),0,1,1,1000006
159,315414051,efcddacf1e270c9f18e6142c588d66d9,POINT (-122.4910875 37.72551960000001),0,1,1,1000007
174,295218284,da5017cf77a0ef3513b0a1dbfe6d0534,POINT (-122.4225309 37.7479421),1,1,1,1000008
177,65305990,d86f4ace331185501ff11866cb3349df,POINT (-122.4089677 37.74563610000001),1,1,1,1000009


In [1071]:
# add A,B node id for each link
node_osm_model_dict = dict(zip(sf_node_gdf.osm_node_id, sf_node_gdf.model_node_id))

sf_link_with_tomtom_gdf["A"] = sf_link_with_tomtom_gdf.u.map(node_osm_model_dict)
sf_link_with_tomtom_gdf["B"] = sf_link_with_tomtom_gdf.v.map(node_osm_model_dict)

sf_link_with_tomtom_gdf.info()
sf_link_with_tomtom_gdf.head(3)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 74352 entries, 0 to 74351
Data columns (total 42 columns):
shstReferenceId       74352 non-null object
id                    74352 non-null object
shstGeometryId        74352 non-null object
fromIntersectionId    74352 non-null object
toIntersectionId      74352 non-null object
geometry              74352 non-null object
u                     74352 non-null int64
v                     74352 non-null int64
link                  74352 non-null object
nodeIds               74352 non-null object
oneWay                74352 non-null object
roadClass             74352 non-null object
roundabout            74352 non-null object
wayId                 74352 non-null object
access                74352 non-null object
area                  74352 non-null object
bridge                74352 non-null object
est_width             74352 non-null object
highway               74352 non-null object
junction              74352 non-null object
key  

Unnamed: 0,shstReferenceId,id,shstGeometryId,fromIntersectionId,toIntersectionId,geometry,u,v,link,nodeIds,oneWay,roadClass,roundabout,wayId,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,ref,service,tunnel,width,forward,roadway,drive_access,walk_access,bike_access,tomtom_id,tomtom_f_jnctid,tomtom_t_jnctid,LANES,length,A,B
0,00008f3db470b7993ed7efe3fdbbe371,35d40b224da89d0f2a3899f0cd712df9,35d40b224da89d0f2a3899f0cd712df9,55d370526a55a5d348e23751aad86ac0,8248373f4092b20c5c977f63bb639b65,"LINESTRING (-122.4888978 37.77797810000001, -1...",65308539,65318508,False,"[65308539, 65318508]",False,Residential,False,158804475,,,,,residential,,0,,,,29th Avenue,False,,,,,1.0,residential,1,1,1,68400000000000.0,68400010000000.0,68400010000000.0,0.0,225.210046,1000000,1017571
1,000135f06e99cfa717aac0daeb35ca1e,503956ea14be95fe15b752d80ab48f20,503956ea14be95fe15b752d80ab48f20,5c7c469988248e3572b82b39c894dfd6,029c3ce3e518d096363ab6cd17d4db6d,"LINESTRING (-122.4035822 37.7344055, -122.4035...",65344363,65306218,False,"[65344363, 65306218]",False,Residential,False,27614736,,,,,residential,,0,,,,Elmira Street,False,,,,,1.0,residential,1,1,1,68400000000000.0,68400010000000.0,68400010000000.0,0.0,89.45152,1000001,1017463
2,00040e43e020b2d01c78443b2dd421ac,16ac50c41bb6e6ba642cdbc0ca8ff409,16ac50c41bb6e6ba642cdbc0ca8ff409,228e65c325522cb3d826c5afd828abcf,b2ad6b480390af3d0eed4ad140bf66d9,"LINESTRING (-122.4644317 37.7660747, -122.4651...",4034014436,65304276,False,"[4034014436, 4034027170, 65304276]",True,Primary,False,286646571,,,,,primary,,0,,3.0,35 mph,Lincoln Way,True,,,,,1.0,primary,1,1,1,68400000000000.0,68400010000000.0,68400010000000.0,3.0,92.193626,1000002,1012891


In [1072]:
print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(sf_shape_gdf, shape_prop)

with open(roadway_output_folder + "sf_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

    
print("-------write out link json---------")

link_prop = sf_link_with_tomtom_gdf.drop(["geometry"], axis = 1).columns.tolist()

out = sf_link_with_tomtom_gdf[link_prop].to_json(orient = "records")

with open(roadway_output_folder + "sf_link.json', 'w') as f:
    f.write(out)

    
print("-------write out node geojson---------")

node_prop = sf_node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(sf_node_gdf, node_prop)

with open(roadway_output_folder + "sf_node.geojson", "w") as f:
    json.dump(node_geojson, f)    

-------write out link shape geojson---------
-------write out link json---------
-------write out node geojson---------


In [1068]:
sf_link_with_tomtom_gdf.columns

Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'geometry', 'u', 'v', 'link', 'nodeIds', 'oneWay',
       'roadClass', 'roundabout', 'wayId', 'access', 'area', 'bridge',
       'est_width', 'highway', 'junction', 'key', 'landuse', 'lanes',
       'maxspeed', 'name', 'oneway', 'ref', 'service', 'tunnel', 'width',
       'forward', 'roadway', 'drive_access', 'walk_access', 'bike_access',
       'tomtom_id', 'tomtom_f_jnctid', 'tomtom_t_jnctid', 'LANES', 'length'],
      dtype='object')

In [1069]:
sf_link_with_tomtom_gdf[sf_link_with_tomtom_gdf.shstReferenceId == "87739e2f3b65a712a902fbf4d340bb67"]

Unnamed: 0,shstReferenceId,id,shstGeometryId,fromIntersectionId,toIntersectionId,geometry,u,v,link,nodeIds,oneWay,roadClass,roundabout,wayId,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,ref,service,tunnel,width,forward,roadway,drive_access,walk_access,bike_access,tomtom_id,tomtom_f_jnctid,tomtom_t_jnctid,LANES,length
22029,87739e2f3b65a712a902fbf4d340bb67,ef3be5a6820fb584f2fa14da7fc2a873,ef3be5a6820fb584f2fa14da7fc2a873,1171ea0f2ae971c9bb3734cec8bd66b0,7d939ecd97580cdcc4780eed75ed32a5,"LINESTRING (-122.4226089 37.79129440000001, -1...",65307417,65307419,False,"[65307417, 4759501868, 4759501861, 65307419]",True,Secondary,False,477214642,,,,,,,0,,,,,,,,,,1.0,secondary,1,1,1,68400000000000.0,68400010000000.0,68400010000000.0,0.0,151.91


# test network routing

In [883]:
def ox_graph(nodes_df, links_df):
    """
        create an osmnx-flavored network graph
        osmnx doesn't like values that are arrays, so remove the variables
        that have arrays.  osmnx also requires that certain variables
        be filled in, so do that too.
        Parameters
        ----------
        nodes_df : GeoDataFrame
        link_df : GeoDataFrame
        Returns
        -------
        networkx multidigraph
    """
    try:
        graph_nodes = nodes_df.drop(
                ["inboundReferenceId", "outboundReferenceId"], axis=1
            )
    except:
        graph_nodes = nodes_df.copy()

    graph_nodes.gdf_name = "network_nodes"
    graph_nodes['id'] = graph_nodes['shst_node_id']

    graph_links = links_df.copy()
    graph_links['id'] = graph_links['shstReferenceId']
    graph_links['key'] = graph_links['shstReferenceId']

    G = ox.gdfs_to_graph(graph_nodes, graph_links)

    return G

In [1012]:
G_drive = ox_graph(node_gdf[node_gdf.drive_access == 1],
                  link_gdf[link_gdf.drive_access == 1])

In [908]:
# not necessary to save
# ox.save_load.save_graphml(G_drive, "Z:/Data/Users/Sijia/MTC/tests/networkstandard/drive_bayarea.graphml")

In [900]:
G_drive_sf = ox_graph(sf_node_gdf[sf_node_gdf.drive_access == 1],
                  sf_link_with_tomtom_gdf[sf_link_with_tomtom_gdf.drive_access == 1])

In [907]:
# not necessary to save
# ox.save_load.save_graphml(G_drive_sf, "Z:/Data/Users/Sijia/MTC/tests/networkstandard/drive_sf.graphml")

In [1039]:
# return shortest path between two nodes
nx.shortest_path(G_drive_sf, 293741891, 65284950, weight = "length")

[293741891,
 65290257,
 911547143,
 3593679267,
 423778249,
 65290252,
 5435466368,
 65290251,
 65290249,
 65290238,
 65290236,
 5435466213,
 5435466205,
 65290232,
 5435466333,
 5435466219,
 65290229,
 65290227,
 5435466158,
 65290225,
 5435466163,
 65281097,
 4911322443,
 5437055071,
 65284950]

# QA/QC and other tests

In [712]:
# number of shst geometries

print(len(shst_link_non_dup_gdf))
print(shst_link_non_dup_gdf.id.nunique())

908281
908281


In [1040]:
# one-way segments does not have backeReferenceId

print(osm_link_df[osm_link_df.oneWay == True].geometryId.nunique())
print(shst_link_non_dup_gdf[shst_link_non_dup_gdf.backReferenceId == ""].shape[0])

106311
106311


In [1041]:
import sys
print(sys.maxsize)
osm_link_gdf["u"] = osm_link_gdf.nodeIds.apply(lambda x: int(x[0]))
print(osm_link_gdf.u.max())

9223372036854775807
6154580436


In [1042]:
osmnx_link_gdf.groupby(["osmid", "lanes"]).count().reset_index().lanes.value_counts()

nan    409341
2       26117
3       13202
1        7695
4        7589
5        3447
6         723
7         108
8          18
12          2
18          2
9           2
10          1
11          1
Name: lanes, dtype: int64

In [1043]:
osm_link_gdf.shape

(1818056, 32)

In [1044]:
osm_link_gdf.lanes.value_counts()

nan    1517131
2        99068
3        24963
4        18963
1        11382
5         8844
6         1491
7          221
8           31
9            3
12           2
18           2
11           1
10           1
Name: lanes, dtype: int64

In [1045]:
# example for two-way segments that have only one directionality, thus no need to get the reverse direction for complete osm
osm_link_gdf[osm_link_gdf.id == "e24f210c1efe20bef41839ff1bc11f16"]

Unnamed: 0,link,nodeIds,oneWay,roadClass,roundabout,wayId,shstGeometryId,id,fromIntersectionId,toIntersectionId,shstReferenceId,geometry,u,v,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,osmid,ref,service,tunnel,width,reverse_out
21549,False,"[5250019116, 2829411762, 5250019116]",False,Other,False,7881681,e24f210c1efe20bef41839ff1bc11f16,e24f210c1efe20bef41839ff1bc11f16,acad3a72268359c4fe4267a4bbed1c52,acad3a72268359c4fe4267a4bbed1c52,295cfebb89f341f660ba9baf8a2a7741,"LINESTRING (-122.0940081 37.8353339, -122.0939...",5250019116,5250019116,,,yes,,path,,0.0,,,,Old Moraga Ranch Trail,False,7881681.0,,,,,


In [1046]:
# the bollard at Berkeley
osm_link_gdf[osm_link_gdf.wayId == 24024252]

Unnamed: 0,link,nodeIds,oneWay,roadClass,roundabout,wayId,shstGeometryId,id,fromIntersectionId,toIntersectionId,shstReferenceId,geometry,u,v,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,osmid,ref,service,tunnel,width,reverse_out
93831,False,"[260541175, 4265656988, 260541174]",False,Other,False,24024252,ef53746f39d064e3ffe56f29523463a1,ef53746f39d064e3ffe56f29523463a1,5b772f58fa36634c522d08f32a1464ad,4cb4ad8f54ce81f14c5fe6aa58cdf60d,0958c2984c63a6a11220ca7a3c5ebeb4,"LINESTRING (-122.265018 37.8633342, -122.26505...",260541175,260541174,,,,,cycleway,,0.0,,,,,False,24024252.0,,,,,
1054566,False,"[260541175, 4265656988, 260541174]",False,Other,False,24024252,ef53746f39d064e3ffe56f29523463a1,ef53746f39d064e3ffe56f29523463a1,4cb4ad8f54ce81f14c5fe6aa58cdf60d,5b772f58fa36634c522d08f32a1464ad,bde90c88ef9f469f58c5cfda09f64493,"LINESTRING (-122.265018 37.8633342, -122.26505...",260541175,260541175,,,,,cycleway,,0.0,,,,,False,24024252.0,,,,,1.0


In [1047]:
osmnx_link_gdf[osmnx_link_gdf.osmid == 24024252]

Unnamed: 0,access,area,bridge,est_width,highway,junction,key,landuse,lanes,length,maxspeed,name,oneway,osmid,ref,service,tunnel,u,v,width,geometry
312469,,,,,cycleway,,0,,,4.37,,,False,24024252,,,,260541174,4265656988,,"LINESTRING (-122.2650934 37.8633849, -122.2650..."
312474,,,,,cycleway,,0,,,4.325,,,False,24024252,,,,260541175,4265656988,,"LINESTRING (-122.265018 37.8633342, -122.26505..."
2799942,,,,,cycleway,,0,,,4.37,,,False,24024252,,,,4265656988,260541174,,"LINESTRING (-122.2650552 37.8633597, -122.2650..."
2799943,,,,,cycleway,,0,,,4.325,,,False,24024252,,,,4265656988,260541175,,"LINESTRING (-122.2650552 37.8633597, -122.2650..."


In [1048]:
pd.crosstab(osm_link_gdf.roadClass, osm_link_gdf.highway)

highway,bridleway,closed:path,corridor,cycleway,footpath,footway,junction,living_street,motorway,motorway_link,path,pedestrian,primary,primary_link,residential,road,secondary,secondary_link,service,steps,tertiary,tertiary_link,track,trunk,trunk_link,unclassified,unclassified_link
roadClass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Motorway,0,0,0,0,0,0,0,0,5378,7703,0,0,2,0,0,0,1,0,42,0,1,0,0,84,50,5,0
Other,230,2,410,13059,78,265032,0,0,0,3,26943,2045,0,5,164,250,3,1,212170,4879,10,0,25515,0,0,24,0
Primary,0,0,0,1,0,0,0,0,5,7,0,0,32110,1479,0,0,2379,31,54,0,237,0,0,418,80,2,0
Residential,0,0,0,7,0,22,4,423,0,2,24,6,0,24,513482,2,145,33,2873,0,3384,32,221,0,6,966,0
Secondary,0,0,0,0,0,1,0,0,0,6,0,0,1555,45,97,0,75483,1375,13,0,1961,18,0,132,13,103,0
Service,0,0,0,76,0,50,0,10,0,4,18,14,2,14,519,0,41,15,347736,0,58,7,100,0,0,453,0
Tertiary,0,0,0,26,0,0,0,0,0,2,0,0,77,29,177,0,1754,88,92,0,93117,551,0,0,10,361,0
Trunk,0,0,0,0,0,0,0,0,54,15,0,0,320,7,0,0,0,0,0,0,8,1,0,5036,1316,2,0
Unclassified,0,0,0,2,0,0,0,0,0,0,42,2,0,1,426,0,2,7,531,0,246,8,28,0,0,25296,2


In [219]:
osm_link_gdf[osm_link_gdf.id != osm_link_gdf.shstGeometryId]
osm_link_gdf[osm_link_gdf.oneway != osm_link_gdf.oneWay]
osm_link_gdf[(osm_link_gdf.oneway == False) & (osm_link_gdf.oneWay == True)]
osm_link_gdf[(osm_link_gdf.oneway == True) & (osm_link_gdf.oneWay == False)]
pd.crosstab(osm_link_gdf.oneWay, osm_link_gdf.oneway)
#osm_link_gdf[(osm_link_gdf.oneway == True) & (osm_link_gdf.oneWay == False)]

oneway,False,True
oneWay,Unnamed: 1_level_1,Unnamed: 2_level_1
False,1548844,0
True,735,127358


In [189]:
osm_link_gdf.oneWay.value_counts()

False    1694026
True      127859
Name: oneWay, dtype: int64

In [389]:
osm_link_gdf.access.iloc[0] == "nan"

True

In [426]:
print(osm_link_gdf.isnull().sum())
print("\n")
print(osm_link_non_na_gdf.isnull().sum())

link                       0
nodeIds                    0
oneWay                     0
roadClass                  0
roundabout                 0
wayId                      0
shstGeometryId             0
id                         0
fromIntersectionId         0
toIntersectionId           0
shstReferenceId            0
geometry                   0
u                          0
v                          0
access                135953
area                  135953
bridge                135953
est_width             135953
highway               135953
junction              135953
key                   135953
landuse               135953
lanes                 135953
maxspeed              135953
name                  135953
oneway                135953
osmid                 135953
ref                   135953
service               135953
tunnel                135953
width                 135953
reverse_out           974897
dtype: int64


link                  0
nodeIds               0
oneWay   

In [1049]:
link_gdf.shstGeometryId.nunique()

908267

In [1050]:
link_gdf.shstReferenceId.nunique()

1705772

In [1051]:
link_gdf.shape

(1705772, 35)

In [1052]:
link_gdf[link_gdf.shstGeometryId == "df10929e2b51ffc6d8714fec585eec33"]

Unnamed: 0,shstReferenceId,id,shstGeometryId,fromIntersectionId,toIntersectionId,geometry,u,v,link,nodeIds,oneWay,roadClass,roundabout,wayId,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,ref,service,tunnel,width,forward,roadway,drive_access,walk_access,bike_access
660017,ba0d3ab448b052dcf8a3bfce0f4d5c0e,df10929e2b51ffc6d8714fec585eec33,df10929e2b51ffc6d8714fec585eec33,58049c638b6b97c50dc57260d7f90f49,889de6e0dee4b100ffa1d057fdbd27b5,"LINESTRING (-122.3975482 37.7911475, -122.3981...",3102328772,1723738831,False,"[3102328772, 1723738843, 4496149786, 1723738831]",True,Tertiary,False,397153595,,,,,secondary,,0,,2,,Fremont Street,True,,,,,1.0,secondary,1,1,1


In [1053]:
osm_link_gdf[osm_link_gdf.wayId == 24024252]

Unnamed: 0,link,nodeIds,oneWay,roadClass,roundabout,wayId,shstGeometryId,id,fromIntersectionId,toIntersectionId,shstReferenceId,geometry,u,v,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,osmid,ref,service,tunnel,width,reverse_out
93831,False,"[260541175, 4265656988, 260541174]",False,Other,False,24024252,ef53746f39d064e3ffe56f29523463a1,ef53746f39d064e3ffe56f29523463a1,5b772f58fa36634c522d08f32a1464ad,4cb4ad8f54ce81f14c5fe6aa58cdf60d,0958c2984c63a6a11220ca7a3c5ebeb4,"LINESTRING (-122.265018 37.8633342, -122.26505...",260541175,260541174,,,,,cycleway,,0.0,,,,,False,24024252.0,,,,,
1054566,False,"[260541175, 4265656988, 260541174]",False,Other,False,24024252,ef53746f39d064e3ffe56f29523463a1,ef53746f39d064e3ffe56f29523463a1,4cb4ad8f54ce81f14c5fe6aa58cdf60d,5b772f58fa36634c522d08f32a1464ad,bde90c88ef9f469f58c5cfda09f64493,"LINESTRING (-122.265018 37.8633342, -122.26505...",260541175,260541175,,,,,cycleway,,0.0,,,,,False,24024252.0,,,,,1.0


In [1054]:
osm_link_gdf[osm_link_gdf.u == osm_link_gdf.v]

Unnamed: 0,link,nodeIds,oneWay,roadClass,roundabout,wayId,shstGeometryId,id,fromIntersectionId,toIntersectionId,shstReferenceId,geometry,u,v,access,area,bridge,est_width,highway,junction,key,landuse,lanes,maxspeed,name,oneway,osmid,ref,service,tunnel,width,reverse_out
76,False,"[5478357044, 5478357047, 5478357046, 547835704...",False,Service,False,569696059,f58339aa36eea0aa623f91cc7ad2dfed,f58339aa36eea0aa623f91cc7ad2dfed,968953f3562b49b67eec78c15b1a96fc,968953f3562b49b67eec78c15b1a96fc,10254f738b1de3f833606f9f11904913,"LINESTRING (-121.9734348 37.7715697, -121.9733...",5478357044,5478357044,,,,,service,,0.0,,,,,False,569696059.0,,,,,
428,False,"[2392356463, 2392356465, 2392356462, 239235646...",False,Service,False,230783645,005e33d0f1c0428ec0e7d7485b842782,005e33d0f1c0428ec0e7d7485b842782,7578d85536d2a16fe8606df7ef8918e9,7578d85536d2a16fe8606df7ef8918e9,373b498c069a72772e2bff54006c70d6,"LINESTRING (-121.991266 37.7725161, -121.99123...",2392356463,2392356463,,,,,service,,0.0,,,,,False,230783645.0,,,,,
452,False,"[355636466, 2389106466, 2389106458, 2389106465...",False,Other,False,230400583,8dae041df2948c703053f65fd942244c,8dae041df2948c703053f65fd942244c,91ed7d476a0dbd920ab5c535d26f6f74,91ed7d476a0dbd920ab5c535d26f6f74,151a49903778c226adb747519e89f730,"LINESTRING (-121.985909 37.7678773, -121.98595...",355636466,355636466,,,,,service,,0.0,,,,,False,230400583.0,,parking_aisle,,,
664,False,"[5478313555, 5478313554, 5478313553, 547831355...",False,Service,False,569691067,43ad300504f3dc879d493571b259affd,43ad300504f3dc879d493571b259affd,3b36a3bb8ae5c30e790a478ef8c74a56,3b36a3bb8ae5c30e790a478ef8c74a56,1dff4c4267de9040fb7772a6ec5a220a,"LINESTRING (-121.9776633 37.77696690000001, -1...",5478313555,5478313555,,,,,service,,0.0,,,,,False,569691067.0,,,,,
828,False,"[2389149298, 2389149300, 2389149302, 238914930...",False,Other,False,230405711,7d9de9737df5532644744420199283c0,7d9de9737df5532644744420199283c0,f0fb06d6480dd7df67b83ef0cb7d876b,f0fb06d6480dd7df67b83ef0cb7d876b,118fabc973efe399a07f870dd43f37a1,"LINESTRING (-121.9846505 37.7779691, -121.9847...",2389149298,2389149298,,,,,service,,0.0,,,,,False,230405711.0,,parking_aisle,,,
829,False,"[57836561, 2389190726, 2230747157, 2230747153,...",False,Residential,False,213358755,4610d11b1047872920cd5c99ebebbc6d,4610d11b1047872920cd5c99ebebbc6d,9ee312e307b51425f9f63b942e367053,9ee312e307b51425f9f63b942e367053,94dfd4829c907c997ed007cb42efe562,"LINESTRING (-121.9840231 37.7785189, -121.9839...",57836561,57836561,,,,,residential,,0.0,,,,Stone Pine Lane,False,213358755.0,,,,,
889,False,"[2389179358, 2389179366, 2389179361, 238917935...",False,Other,False,230409802,f83d348e60a30b74a8eaf88bab19f7d7,f83d348e60a30b74a8eaf88bab19f7d7,df263235097bcebe46fb0d4bf6c29721,df263235097bcebe46fb0d4bf6c29721,4fadac828585ef8f7c312aad328211d7,"LINESTRING (-121.9851174 37.7749824, -121.9852...",2389179358,2389179358,,,,,service,,0.0,,,,,False,230409802.0,,parking_aisle,,,
1191,False,"[2392269228, 2392269229, 2392269226, 239226922...",False,Other,False,230773804,00dc201673747224494721e2a617c19d,00dc201673747224494721e2a617c19d,04f4c1fec65c28732a36d4826b500abb,04f4c1fec65c28732a36d4826b500abb,37911d6a77bd16d252f9aa6dc8518ebc,"LINESTRING (-121.9797678 37.7844545, -121.9796...",2392269228,2392269228,,,,,service,,0.0,,,,,False,230773804.0,,parking_aisle,,,
1201,False,"[5478237713, 5478237712, 5478237711, 547823771...",False,Service,False,569680701,c0669c1cc9478a0e80e2fd857b510a15,c0669c1cc9478a0e80e2fd857b510a15,fd938465be8907178e3f3a6d09377371,fd938465be8907178e3f3a6d09377371,b00f5527da6eae5801d454391c76fba1,"LINESTRING (-121.97905 37.7830639, -121.978987...",5478237713,5478237713,,,,,service,,0.0,,,,,False,569680701.0,,,,,
1360,False,"[5478209725, 5478209724, 5478209723, 547820972...",False,Service,False,569678410,ed34e21f6d2958d7d590d47a965947b1,ed34e21f6d2958d7d590d47a965947b1,8ebe04ddca22985ca7e999d970ab0f09,8ebe04ddca22985ca7e999d970ab0f09,27d8a70ea621e1506bec4ddc2ad19c37,"LINESTRING (-121.9823197 37.7863432, -121.9823...",5478209725,5478209725,,,,,service,,0.0,,,,,False,569678410.0,,,,,


In [1055]:
osm_link_gdf.service.value_counts()

nan                                                         1459609
parking_aisle                                                179742
driveway                                                      34118
alley                                                          6651
drive-through                                                  1104
busway                                                          584
emergency_access                                                166
golf_cart_track                                                  24
living_street                                                    24
Onyx Glen                                                        10
parking                                                          10
road                                                             10
1                                                                 8
Johnson                                                           8
access_road_to the Dominican Sisters of Mission 

In [1056]:
osm_link_gdf.groupby(["shstReferenceId", "id", "shstGeometryId", "fromIntersectionId", "toIntersectionId"]).count().shape

(1706374, 27)

In [163]:
shst_link_non_dup_gdf.columns

Index(['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId',
       'backReferenceId', 'roadClass', 'metadata', 'geometry', 'source'],
      dtype='object')

In [336]:
shst_link_non_dup_gdf[shst_link_non_dup_gdf.fromIntersectionId == shst_link_non_dup_gdf.toIntersectionId].iloc[1].metadata

{'gisMetadata': [],
 'geometryId': '9702c502010e77568216ae66dcad593d',
 'osmMetadata': {'waySections': [{'nodeIds': ['4919217936',
     '4919217935',
     '4919217934',
     '4919217933'],
    'wayId': '501008393',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''},
   {'nodeIds': ['4919217933', '4919217940'],
    'wayId': '501008389',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''},
   {'nodeIds': ['4919217940', '4919217941'],
    'wayId': '501008390',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''},
   {'nodeIds': ['4919217941', '4919217939'],
    'wayId': '501008390',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''},
   {'nodeIds': ['4919217939', '4919217938', '4919217937', '4919217936'],
    'wayId': '501008392',
    'roadClass': 'Other',
    'oneWay': F

In [339]:
shst_link_non_dup_gdf[shst_link_non_dup_gdf.forwardReferenceId == shst_link_non_dup_gdf.backReferenceId]

Unnamed: 0,id,fromIntersectionId,toIntersectionId,forwardReferenceId,backReferenceId,roadClass,metadata,geometry,source
20112,e24f210c1efe20bef41839ff1bc11f16,acad3a72268359c4fe4267a4bbed1c52,acad3a72268359c4fe4267a4bbed1c52,295cfebb89f341f660ba9baf8a2a7741,295cfebb89f341f660ba9baf8a2a7741,Other,"{'gisMetadata': [], 'geometryId': 'e24f210c1ef...","LINESTRING (-122.0940081 37.8353339, -122.0939...",../shst_node_js_extraction\mtc_14.out.geojson
25983,3ae7c50ab881584c262527334cf3a118,07d731a400948d893c332e8d65935de6,07d731a400948d893c332e8d65935de6,9e8d722f73630da1d14ff41aa0b8903a,9e8d722f73630da1d14ff41aa0b8903a,Other,"{'gisMetadata': [], 'geometryId': '3ae7c50ab88...","LINESTRING (-122.2528831 37.8569967, -122.2528...",../shst_node_js_extraction\mtc_14.out.geojson
25987,61ff73a2d550d682bdd12b245e89e3dd,455b4561a68c8d415bf43c03bfc82328,455b4561a68c8d415bf43c03bfc82328,e474312a915ae5f987021effdec6268e,e474312a915ae5f987021effdec6268e,Other,"{'gisMetadata': [], 'geometryId': '61ff73a2d55...","LINESTRING (-122.2528979 37.8571467, -122.2528...",../shst_node_js_extraction\mtc_14.out.geojson
30073,80da7cf788ae07fcf9409a417a3cae41,a823ed3e32843782d26878d0ab90a436,a823ed3e32843782d26878d0ab90a436,6e0b34f0e7f7fecd72a0b221f148d607,6e0b34f0e7f7fecd72a0b221f148d607,Service,"{'gisMetadata': [], 'geometryId': '80da7cf788a...","LINESTRING (-122.2205937 37.7980401, -122.2205...",../shst_node_js_extraction\mtc_14.out.geojson
87938,68b61c45fb9e91c7865a579dd69e1e0e,fa29c5691d317e8a1455e4b394ce72d3,fa29c5691d317e8a1455e4b394ce72d3,feff511f4a5d66bf2aebc4147a853d5f,feff511f4a5d66bf2aebc4147a853d5f,Other,"{'gisMetadata': [], 'geometryId': '68b61c45fb9...","LINESTRING (-122.2566167 37.8609477, -122.2566...",../shst_node_js_extraction\mtc_14.out.geojson
97743,583a1a32bffb7c8b68e354d75946ed0a,3a446c8884a9c4f826df1c0ee0ee45cb,3a446c8884a9c4f826df1c0ee0ee45cb,4536f75226d9cdc7bec2fc173be8309f,4536f75226d9cdc7bec2fc173be8309f,Other,"{'gisMetadata': [], 'geometryId': '583a1a32bff...","LINESTRING (-122.2590908 37.8253533, -122.2590...",../shst_node_js_extraction\mtc_14.out.geojson
98559,717976288cf29a929b19886fb1156817,1fb89e941dcde1eec1ae28f6b4b014a0,1fb89e941dcde1eec1ae28f6b4b014a0,359920fa974f78a0f930006b31d373dd,359920fa974f78a0f930006b31d373dd,Other,"{'gisMetadata': [], 'geometryId': '717976288cf...","LINESTRING (-122.2608315 37.8094562, -122.2608...",../shst_node_js_extraction\mtc_14.out.geojson
224087,8a3aef2e34c80fe3e73c76515135dfec,724cec8fc898c5e203a622ce39744353,724cec8fc898c5e203a622ce39744353,f5f7dd8980262f949e2cb82e327e9eeb,f5f7dd8980262f949e2cb82e327e9eeb,Service,"{'gisMetadata': [], 'geometryId': '8a3aef2e34c...","LINESTRING (-122.8485726 38.40740450000001, -1...",../shst_node_js_extraction\mtc_2.out.geojson
261624,59da55c29c44c060b531c237e23cecc2,52d57146d2de648bbba1a004da5cf762,52d57146d2de648bbba1a004da5cf762,b12d74dba689f8fd6c7bf7a1b79f2094,b12d74dba689f8fd6c7bf7a1b79f2094,Other,"{'gisMetadata': [], 'geometryId': '59da55c29c4...","LINESTRING (-122.238507 38.2196332, -122.23843...",../shst_node_js_extraction\mtc_3.out.geojson
261716,5ade6df9bbca82a966fedd7c4149d3f0,700070b904603d0c5e656f9fb097c722,700070b904603d0c5e656f9fb097c722,5c31f9642bd487563050429b19ee13b3,5c31f9642bd487563050429b19ee13b3,Other,"{'gisMetadata': [], 'geometryId': '5ade6df9bbc...","LINESTRING (-122.2440454 38.2135686, -122.2440...",../shst_node_js_extraction\mtc_3.out.geojson


In [1057]:
shst_link_non_dup_gdf.shape
osm_link_gdf.shstGeometryId.nunique()

908281

In [1058]:
shst_link_non_dup_gdf[shst_link_non_dup_gdf.forwardReferenceId == "0900b58d4b0735dd831190d58427848b"].iloc[0].metadata

{'gisMetadata': [],
 'geometryId': '9c1248882e11275f12c84aa44015728b',
 'osmMetadata': {'waySections': [{'nodeIds': ['5987137902',
     '5987137906',
     '5987137907',
     '5987137905'],
    'wayId': '634415564',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''}],
  'name': ''}}

In [1059]:
shst_link_non_dup_gdf[shst_link_non_dup_gdf.forwardReferenceId == "0900b58d4b0735dd831190d58427848b"].iloc[1].metadata

{'gisMetadata': [],
 'geometryId': '36102237db73d5ea5f603a109af99252',
 'osmMetadata': {'waySections': [{'nodeIds': ['5987137902',
     '5987137903',
     '5987137904',
     '5987137905'],
    'wayId': '634415563',
    'roadClass': 'Other',
    'oneWay': False,
    'roundabout': False,
    'link': False,
    'name': ''}],
  'name': ''}}

In [1061]:
shst_link_non_dup_gdf.forwardReferenceId.nunique()

908265

In [1062]:
shst_link_non_dup_gdf.forwardReferenceId.nunique()

908265

In [1064]:
osm_link_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1818056 entries, 0 to 1818055
Data columns (total 32 columns):
link                  bool
nodeIds               object
oneWay                bool
roadClass             object
roundabout            bool
wayId                 int32
shstGeometryId        object
id                    object
fromIntersectionId    object
toIntersectionId      object
shstReferenceId       object
geometry              object
u                     int64
v                     int64
access                object
area                  object
bridge                object
est_width             object
highway               object
junction              object
key                   float64
landuse               object
lanes                 object
maxspeed              object
name                  object
oneway                object
osmid                 float64
ref                   object
service               object
tunnel                object
width                 ob