In [11]:
import json
import os
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

import pandas as pd
import geopandas as gpd
import numpy as np
import requests
import fiona
from shapely.geometry import Point

In [12]:
from methods import read_shst_extract
from methods import link_df_to_geojson
from methods import point_df_to_geojson

In [13]:
# Root folders, relative to the working directory: raw third-party inputs
# are read from the external folder, pipeline intermediates from the interim one.
data_external_dir, data_interim_dir = "../../data/external/", "../../data/interim/"

# Read base network from step 3

In [14]:
%%time
link_file = data_interim_dir + "step3_join_shst_extraction_with_osm/" + "link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

shape_gdf = gpd.read_file(data_interim_dir + "step3_join_shst_extraction_with_osm/" 
                          + "shape.geojson")

link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                   how = "left",
                   on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf,
                            crs="EPSG:4326")

Wall time: 2min 21s


In [8]:
# Report the size and schema of the base network that was just loaded.
link_summary = '{} links with columns: \n{}'
print(link_summary.format(len(link_gdf), link_gdf.columns))

1705772 links with columns: 
Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'u', 'v', 'link', 'oneWay', 'roundabout', 'wayId',
       'access', 'area', 'bridge', 'est_width', 'highway', 'junction', 'key',
       'landuse', 'lanes', 'maxspeed', 'name', 'ref', 'service', 'tunnel',
       'width', 'roadway', 'drive_access', 'walk_access', 'bike_access',
       'geometry'],
      dtype='object')


# TomTom conflation

In [9]:
# Read tomtom ShSt match result and rename its intersection-id columns to
# the names used by the base network links.
tomtom_match_gdf = read_shst_extract(
    data_interim_dir + "tomtom/", "*_tomtom.out.matched.geojson"
).rename(columns={"shstFromIntersectionId": "fromIntersectionId",
                  "shstToIntersectionId": "toIntersectionId"})

match_summary = '\n {} rows with columns: \n{}'
print(match_summary.format(len(tomtom_match_gdf), tomtom_match_gdf.columns))

----------start reading shst extraction data-------------
----------finished reading shst extraction data-------------

 columns: 
Index([], dtype='object')


In [13]:
# Read tomtom raw data
tomtom_raw_path = data_external_dir + 'tomtom/tomtom_raw.geojson'
tomtom_raw_gdf = gpd.read_file(tomtom_raw_path)

raw_summary = '\n {} rows with columns: \n{}'
print(raw_summary.format(len(tomtom_raw_gdf), tomtom_raw_gdf.columns))

Unnamed: 0,shstReferenceId,shstGeometryId,fromIntersectionId,toIntersectionId,gisReferenceId,gisGeometryId,gisTotalSegments,gisSegmentIndex,gisFromIntersectionId,gisToIntersectionId,startSideOfStreet,endSideOfStreet,sideOfStreet,score,matchType,pp_tomtom_link_id,geometry,source
0,340aca5eed3be40e615422d39544d223,52a5e0b4800e7283accda73c74108762,8711d153fefd546c2845baeb0f67bd68,ce8aa7e55b3c5f951154f22d1fb5efd0,35317b9b780d7f5843d00a207b13c092,d0b88c4ea04cca9bf9835f85df44ed32,2,1,91cf2a0dbffb2e7f14c03e72e704d3e5,a3ac6e509c62ac3d7796c8d8b75f72b2,right,left,unknown,4.68,hmm,337110,LINESTRING (-121.9121105815505 38.014193818053...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
1,4b1dd93afef058cf6b751dd55aa40031,2eba19c9a54083c15f8fd70e025559b4,ce8aa7e55b3c5f951154f22d1fb5efd0,594fb147712d1bf639de86532ff58434,35317b9b780d7f5843d00a207b13c092,d0b88c4ea04cca9bf9835f85df44ed32,2,2,91cf2a0dbffb2e7f14c03e72e704d3e5,a3ac6e509c62ac3d7796c8d8b75f72b2,right,left,unknown,4.68,hmm,337110,"LINESTRING (-121.9120067 38.0142604, -121.9118...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
2,9de152e522f71ed2e8fb4658213e2ad0,2eba19c9a54083c15f8fd70e025559b4,594fb147712d1bf639de86532ff58434,ce8aa7e55b3c5f951154f22d1fb5efd0,88acc59b5faa7dc7ab80892531d77978,d0b88c4ea04cca9bf9835f85df44ed32,2,1,a3ac6e509c62ac3d7796c8d8b75f72b2,91cf2a0dbffb2e7f14c03e72e704d3e5,right,left,unknown,4.68,hmm,337110,LINESTRING (-121.9118829091503 38.014152852369...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
3,ec573127f992ae2dd0fabaa6c6c70cbd,52a5e0b4800e7283accda73c74108762,ce8aa7e55b3c5f951154f22d1fb5efd0,8711d153fefd546c2845baeb0f67bd68,88acc59b5faa7dc7ab80892531d77978,d0b88c4ea04cca9bf9835f85df44ed32,2,2,a3ac6e509c62ac3d7796c8d8b75f72b2,91cf2a0dbffb2e7f14c03e72e704d3e5,right,left,unknown,4.68,hmm,337110,"LINESTRING (-121.9120067 38.0142604, -121.9121...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
4,b09838f5bba85e6f72a5d025c64f4c5f,cb0c7c67acc6c5ae4014a5e6a440e656,06b6e95d0de1a687c0aa3682a8c781b5,3c51d6bd57608d58ba09082b6b5ff904,dcaf838ca94ea539373fbe7d1a3d0e5d,2123da5e29f4836a23922e3725c206cd,2,1,fb2e2b5cf4f18ea282582ba908b35903,69df7691003f0dca1bf9acce7beb9030,left,right,unknown,7.87,hmm,489237,LINESTRING (-121.8990992349619 37.765437480739...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
5,79f5be81b9431c9b6ee5f3a861adf5b2,ffad91c6a5c5eac1803f14c6626da50d,3c51d6bd57608d58ba09082b6b5ff904,0eafc161d1bf63d09e8d17a11d46ee51,dcaf838ca94ea539373fbe7d1a3d0e5d,2123da5e29f4836a23922e3725c206cd,2,2,fb2e2b5cf4f18ea282582ba908b35903,69df7691003f0dca1bf9acce7beb9030,left,right,unknown,7.87,hmm,489237,"LINESTRING (-121.8991717 37.7655042, -121.8991...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
6,031396b53bd320bfa96fd88a8017985d,ffad91c6a5c5eac1803f14c6626da50d,0eafc161d1bf63d09e8d17a11d46ee51,3c51d6bd57608d58ba09082b6b5ff904,fa9f72d6a66c3a9aba647d8fe8c3b305,2123da5e29f4836a23922e3725c206cd,2,1,69df7691003f0dca1bf9acce7beb9030,fb2e2b5cf4f18ea282582ba908b35903,right,right,right,7.87,hmm,489237,"LINESTRING (-121.8992885 37.7656058, -121.8992...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
7,37f3e865c333e5a9a2390ee84d1eaaf7,cb0c7c67acc6c5ae4014a5e6a440e656,3c51d6bd57608d58ba09082b6b5ff904,06b6e95d0de1a687c0aa3682a8c781b5,fa9f72d6a66c3a9aba647d8fe8c3b305,2123da5e29f4836a23922e3725c206cd,2,2,69df7691003f0dca1bf9acce7beb9030,fb2e2b5cf4f18ea282582ba908b35903,right,right,right,7.87,hmm,489237,"LINESTRING (-121.8991717 37.7655042, -121.8991...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
8,456ba758c99bed5ceb9030e44ab36656,1c78571c094618c6be5512ab0ba28e69,c03538f1d44a953787e805d7153c538e,e8661658df8e8f57dfd4be76a73675e6,e9ec16de687222564740bb0f52369866,ecd5b61c731332f50dfbd3c8fe5446cd,1,1,1e21575f9699b6137e985d110b229e0b,bf07674675b8d0a0663694fde88e5e92,right,right,right,6.47,hmm,484729,LINESTRING (-122.4380561316604 38.187805173017...,D:/MTC/data/interim/tomtom\bike_rules\2_tomtom...
9,5541e1cbc5e7f96a016cc90d83d4bf7b,1c78571c094618c6be5512ab0ba28e69,e8661658df8e8f57dfd4be76a73675e6,c03538f1d44a953787e805d7153c538e,e5077e7b423d8cee96e9cebb98d8f367,ecd5b61c731332f50dfbd3c8fe5446cd,1,1,bf07674675b8d0a0663694fde88e5e92,1e21575f9699b6137e985d110b229e0b,left,left,left,6.47,hmm,484729,LINESTRING (-122.4380110968732 38.187749020001...,D:/MTC/data/interim/tomtom\bike_rules\2_tomtom...


In [18]:
# How many distinct raw tomtom links appear in the match result, out of all raw links.
n_matched_links = tomtom_match_gdf["pp_tomtom_link_id"].nunique()
n_raw_links = tomtom_raw_gdf.shape[0]
print("Sharedstreets matched " + str(n_matched_links) + " out of " + str(n_raw_links)
      + " total tomtom links.")

Sharedstreets matched 550620 out of 602060 total tomtom links.


In [19]:
# Attach the raw tomtom attributes to every ShSt match record.
# This is a left join on the tomtom link id, so match records without a raw
# counterpart are kept with missing attribute values.
tomtom_raw_attr_cols = ['tomtom_link_id', 'ID', 'F_JNCTID', 'T_JNCTID', 'LANES', "FRC", "NAME",
                        "SHIELDNUM", "RTEDIR"]

tomtom_gdf = pd.merge(tomtom_match_gdf,
                      tomtom_raw_gdf[tomtom_raw_attr_cols],
                      how = "left",
                      left_on = ['pp_tomtom_link_id'],
                      right_on = ['tomtom_link_id']
                     )

# tomtom_gdf has one row per match record, not per base network link (the base
# network is only joined in a later cell), so the message reports match records.
print('{} tomtom match records after joining tomtom attributes'.format(tomtom_gdf.shape[0]))

(1749759, 27)


In [102]:
# join tomtom with network

# Columns that identify a ShSt link in both the match result and the base network.
shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']

# Keep one match record per ShSt link (drop_duplicates keeps the first one)
# and give the raw tomtom attributes a "tomtom_" prefixed name.
unique_tomtom_match_gdf = (
    tomtom_gdf
    .drop_duplicates(subset = shst_link_keys)
    .rename(columns = {"ID" : "tomtom_ID", "LANES" : "tomtom_lanes", "FRC" : "tomtom_FRC",
                       "NAME" : "tomtom_name", "SHIELDNUM" : "tomtom_shieldnum",
                       "RTEDIR" : "tomtom_rtedir"})
)

tomtom_attr_cols = ['pp_tomtom_link_id', "tomtom_ID", 'F_JNCTID', 'T_JNCTID',
                    "tomtom_lanes", "tomtom_FRC", "tomtom_name", "tomtom_shieldnum",
                    "tomtom_rtedir"]

# Left join: every base network link is kept, matched or not.
link_with_tomtom_gdf = pd.merge(
    left = link_gdf,
    right = unique_tomtom_match_gdf[shst_link_keys + tomtom_attr_cols],
    how = "left",
    on = shst_link_keys,
)

In [104]:
# Inspect the column dtypes and entry count of the network after the tomtom join.
link_with_tomtom_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 40 columns):
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunnel                object
u                     int64
v                     int64
walk_access           int64
wayId                 object
width                 object
geometry      

In [21]:
# link_with_tomtom_gdf[link_with_tomtom_gdf.shstGeometryId.isnull()]
# link_with_tomtom_gdf[~(link_with_tomtom_gdf.shstGeometryId == link_with_tomtom_gdf.id)]
# link_with_tomtom_gdf.lanes.value_counts()

nan                                  1359907
                                      134553
2                                      84235
[nan, nan]                             46733
3                                      13260
4                                      11417
[nan, nan, nan]                         9469
1                                       8812
5                                       4697
[, ]                                    4198
[2, 3]                                  2697
[2, 2]                                  1977
[nan, ]                                 1784
[, nan]                                 1177
[3, 3]                                  1043
[3, 4]                                   977
[nan, nan, nan, nan]                     960
[, , ]                                   960
[2, 2, 2]                                767
6                                        725
[2, nan]                                 723
[4, 4]                                   686
[3, 2]    

# Read third party shst match results

In [26]:
# read TM2 non Marin conflation result
# (intersection-id columns are renamed to the names used by the base network links)
tm2nonMarin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_nonMarin/", "*tm2nonMarin.out.matched.geojson"
).rename(columns={"shstFromIntersectionId": "fromIntersectionId",
                  "shstToIntersectionId": "toIntersectionId"})

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\10_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\11_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\12_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\13_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\14_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\1_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\2_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\3_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules

In [27]:
# read TM2 Marin conflation result
# (intersection-id columns are renamed to the names used by the base network links)
tm2marin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_Marin/", "*tm2Marin.out.matched.geojson"
).rename(columns={"shstFromIntersectionId": "fromIntersectionId",
                  "shstToIntersectionId": "toIntersectionId"})

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\10_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\11_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\12_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\13_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\14_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\1_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\2_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\3_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\4_tm2Marin.out.matched.geojson
reading shst extrac

In [28]:
# # read sfcta true shape conflation result

# sfcta_match_gdf = read_shst_extract(data_interim_dir + "stclines/", "*sfcta.out.matched.geojson")

# sfcta_match_gdf.rename(columns = {"shstFromIntersectionId" : "fromIntersectionId",
#                                    "shstToIntersectionId" : "toIntersectionId"},
#                        inplace = True)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/stclines\car_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/stclines\ped_rules\sfcta.out.matched.geojson
----------finished reading shst extraction data-------------


In [29]:
# read sfcta stick network conflation result
# (intersection-id columns are renamed to the names used by the base network links)
sfcta_stick_match_gdf = read_shst_extract(
    data_interim_dir + "sfcta/", "*sfcta.out.matched.geojson"
).rename(columns={"shstFromIntersectionId": "fromIntersectionId",
                  "shstToIntersectionId": "toIntersectionId"})

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/sfcta\car_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\ped_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\reverse_dir\sfcta.out.matched.geojson
----------finished reading shst extraction data-------------


In [30]:
# read pems conflation result

# pems_match_gdf = gpd.read_file(data_interim_dir + "mtc/pems_conflation_result.geojson")

In [39]:
# pems_match_gdf['type'].value_counts()

ML    60319
OR     9309
FR     5733
FF      666
HV       30
Name: type, dtype: int64

In [40]:
# # link can have multiple pems station on it, so trying to get the mode of #lanes by station type

# pems_lanes_df = pems_match_gdf[(pems_match_gdf.year.isin([2014,2015,2016]))].groupby(
#     ["shstReferenceId", "type", "lanes"]
# )["station"].count().sort_values(ascending = False).reset_index().drop_duplicates(subset = ['shstReferenceId', 'type'])

# pems_lanes_df = pems_lanes_df.pivot_table(index = ["shstReferenceId"], values = "lanes", columns = "type").fillna(0).reset_index()

In [41]:
# link_pems_df = pems_match_gdf[(pems_match_gdf.year.isin([2014,2015,2016]))].drop_duplicates(
#     subset = ["shstReferenceId", "station"]).groupby(
#     ["shstReferenceId"]
# )["station"].apply(list).reset_index().rename(columns = {"station" : "PEMSID"})

# pems_lanes_df = pd.merge(
#     pems_lanes_df,
#     link_pems_df,
#     how = "left",
#     on = "shstReferenceId"
# )

In [42]:
# link_pems_df

Unnamed: 0,shstReferenceId,PEMSID
0,000619909cfedfdc3ae846759247e09f,"[404433, 407341]"
1,001faa1f8bf0fafb4298b7438a83b506,[400615]
2,00441ada4f77ea0e20dd17acd688e289,[404649]
3,005586c0c986e59d4dcd979513f7e8e6,"[401943, 402067, 403410, 403411, 403412]"
4,005a078054386cb7659bcbbdacf61c63,[400929]
5,00ff1d8493ea56e766bedbd47663cbb0,"[407358, 407359]"
6,01248bbf4c897c845921a9c93c8d609d,"[407328, 407329, 407330]"
7,0167c71940194a800e0c35023a9933c6,"[409722, 409723]"
8,018c32728b73695ac776d4181f35ecf6,[401134]
9,01a23a9cd090dd752fe36221a58a9b92,"[406606, 406605]"


In [43]:
# pems_lanes_df

Unnamed: 0,shstReferenceId,FF,FR,HV,ML,OR,PEMSID
0,000619909cfedfdc3ae846759247e09f,0.0,0.0,0.0,3.0,0.0,"[404433, 407341]"
1,001faa1f8bf0fafb4298b7438a83b506,0.0,0.0,0.0,3.0,0.0,[400615]
2,00441ada4f77ea0e20dd17acd688e289,0.0,0.0,0.0,3.0,0.0,[404649]
3,005586c0c986e59d4dcd979513f7e8e6,0.0,1.0,0.0,4.0,1.0,"[401943, 402067, 403410, 403411, 403412]"
4,005a078054386cb7659bcbbdacf61c63,0.0,0.0,0.0,5.0,0.0,[400929]
5,00ff1d8493ea56e766bedbd47663cbb0,2.0,0.0,1.0,0.0,0.0,"[407358, 407359]"
6,01248bbf4c897c845921a9c93c8d609d,0.0,1.0,0.0,3.0,1.0,"[407328, 407329, 407330]"
7,0167c71940194a800e0c35023a9933c6,0.0,0.0,0.0,3.0,1.0,"[409722, 409723]"
8,018c32728b73695ac776d4181f35ecf6,0.0,0.0,0.0,3.0,0.0,[401134]
9,01a23a9cd090dd752fe36221a58a9b92,0.0,1.0,0.0,4.0,0.0,"[406606, 406605]"


In [None]:
# read source data
# One raw GeoJSON per third-party network, read in this order:
# TM2 non-Marin, TM2 Marin, SFCTA stick.
tm2_link_roadway_gdf, tm2_marin_link_roadway_gdf, sfcta_stick_gdf = (
    gpd.read_file(data_external_dir + raw_file)
    for raw_file in (
        'TM2_nonMarin/tm2nonMarin_raw.geojson',
        'TM2_Marin/tm2Marin_raw.geojson',
        'sfcta/sfcta_raw.geojson',
    )
)

In [76]:
# join match result with source data

# Columns that identify a ShSt link in every match result.
shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']


def _join_match_with_source(match_gdf, source_gdf, rename_map):
    """Keep one match record per ShSt link and attach renamed source attributes.

    Parameters
    ----------
    match_gdf : DataFrame
        ShSt match result; must contain the ``shst_link_keys`` columns and
        ``pp_a`` / ``pp_b``.
    source_gdf : DataFrame
        Raw third-party network; must contain ``A`` / ``B`` and every other
        key of ``rename_map``.
    rename_map : dict
        Source column name -> output column name. Its keys, in order, are
        also the columns taken from ``source_gdf``.

    Returns
    -------
    DataFrame
        De-duplicated match records left-joined to the source attributes on
        (pp_a, pp_b) == (A, B), with the source columns renamed.
    """
    # drop_duplicates keeps the first record found for each ShSt link.
    unique_match_gdf = match_gdf.drop_duplicates(subset = shst_link_keys)
    joined_gdf = pd.merge(unique_match_gdf,
                          source_gdf[list(rename_map)],
                          how = "left",
                          left_on = ["pp_a", "pp_b"],
                          right_on = ["A", "B"])
    return joined_gdf.rename(columns = rename_map)


# TM2 non Marin
unique_tm2nonMarin_match_gdf = _join_match_with_source(
    tm2nonMarin_match_gdf,
    tm2_link_roadway_gdf,
    {"A" : "TM2_A", "B" : "TM2_B", "NUMLANES" : "TM2_LANES",
     "FT" : "TM2_FT", "ASSIGNABLE" : "TM2_ASSIGNABLE"})

# TM2 Marin
unique_tm2marin_match_gdf = _join_match_with_source(
    tm2marin_match_gdf,
    tm2_marin_link_roadway_gdf,
    {"A" : "TM2Marin_A", "B" : "TM2Marin_B", "NUMLANES" : "TM2Marin_LANES",
     "FT" : "TM2Marin_FT", "ASSIGNABLE" : "TM2Marin_ASSIGNABLE"})

# sfcta stick network (only the stick match result is used in this cell)
unique_sfcta_match_gdf = _join_match_with_source(
    sfcta_stick_match_gdf,
    sfcta_stick_gdf,
    {"A" : "sfcta_A", "B" : "sfcta_B", "FT" : "sfcta_FT", "STREETNAME" : "sfcta_STREETNAME",
     "LANE_AM" : "sfcta_LANE_AM", "LANE_OP" : "sfcta_LANE_OP", "LANE_PM" : "sfcta_LANE_PM"})

# pems (disabled: the cells that build pems_lanes_df are commented out)
# pems_lanes_df.rename(columns = {"FF" : "pems_lanes_FF", "FR" : "pems_lanes_FR", "HV" : "pems_lanes_HV", 
#                                 "ML" : "pems_lanes_ML", "OR" : "pems_lanes_OR"},
#                      inplace = True)

# Join network with third party sources

In [108]:
# Add the TM2 non-Marin attributes to the tomtom-conflated network.
# Left join on the ShSt link keys keeps every network link.
shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']
tm2_attr_cols = ['TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT', 'TM2_ASSIGNABLE']

link_all_conflated_gdf = pd.merge(
    left = link_with_tomtom_gdf,
    right = unique_tm2nonMarin_match_gdf[shst_link_keys + tm2_attr_cols],
    on = shst_link_keys,
    how = "left",
)

In [109]:
# Add the TM2 Marin attributes.
# NOTE: this cell overwrites link_all_conflated_gdf with itself plus the new
# columns, so re-running it alone merges the same columns a second time.
shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']
tm2marin_attr_cols = ['TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES', 'TM2Marin_FT',
                      'TM2Marin_ASSIGNABLE']

link_all_conflated_gdf = pd.merge(
    left = link_all_conflated_gdf,
    right = unique_tm2marin_match_gdf[shst_link_keys + tm2marin_attr_cols],
    on = shst_link_keys,
    how = "left",
)

In [110]:
# Add the SFCTA stick-network attributes.
# NOTE: this cell overwrites link_all_conflated_gdf with itself plus the new
# columns, so re-running it alone merges the same columns a second time.
shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']
sfcta_attr_cols = ['sfcta_A', 'sfcta_B', 'sfcta_FT', 'sfcta_STREETNAME', 'sfcta_LANE_AM',
                   'sfcta_LANE_OP', 'sfcta_LANE_PM']

link_all_conflated_gdf = pd.merge(
    left = link_all_conflated_gdf,
    right = unique_sfcta_match_gdf[shst_link_keys + sfcta_attr_cols],
    on = shst_link_keys,
    how = "left",
)

In [111]:
# link_all_conflated_gdf = pd.merge(
#     link_all_conflated_gdf,
#     pems_lanes_df,
#     how = "left",
#     on = "shstReferenceId"
# )

In [112]:
# Summarize the conflated network: link count and resulting columns.
conflation_summary = 'after conflation, {} links with the follow columns: \n{}'
print(conflation_summary.format(len(link_all_conflated_gdf), link_all_conflated_gdf.columns))

(1705772, 63)

Index(['access', 'area', 'bike_access', 'bridge', 'drive_access', 'est_width',
       'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse',
       'lanes', 'link', 'maxspeed', 'name', 'oneWay', 'ref', 'roadway',
       'roundabout', 'service', 'shstGeometryId', 'shstReferenceId',
       'toIntersectionId', 'tunnel', 'u', 'v', 'walk_access', 'wayId', 'width',
       'geometry', 'pp_tomtom_link_id', 'tomtom_ID', 'F_JNCTID', 'T_JNCTID',
       'tomtom_lanes', 'tomtom_FRC', 'tomtom_name', 'tomtom_shieldnum',
       'tomtom_rtedir', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
       'TM2_ASSIGNABLE', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
       'TM2Marin_FT', 'TM2Marin_ASSIGNABLE', 'sfcta_A', 'sfcta_B', 'sfcta_FT',
       'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM',
       'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
       'pems_lanes_OR', 'PEMSID'],
      dtype='object')

In [114]:
# Spot check: item [1] of the `lanes` entry on the 20th link (position 19).
link_all_conflated_gdf["lanes"].iloc[19][1]

'2'

# Write out standard link json and link feather

In [88]:
# The standard link schema is whatever columns the step-3 link.json carried.
link_json_columns = list(link_df.columns)

In [89]:
# Display the standard link columns that the link.json / link.feather exports are restricted to.
link_json_columns

['access',
 'area',
 'bike_access',
 'bridge',
 'drive_access',
 'est_width',
 'fromIntersectionId',
 'highway',
 'id',
 'junction',
 'key',
 'landuse',
 'lanes',
 'link',
 'maxspeed',
 'name',
 'oneWay',
 'ref',
 'roadway',
 'roundabout',
 'service',
 'shstGeometryId',
 'shstReferenceId',
 'toIntersectionId',
 'tunnel',
 'u',
 'v',
 'walk_access',
 'wayId',
 'width']

In [119]:
# Display every column currently on the conflated network.
link_all_conflated_gdf.columns

Index(['access', 'area', 'bike_access', 'bridge', 'drive_access', 'est_width',
       'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse',
       'lanes', 'link', 'maxspeed', 'name', 'oneWay', 'ref', 'roadway',
       'roundabout', 'service', 'shstGeometryId', 'shstReferenceId',
       'toIntersectionId', 'tunnel', 'u', 'v', 'walk_access', 'wayId', 'width',
       'geometry', 'pp_tomtom_link_id', 'tomtom_ID', 'F_JNCTID', 'T_JNCTID',
       'tomtom_lanes', 'tomtom_FRC', 'tomtom_name', 'tomtom_shieldnum',
       'tomtom_rtedir', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
       'TM2_ASSIGNABLE', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
       'TM2Marin_FT', 'TM2Marin_ASSIGNABLE', 'sfcta_A', 'sfcta_B', 'sfcta_FT',
       'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM',
       'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
       'pems_lanes_OR', 'PEMSID', 'tomtom_FRC_def', 'TM2_FT_def'],
      dtype='object')

In [115]:
# Spot check: show the conflated record(s) for one ShSt reference id.
link_all_conflated_gdf.loc[
    link_all_conflated_gdf["shstReferenceId"] == "6233a20ff0d9dc29be597a06ffd19727"
]

Unnamed: 0,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,highway,id,junction,...,sfcta_STREETNAME,sfcta_LANE_AM,sfcta_LANE_OP,sfcta_LANE_PM,pems_lanes_FF,pems_lanes_FR,pems_lanes_HV,pems_lanes_ML,pems_lanes_OR,PEMSID
348788,,,1,,0,,f407efc156163c556b516200a966187e,track,b5a13d71332300baf271a3dde50dd06d,,...,,,,,,,,,,


In [116]:
# Names of the object-dtype columns on the conflated network.
# (The feather export cell recomputes object_col from its own frame before using it.)
object_col = link_all_conflated_gdf.select_dtypes(['object']).columns.tolist()

In [117]:
print("-------write out link json---------")

# Only the standard link attributes (the columns of the step-3 link.json) are
# written to link.json; the conflated third-party columns are not included.
link_prop = link_json_columns

out = link_all_conflated_gdf[link_prop].to_json(orient = "records")

# open(..., 'w') does not create missing parent folders, so make sure the
# step-4 output folder exists before writing into it.
step4_output_dir = data_interim_dir + 'step4_conflate_with_tomtom/'
os.makedirs(step4_output_dir, exist_ok = True)

with open(step4_output_dir + 'link.json', 'w') as f:
    f.write(out)

-------write out link json---------


In [118]:
print("-------write out link feather---------")

# Work on a copy restricted to the standard link columns.
link_feather = link_all_conflated_gdf[link_json_columns].copy()

# Every object-dtype column is cast to str before writing.
# Note that astype(str) also turns missing values into the text "nan".
object_col = link_feather.select_dtypes(['object']).columns.tolist()
link_feather = link_feather.astype({col: str for col in object_col})

link_feather.to_feather(data_interim_dir + 'step4_conflate_with_tomtom/link.feather')

-------write out link feather---------


In [119]:
# Every column except the geometry is handed to link_df_to_geojson as the
# property list for the exported features.
link_all_conflated_gdf_prop = link_all_conflated_gdf.columns.drop("geometry")
link_all_conflated_geojson = link_df_to_geojson(link_all_conflated_gdf, link_all_conflated_gdf_prop)

geojson_path = data_interim_dir + 'step4_conflate_with_tomtom/link.geojson'
with open(geojson_path, "w") as f:
    json.dump(link_all_conflated_geojson, f)

# Write out conflation result database

In [120]:
# tomtom FRC dictionary

tomtom_FRC_dict = {
    0: "0-Motorway, Freeway, or Other Major Road",
    1: "1-Major Road Less Important than a Motorway",
    2: "2-Other Major Road",
    3: "3-Secondary Road",
    4: "4-Local Connecting Road",
    5: "5-Local Road of High Importance",
    6: "6-Local Road",
    7: "7-Local Road of Minor Importance",
    8: "8-Other Road"
}

link_all_conflated_gdf["tomtom_FRC_def"] = link_all_conflated_gdf["tomtom_FRC"].map(tomtom_FRC_dict)

In [121]:
# Number of links per tomtom functional road class label.
link_all_conflated_gdf["tomtom_FRC_def"].value_counts()

6-Local Road                                   345262
7-Local Road of Minor Importance               325150
5-Local Road of High Importance                 83747
4-Local Connecting Road                         71881
8-Other Road                                    35400
3-Secondary Road                                14483
1-Major Road Less Important than a Motorway      1851
0-Motorway, Freeway, or Other Major Road         1217
2-Other Major Road                                408
Name: tomtom_FRC_def, dtype: int64

In [122]:
# TM2 facility type dictionary: FT code (0-7) -> "<code>-<description>" label.
# The list position of each label is its FT code.
TM2_FT_dict = dict(enumerate([
    "0-Connector",
    "1-Freeway to Freeway",
    "2-Freeway",
    "3-Expressway",
    "4-Collector",
    "5-Ramp",
    "6-Special Facility",
    "7-Major Arterial",
]))

# Codes without an entry in the dictionary (including missing values) map to NaN.
tm2_ft_codes = link_all_conflated_gdf["TM2_FT"]
link_all_conflated_gdf["TM2_FT_def"] = tm2_ft_codes.map(TM2_FT_dict)

# Number of links per TM2 facility type label.
link_all_conflated_gdf["TM2_FT_def"].value_counts()

4-Collector             540491
7-Major Arterial        141340
0-Connector              56409
3-Expressway             13888
2-Freeway                 3271
5-Ramp                    2168
1-Freeway to Freeway       156
Name: TM2_FT_def, dtype: int64

In [123]:
# write conflation data base

tableau_fields = ["shstReferenceId", "roadway", "lanes", "drive_access", "bike_access", "walk_access",
                 "tomtom_FRC", "tomtom_FRC_def", "tomtom_lanes", "pp_tomtom_link_id", 'F_JNCTID', 'T_JNCTID',
                  'tomtom_name', 'tomtom_shieldnum', 'tomtom_rtedir', 'TM2Marin_A',
                   'TM2Marin_B', 'TM2Marin_FT', 'TM2Marin_LANES', 'TM2Marin_ASSIGNABLE', 
                  'TM2_A', 'TM2_B', 'TM2_FT', "TM2_FT_def", 'TM2_LANES', 'TM2_ASSIGNABLE', 
                  "sfcta_A", 'sfcta_B', "sfcta_STREETNAME", 'sfcta_FT', 'sfcta_LANE_AM', 'sfcta_LANE_OP',
                   'sfcta_LANE_PM', 'PEMSID', 'pems_lanes_FF', 'pems_lanes_FR',
       'pems_lanes_HV', 'pems_lanes_ML', 'pems_lanes_OR']

link_all_conflated_gdf[tableau_fields].rename(columns = {"lanes" : "lanes_osm",
                                                         "pp_tomtom_link_id" : "tomtom_unique_id"}
                              ).to_csv(data_interim_dir + "conflation_result.csv",
                             index = False)

In [124]:
# Spot check: SFCTA PM lane value conflated onto one ShSt reference id.
link_all_conflated_gdf.loc[
    link_all_conflated_gdf["shstReferenceId"] == "0d2e3cab2e51b61fc6b42641fa648540",
    "sfcta_LANE_PM",
]

47077    0.0
Name: sfcta_LANE_PM, dtype: float64

In [125]:
# Link counts by TM2 assignable flag (rows) and TM2 lane count (columns).
pd.crosstab(index = link_all_conflated_gdf["TM2_ASSIGNABLE"],
            columns = link_all_conflated_gdf["TM2_LANES"])

TM2_LANES,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0
TM2_ASSIGNABLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,56409,1690,1,0,0,0,0,0,0,0
1.0,0,642776,37448,14234,3813,1151,186,12,2,1


In [126]:
# (rows, columns) of the links that received a TM2 non-Marin match.
link_all_conflated_gdf.loc[link_all_conflated_gdf["TM2_A"].notna()].shape

(757723, 65)

In [127]:
# Link counts by TM2 Marin assignable flag (rows) and TM2 Marin lane count (columns).
pd.crosstab(index = link_all_conflated_gdf["TM2Marin_ASSIGNABLE"],
            columns = link_all_conflated_gdf["TM2Marin_LANES"])

TM2Marin_LANES,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0
TM2Marin_ASSIGNABLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,56409,1692,1,0,0,0,0,0,0,0
1.0,0,642194,37944,14294,3828,1155,188,12,2,1


In [128]:
# (rows, columns) of the links that received a TM2 Marin match.
link_all_conflated_gdf.loc[link_all_conflated_gdf["TM2Marin_A"].notna()].shape

(757720, 65)

In [129]:
# (rows, columns) of the links that carry a PEMS station list.
# The PEMS merge cell is commented out, so PEMSID may not exist; in that case
# no link has a PEMS match and the row count is reported as 0 instead of
# failing on the missing column.
if "PEMSID" in link_all_conflated_gdf.columns:
    pems_matched_shape = link_all_conflated_gdf[link_all_conflated_gdf["PEMSID"].notnull()].shape
else:
    pems_matched_shape = (0, link_all_conflated_gdf.shape[1])
pems_matched_shape

(1389, 65)

In [130]:
# (rows, columns) of the links that received a tomtom match.
link_all_conflated_gdf.loc[link_all_conflated_gdf["pp_tomtom_link_id"].notna()].shape

(879399, 65)

In [131]:
# (rows, columns) of the links that received an SFCTA stick-network match.
link_all_conflated_gdf.loc[link_all_conflated_gdf["sfcta_A"].notna()].shape

(34906, 65)