In [529]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import requests
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
import fiona
from shapely.geometry import Point

In [2]:
from methods import read_shst_extract
from methods import link_df_to_geojson
from methods import point_df_to_geojson

In [46]:
data_interim_dir = "../../data/interim/"

# Prepare tomtom for conflation

In [14]:
# TomTom source: the regional network stored in a .gdb file.
gdb_file = "../../data/external/tomtom/Network_region.gdb"

# Enumerate the layers available in the .gdb file (kept for inspection).
layers = fiona.listlayers(gdb_file)

# Read the street-link layer and reproject it to EPSG:4326 (lon/lat) in one chain.
tomtom_raw_gdf = (
    gpd.read_file(gdb_file, layer='mn_nw_BayArea')
    .to_crs({'init': 'epsg:4326'})
)

# Show the resulting CRS as the cell output.
tomtom_raw_gdf.crs

{'init': 'epsg:4326'}

In [39]:
# The Bay Area TomTom layer has no existing unique link handle, so we need to create one.

In [15]:
# total # tomtom links

tomtom_raw_gdf.shape

(602060, 53)

In [43]:
# unique tomtom handles

len(tomtom_raw_gdf.groupby(["ID", "F_JNCTID", "T_JNCTID"]).count())

601846

In [None]:
# generating unique handle for tomtom
# The handle is the 1-based row position, so it is reproducible only while the layer
# is read in the same row order.
# NOTE(review): the ShSt match output is joined back to this frame on this id
# (pp_tomtom_link_id == tomtom_link_id) — confirm the row order has not changed since the
# *.in.geojson files were written.

tomtom_raw_gdf["tomtom_link_id"] = range(1, len(tomtom_raw_gdf)+1)

In [51]:
# Print the component LineStrings of one TomTom link (id 97874) to inspect how a link's
# geometry is stored.
sample_link_geometry = tomtom_raw_gdf.loc[tomtom_raw_gdf.tomtom_link_id == 97874, "geometry"].iloc[0]

# Walk the parts through `.geoms`: iterating a multi-part shapely geometry directly is
# deprecated in shapely 1.8 and removed in 2.0. A single-part geometry has no `.geoms`,
# so it is treated as a one-element collection.
for line in getattr(sample_link_geometry, "geoms", [sample_link_geometry]):
    print(line)

LINESTRING (-122.4323109994277 37.76753700037561, -122.4334150001658 37.76747600031828)


# Partition tomtom by county for shst match

In [44]:
# Write one GeoJSON per county boundary file (boundary_1 .. boundary_14), each holding the
# TomTom links that intersect that boundary. A link touching several boundaries is written
# to each of the corresponding files.
for county_no in range(1, 15):
    boundary_path = "../../data/external/county_boundaries/boundary_" + str(county_no) + ".geojson"
    boundary_gdf = gpd.read_file(boundary_path)

    # Merge the boundary features into a single geometry for the intersection test.
    county_shape = boundary_gdf.geometry.unary_union
    in_county = tomtom_raw_gdf.intersects(county_shape)
    sub_tomtom_gdf = tomtom_raw_gdf[in_county].copy()

    # Only the link handle and the geometry are exported.
    out_path = "../../data/external/tomtom/tomtom" + str(county_no) + ".in.geojson"
    sub_tomtom_gdf[["tomtom_link_id", "geometry"]].to_file(out_path, driver="GeoJSON")

In [None]:
tomtom_raw_gdf.head(3)

In [None]:
tomtom_raw_gdf[~(tomtom_raw_gdf.geom_type == 'MultiLineString')]

In [66]:
tomtom_raw_gdf.crs

{'init': 'epsg:4326'}

# Prepare TM2 for conflation

In [71]:
# loading TM2 non Marin version data source
# http://bayareametro.github.io/travel-model-two/input/#roadway-network

# Path to the link shapefile, relative to this notebook's directory.
tm2_link_file = "../../data/external/TM2_nonMarin/mtc_final_network_base.shp"

# street link layer
# Reads every link record; the CRS is assigned and the links are filtered in later cells.

tm2_link_gdf = gpd.read_file(tm2_link_file)


In [73]:
# Assign the source CRS by hand. This only labels the existing coordinates; it does not
# transform them.
# NOTE(review): presumably needed because the shapefile is read without a usable CRS — confirm
# that esri:102646 is the projection the file was actually written in.
tm2_link_gdf.crs = {"init" : "esri:102646"}

# convert to EPSG:4326 lat-lon

tm2_link_gdf = tm2_link_gdf.to_crs({'init': 'epsg:4326'})
tm2_link_gdf.crs

{'init': 'epsg:4326'}

In [530]:
tm2_link_gdf.shape

(1405145, 41)

In [109]:
tm2_link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1405145 entries, 0 to 1405144
Data columns (total 41 columns):
A              1405145 non-null int64
B              1405145 non-null int64
NUMLANES       1405145 non-null int64
F_JNCTID       1405145 non-null int64
T_JNCTID       1405145 non-null int64
FRC            1405145 non-null int64
NAME           741995 non-null object
FREEWAY        1405145 non-null int64
TOLLRD         291 non-null object
ONEWAY         129292 non-null object
KPH            1405145 non-null int64
MINUTES        1405145 non-null float64
CARRIAGE       100 non-null object
LANES          1405145 non-null int64
RAMP           1405145 non-null int64
SPEEDCAT       1405145 non-null int64
FEET           1405145 non-null float64
ASSIGNABLE     1405145 non-null int64
CNTYPE         1405113 non-null object
TRANSIT        1405145 non-null int64
HIGHWAYT       223551 non-null object
B_CLASS        1405145 non-null int64
GRADE_CAT      1405145 non-null int64
PED_FL

In [75]:
tm2_link_gdf.CNTYPE.value_counts()

TANA     807224
MAZ      252437
PED      221905
TAZ       63182
TAP       43512
USE        8981
BIKE       6678
LRAIL       864
CRAIL       144
HRAIL        90
FERRY        52
EXT          44
Name: CNTYPE, dtype: int64

In [77]:
# Keep only links whose CNTYPE is BIKE, PED or TANA; every other CNTYPE value is dropped.
# .copy() makes the subset an independent frame rather than a view of tm2_link_gdf.
tm2_link_roadway_gdf = tm2_link_gdf[tm2_link_gdf.CNTYPE.isin(["BIKE", "PED", "TANA"])].copy()

In [78]:
tm2_link_roadway_gdf.shape

(1035807, 41)

In [160]:
tm2_link_roadway_gdf.groupby(["A", "B"]).count().shape

(1035807, 39)

In [80]:
tm2_link_roadway_gdf.head(3)

Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,TAP_DRIVE,FT,FFS,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry
178886,1000000,1001408,0,0,0,0,,0,,,...,0,0,0.0,0,0,0,0,1,0,LINESTRING (-122.4631000108774 37.772600013153...
178887,1000000,1032451,0,0,0,0,,0,,,...,0,0,0.0,0,0,0,0,1,0,LINESTRING (-122.4631000108774 37.772600013153...
178888,1000001,1023686,0,0,0,0,,0,,,...,0,0,0.0,0,0,0,0,1,0,LINESTRING (-122.3923000045329 37.775399991779...


# Partition TM2 Non Marin for shst Match

In [82]:
# Write one GeoJSON per county boundary file (boundary_1 .. boundary_14), each holding the
# TM2 non-Marin roadway links that intersect that boundary.
for county_no in range(1, 15):
    boundary_path = "../../data/external/county_boundaries/boundary_" + str(county_no) + ".geojson"
    boundary_gdf = gpd.read_file(boundary_path)

    # Merge the boundary features into a single geometry for the intersection test.
    county_shape = boundary_gdf.geometry.unary_union
    in_county = tm2_link_roadway_gdf.intersects(county_shape)
    sub_gdf = tm2_link_roadway_gdf[in_county].copy()

    # Only the A/B node pair and the geometry are exported.
    out_path = "../../data/external/TM2_nonMarin/tm2nonMarin_" + str(county_no) + ".in.geojson"
    sub_gdf[["A", "B", "geometry"]].to_file(out_path, driver="GeoJSON")

# Prepare TM2 Marin for conflation

In [84]:
# loading TM2 Marin version data source
# http://bayareametro.github.io/travel-model-two/input/#roadway-network

# Same file name as the non-Marin network, but read from the TM2_Marin folder.
tm2_marin_link_file = "../../data/external/TM2_Marin/mtc_final_network_base.shp"

# street link layer
# Reads every link record; the CRS is assigned and the links are filtered in later cells.

tm2_marin_link_gdf = gpd.read_file(tm2_marin_link_file)

In [87]:
tm2_marin_link_gdf.CNTYPE.value_counts()

TANA     807115
MAZ      252435
PED      221905
TAZ       63182
TAP       43894
USE        8797
BIKE       6678
LRAIL       864
CRAIL       144
HRAIL        90
FERRY        52
EXT          44
Name: CNTYPE, dtype: int64

In [88]:
# Assign the source CRS by hand. This only labels the existing coordinates; it does not
# transform them.
# NOTE(review): presumably needed because the shapefile is read without a usable CRS — confirm
# that esri:102646 is the projection the file was actually written in.
tm2_marin_link_gdf.crs = {"init" : "esri:102646"}

# convert to EPSG:4326 lat-lon

tm2_marin_link_gdf = tm2_marin_link_gdf.to_crs({'init': 'epsg:4326'})
tm2_marin_link_gdf.crs

{'init': 'epsg:4326'}

In [89]:
# Keep only links whose CNTYPE is BIKE, PED or TANA; every other CNTYPE value is dropped.
# .copy() makes the subset an independent frame rather than a view of tm2_marin_link_gdf.
tm2_marin_link_roadway_gdf = tm2_marin_link_gdf[tm2_marin_link_gdf.CNTYPE.isin(["BIKE", "PED", "TANA"])].copy()

In [91]:
tm2_marin_link_roadway_gdf.shape

(1035698, 47)

In [93]:
tm2_marin_link_roadway_gdf.groupby(["A", "B"]).count().shape

(1035698, 45)

In [99]:
tm2_link_roadway_gdf.FRC.value_counts()

6    326320
7    270378
0    235558
4     85342
5     81408
3     22340
1      8590
8      4621
2      1250
Name: FRC, dtype: int64

In [100]:
tm2_marin_link_roadway_gdf.FRC.value_counts()

6    326322
7    270378
0    235461
4     85342
5     81408
3     22341
1      8575
8      4621
2      1250
Name: FRC, dtype: int64

# Partition TM2 Marin for shst Match 

In [92]:
# Write one GeoJSON per county boundary file (boundary_1 .. boundary_14), each holding the
# TM2 Marin roadway links that intersect that boundary.
for county_no in range(1, 15):
    boundary_path = "../../data/external/county_boundaries/boundary_" + str(county_no) + ".geojson"
    boundary_gdf = gpd.read_file(boundary_path)

    # Merge the boundary features into a single geometry for the intersection test.
    county_shape = boundary_gdf.geometry.unary_union
    in_county = tm2_marin_link_roadway_gdf.intersects(county_shape)
    sub_gdf = tm2_marin_link_roadway_gdf[in_county].copy()

    # Only the A/B node pair and the geometry are exported.
    out_path = "../../data/external/TM2_Marin/tm2Marin_" + str(county_no) + ".in.geojson"
    sub_gdf[["A", "B", "geometry"]].to_file(out_path, driver="GeoJSON")

# Prepare SFCTA true shape file for conflation

In [151]:
# SFCTA "true shape" street centerlines; path is relative to this notebook's directory.
sfcta_file = "../../data/external/stclines/stclines.shp"

# Read every centerline record; the CRS is assigned in the next cell.
sfcta_gdf = gpd.read_file(sfcta_file)

In [153]:
# Assign the source CRS by hand; this labels the coordinates without transforming them.
# NOTE(review): confirm epsg:2227 is the projection the shapefile was written in.
sfcta_gdf.crs = {"init" : "epsg:2227"}

In [154]:
# convert to EPSG:4326 lat-lon

sfcta_gdf = sfcta_gdf.to_crs({'init': 'epsg:4326'})
# Show the resulting CRS as the cell output.
sfcta_gdf.crs

{'init': 'epsg:4326'}

In [165]:
# There is no unique id in the SFCTA true shape file, not even the combination of A/B
# (OBJECTID and OBJECTID+AB both have fewer distinct values than there are rows).
# The handle is therefore the 1-based row position, reproducible only while the file is
# read in the same row order.

sfcta_gdf["sfcta_trueshape_id"] = range(1, len(sfcta_gdf) + 1)

In [164]:
# Show the rows that remain after dropping records that are identical in every source
# attribute. Two columns are left out of the comparison key:
#   - geometry, which is not compared;
#   - sfcta_trueshape_id, the generated handle. It is unique per row, so keeping it in the
#     key would make every row distinct and nothing would ever be dropped when the
#     notebook is run top to bottom.
attribute_cols = [col for col in sfcta_gdf.columns
                  if col not in ("geometry", "sfcta_trueshape_id")]
sfcta_gdf.drop_duplicates(subset = attribute_cols)

Unnamed: 0,OBJECTID,CNN,STREET,ST_TYPE,LF_FADD,LF_TOADD,RT_FADD,RT_TOADD,F_NODE_CNN,T_NODE_CNN,...,JURISDICTI,NHOOD,LAYER,CNNTEXT,STREETNAME,A,B,AB,MEDIANDIV,geometry
0,1,7520002.0,JESSIE,ST,351.0,399.0,350.0,398.0,54081000,24884000,...,DPW,Financial District South,STREETS,7520002,JESSIE ST,54081,24884,54081 24884,0,LINESTRING (-122.4065825247182 37.783499222590...
1,2,7520001.0,JESSIE,ST,301.0,349.0,300.0,348.0,24635000,54080000,...,DPW,Financial District South,STREETS,7520001,JESSIE ST,24635,54080,24635 54080,0,LINESTRING (-122.4047950624642 37.784908299223...
2,3,3186101.0,ROBERT C LEVY,TUNL,0.0,0.0,0.0,0.0,25111000,25308000,...,DPW,Nob Hill,STREETS,3186101,ROBERT C LEVY TUNL,25111,25308,25111 25308,0,LINESTRING (-122.4102635257008 37.797420184978...
3,4,3186201.0,ROBERT C LEVY,TUNL,0.0,0.0,0.0,0.0,25111000,25308000,...,DPW,Nob Hill,STREETS,3186201,ROBERT C LEVY TUNL,25111,25308,25111 25308,0,LINESTRING (-122.4102635257008 37.797420184978...
4,5,5754001.0,FRANCISCO,ST,101.0,175.0,100.0,174.0,25385000,50352000,...,,Telegraph Hill,UPROW,5754001,FRANCISCO ST,25385,50352,25385 50352,0,LINESTRING (-122.4069566340834 37.805494468097...
5,6,5491001.0,FILBERT,ST,211.0,219.0,216.0,222.0,50354000,24865000,...,,Telegraph Hill,UPROW,5491001,FILBERT ST,50354,24865,50354 24865,0,LINESTRING (-122.4033973604645 37.802113386380...
6,7,13087001.0,VALLEJO,ST,401.0,433.0,400.0,432.0,24841000,50210000,...,DPW,Telegraph Hill,PAPER,13087001,VALLEJO ST,24841,50210,24841 50210,0,LINESTRING (-122.4040531316579 37.799172217316...
7,8,3126001.0,BRIGHT,ST,51.0,99.0,50.0,98.0,54078000,54079000,...,DPW,Ingleside Heights,Paper_fwys,3126001,BRIGHT ST,54078,54079,54078 54079,0,LINESTRING (-122.4634754559928 37.711861596185...
8,9,5769000.0,FRANCISCO,ST,1001.0,1099.0,1000.0,1098.0,25581000,26702000,...,DPW,Russian Hill,STREETS,5769000,FRANCISCO ST,25581,26702,25581 26702,0,LINESTRING (-122.4216263341576 37.803502957971...
9,10,8155001.0,LARKIN,ST,2801.0,2849.0,2800.0,2848.0,25581000,50368000,...,DPW,Russian Hill,PAPER,8155001,LARKIN ST,25581,50368,25581 50368,0,LINESTRING (-122.4216263341576 37.803502957971...


In [156]:
sfcta_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 16165 entries, 0 to 16164
Data columns (total 23 columns):
OBJECTID      16165 non-null int64
CNN           16165 non-null float64
STREET        16145 non-null object
ST_TYPE       15713 non-null object
LF_FADD       16165 non-null float64
LF_TOADD      16165 non-null float64
RT_FADD       16165 non-null float64
RT_TOADD      16165 non-null float64
F_NODE_CNN    16165 non-null int64
T_NODE_CNN    16165 non-null int64
ZIP_CODE      16071 non-null object
DISTRICT      16159 non-null object
ACCEPTED      16159 non-null object
JURISDICTI    12980 non-null object
NHOOD         16159 non-null object
LAYER         16159 non-null object
CNNTEXT       16148 non-null object
STREETNAME    16161 non-null object
A             16165 non-null int64
B             16165 non-null int64
AB            16164 non-null object
MEDIANDIV     16165 non-null int64
geometry      16165 non-null object
dtypes: float64(5), int64(6), object(12)
memory usage: 2

In [159]:
sfcta_gdf.OBJECTID.nunique()

16091

In [161]:
sfcta_gdf.groupby(["OBJECTID", "AB"]).count().shape

(16117, 21)

# Partition SFCTA for conflation

In [167]:
# only conflate SF county from SFCTA
# The whole frame is written (no county partition); only the generated handle and the
# geometry are exported.
# NOTE(review): the output folder is "sfclines" while the input was read from "stclines" —
# confirm this is intentional and that the folder exists.

sfcta_gdf[["sfcta_trueshape_id", "geometry"]].to_file("../../data/external/sfclines/sfcta.in.geojson",
                                                driver = "GeoJSON")

# Prepare SFCTA Stick Network for conflation

In [376]:
# SFCTA stick-network links; path is relative to this notebook's directory.
sfcta_stick_file = "../../data/external/sfcta/SanFrancisco_links.shp"

# Read every link record; the CRS is assigned in the next cell.
sfcta_stick_gdf = gpd.read_file(sfcta_stick_file)

In [377]:
# Assign the source CRS by hand; this labels the coordinates without transforming them.
# NOTE(review): confirm epsg:2227 is the projection the shapefile was written in.
sfcta_stick_gdf.crs = {"init" : "epsg:2227"}

In [378]:
# convert to EPSG:4326 lat-lon

sfcta_stick_gdf = sfcta_stick_gdf.to_crs({'init': 'epsg:4326'})
# Show the resulting CRS as the cell output.
sfcta_stick_gdf.crs

{'init': 'epsg:4326'}

In [379]:
sfcta_stick_gdf

Unnamed: 0,A,B,TOLL,USE,CAP,AT,FT,STREETNAME,TYPE,MTYPE,...,TSIN,BIKE_CLASS,PER_RISE,ONEWAY,PROJ,DTA_EDIT_FL,TOLLTIME,PHASE,ACTION,geometry
0,1,20368,0,1,2000,3,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.413689287528 37.7094935033756...
1,1,20373,0,1,2000,3,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.413689287528 37.7094935033756...
2,1,20375,0,1,2000,3,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.413689287528 37.7094935033756...
3,1,20408,0,1,2000,3,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.413689287528 37.7094935033756...
4,1,33349,0,1,2000,3,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.413689287528 37.7094935033756...
5,2,22479,0,1,2000,2,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.4636867689285 37.709210357198...
6,2,22482,0,1,2000,2,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.4636867689285 37.709210357198...
7,2,22483,0,1,2000,2,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.4636867689285 37.709210357198...
8,2,22513,0,1,2000,2,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.4636867689285 37.709210357198...
9,2,33454,0,1,2000,2,6,,,SF,...,T,0,0.0,0,,0,0.0,0,,LINESTRING (-122.4636867689285 37.709210357198...


In [380]:
sfcta_stick_gdf.columns

Index(['A', 'B', 'TOLL', 'USE', 'CAP', 'AT', 'FT', 'STREETNAME', 'TYPE',
       'MTYPE', 'SPEED', 'DISTANCE', 'TIME', 'LANE_AM', 'LANE_OP', 'LANE_PM',
       'BUSLANE_AM', 'BUSLANE_OP', 'BUSLANE_PM', 'TOLLAM_DA', 'TOLLAM_SR2',
       'TOLLAM_SR3', 'TOLLPM_DA', 'TOLLPM_SR2', 'TOLLPM_SR3', 'TOLLEA_DA',
       'TOLLEA_SR2', 'TOLLEA_SR3', 'TOLLMD_DA', 'TOLLMD_SR2', 'TOLLMD_SR3',
       'TOLLEV_DA', 'TOLLEV_SR2', 'TOLLEV_SR3', 'VALUETOLL_F', 'PASSTHRU',
       'BUSTPS_AM', 'BUSTPS_OP', 'BUSTPS_PM', 'TSVA', 'TSIN', 'BIKE_CLASS',
       'PER_RISE', 'ONEWAY', 'PROJ', 'DTA_EDIT_FL', 'TOLLTIME', 'PHASE',
       'ACTION', 'geometry'],
      dtype='object')

# Write out SFCTA stick network for conflation

In [385]:
# Boundary file #4 is the clip area for the SFCTA stick network.
boundary_4_gdf = gpd.read_file("../../data/external/county_boundaries/boundary_4.geojson")

# Drop every link with FT == 6 before export.
# NOTE(review): presumably FT 6 marks non-roadway links (e.g. connectors) — confirm.
sfcta_stick_roadway_gdf = sfcta_stick_gdf[sfcta_stick_gdf.FT != 6].copy()

# Export the A/B node pair and geometry of the links that intersect the boundary.
within_boundary = sfcta_stick_roadway_gdf.intersects(boundary_4_gdf.geometry.unary_union)
sfcta_stick_in_gdf = sfcta_stick_roadway_gdf[within_boundary][['A', 'B', "geometry"]]
sfcta_stick_in_gdf.to_file("../../data/external/sfcta/sfcta_in.geojson", driver="GeoJSON")

# Prepare PEMS for conflation

In [132]:
# PEMS observations; the file holds one row per station, time period and year.
pems_file = "../../data/external/mtc/pems_period.csv"

# Plain pandas read with default parsing; geometry is built from longitude/latitude later.
pems_df = pd.read_csv(pems_file)

In [198]:
pems_df.columns

Index(['station', 'district', 'route', 'direction', 'type', 'time_period',
       'lanes', 'median_flow', 'avg_flow', 'sd_flow', 'median_speed',
       'avg_speed', 'sd_speed', 'median_occup', 'avg_occup', 'sd_occupancy',
       'days_observed', 'state_pm', 'abs_pm', 'latitude', 'longitude', 'year',
       'geometry'],
      dtype='object')

In [279]:
pems_df.columns

Index(['station', 'district', 'route', 'direction', 'type', 'time_period',
       'lanes', 'median_flow', 'avg_flow', 'sd_flow', 'median_speed',
       'avg_speed', 'sd_speed', 'median_occup', 'avg_occup', 'sd_occupancy',
       'days_observed', 'state_pm', 'abs_pm', 'latitude', 'longitude', 'year',
       'geometry'],
      dtype='object')

In [282]:
pems_df[pems_df.station == 404912]

Unnamed: 0,station,district,route,direction,type,time_period,lanes,median_flow,avg_flow,sd_flow,...,median_occup,avg_occup,sd_occupancy,days_observed,state_pm,abs_pm,latitude,longitude,year,geometry
58462,404912,4,80,E,ML,AM,5,21710.5,21609.441176,585.715682,...,0.086007,0.08637,0.007972,34,39.0,5.87,37.819788,-122.339284,2015,POINT (-122.339284 37.819788)
58463,404912,4,80,E,ML,EA,5,3655.0,3640.545455,160.732909,...,0.023093,0.02653,0.011436,33,39.0,5.87,37.819788,-122.339284,2015,POINT (-122.339284 37.819788)
58464,404912,4,80,E,ML,EV,5,36117.5,36121.928571,2254.275492,...,0.125401,0.12035,0.025382,28,39.0,5.87,37.819788,-122.339284,2015,POINT (-122.339284 37.819788)
58465,404912,4,80,E,ML,MD,5,31697.0,31727.4,1833.019886,...,0.100678,0.104338,0.013787,30,39.0,5.87,37.819788,-122.339284,2015,POINT (-122.339284 37.819788)
58466,404912,4,80,E,ML,PM,5,32495.0,32299.516129,725.068037,...,0.136917,0.14092,0.016671,31,39.0,5.87,37.819788,-122.339284,2015,POINT (-122.339284 37.819788)
67803,404912,4,80,E,ML,AM,5,21893.5,21657.166667,1198.05241,...,0.076067,0.078784,0.00929,72,40.0,5.87,37.819788,-122.339284,2016,POINT (-122.339284 37.819788)
67804,404912,4,80,E,ML,EA,5,3734.0,3736.959459,167.699866,...,0.019176,0.01986,0.003401,74,40.0,5.87,37.819788,-122.339284,2016,POINT (-122.339284 37.819788)
67805,404912,4,80,E,ML,EV,5,38699.0,38742.183099,2579.255337,...,0.087891,0.09131,0.014602,71,40.0,5.87,37.819788,-122.339284,2016,POINT (-122.339284 37.819788)
67806,404912,4,80,E,ML,MD,5,32742.0,32568.920635,1425.203147,...,0.095564,0.096204,0.009059,63,40.0,5.87,37.819788,-122.339284,2016,POINT (-122.339284 37.819788)
67807,404912,4,80,E,ML,PM,5,32201.5,32064.352941,1156.708593,...,0.120003,0.12381,0.022957,68,40.0,5.87,37.819788,-122.339284,2016,POINT (-122.339284 37.819788)


In [173]:
# Build one point per row from its (longitude, latitude) pair. Rows with missing
# coordinates are included here and removed in the next cell.
lon_lat_pairs = zip(pems_df.longitude, pems_df.latitude)
pems_df["geometry"] = list(map(Point, lon_lat_pairs))

# Wrap the frame so the "geometry" column is treated as its geometry.
pems_gdf = gpd.GeoDataFrame(pems_df)

In [186]:
# Keep only stations that have both coordinates.
has_location = pems_gdf.longitude.notnull() & pems_gdf.latitude.notnull()
pems_gdf = pems_gdf[has_location]

In [190]:
# Keep one record per (station, longitude, latitude): the CSV repeats each station for
# every time period and year, but only its location is written out.
station_key = ["station", "longitude", "latitude"]
pems_station_gdf = pems_gdf.drop_duplicates(subset=station_key)

pems_station_gdf[station_key + ["geometry"]].to_file(
    "../../data/external/mtc/pems.in.geojson",
    driver="GeoJSON",
)

# Read network

In [6]:
# Load the network written by step 3: link attributes come from link.json and the link
# geometries from shape.geojson; the two are joined on "id".
step3_dir = data_interim_dir + "step3_join_shst_extraction_with_osm/"

link_file = step3_dir + "link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

# Both inputs live under data_interim_dir. The shape path previously referenced
# `data_interim_folder`, a name this notebook never defines, which raises NameError on a
# fresh kernel.
shape_gdf = gpd.read_file(step3_dir + "shape.geojson")

# Left join: every link is kept, and a link whose id has no shape gets a missing geometry.
link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                    how = "left",
                    on = "id")

# link_df is a plain DataFrame, so wrap the merged result as a GeoDataFrame in lon/lat.
link_gdf = gpd.GeoDataFrame(link_gdf,
                            crs="EPSG:4326")

In [49]:
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 31 columns):
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunnel                object
u                     int64
v                     int64
walk_access           int64
wayId                 object
width                 object
geometry      

# Read TomTom ShSt Match result

In [19]:
# Read the TomTom ShSt match output found under the interim tomtom folder.
tomtom_match_dir = data_interim_dir + "tomtom/"
tomtom_match_gdf = read_shst_extract(tomtom_match_dir, "*_tomtom.out.matched.geojson")

# Rename the ShSt intersection columns to the names used by the network links so the two
# can be joined on them.
shst_intersection_cols = {
    "shstFromIntersectionId": "fromIntersectionId",
    "shstToIntersectionId": "toIntersectionId",
}
tomtom_match_gdf.rename(columns=shst_intersection_cols, inplace=True)

----------start reading shst extraction data-------------
reading shst extraction data :  D:/MTC/data/interim/tomtom\bike_rules\1_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\bike_rules\2_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\10_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\11_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\12_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\13_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\14_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\1_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\2_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/t

In [531]:
tomtom_match_gdf

Unnamed: 0,shstReferenceId,shstGeometryId,fromIntersectionId,toIntersectionId,gisReferenceId,gisGeometryId,gisTotalSegments,gisSegmentIndex,gisFromIntersectionId,gisToIntersectionId,startSideOfStreet,endSideOfStreet,sideOfStreet,score,matchType,pp_tomtom_link_id,geometry,source
0,340aca5eed3be40e615422d39544d223,52a5e0b4800e7283accda73c74108762,8711d153fefd546c2845baeb0f67bd68,ce8aa7e55b3c5f951154f22d1fb5efd0,35317b9b780d7f5843d00a207b13c092,d0b88c4ea04cca9bf9835f85df44ed32,2,1,91cf2a0dbffb2e7f14c03e72e704d3e5,a3ac6e509c62ac3d7796c8d8b75f72b2,right,left,unknown,4.68,hmm,337110,LINESTRING (-121.9121105815505 38.014193818053...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
1,4b1dd93afef058cf6b751dd55aa40031,2eba19c9a54083c15f8fd70e025559b4,ce8aa7e55b3c5f951154f22d1fb5efd0,594fb147712d1bf639de86532ff58434,35317b9b780d7f5843d00a207b13c092,d0b88c4ea04cca9bf9835f85df44ed32,2,2,91cf2a0dbffb2e7f14c03e72e704d3e5,a3ac6e509c62ac3d7796c8d8b75f72b2,right,left,unknown,4.68,hmm,337110,"LINESTRING (-121.9120067 38.0142604, -121.9118...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
2,9de152e522f71ed2e8fb4658213e2ad0,2eba19c9a54083c15f8fd70e025559b4,594fb147712d1bf639de86532ff58434,ce8aa7e55b3c5f951154f22d1fb5efd0,88acc59b5faa7dc7ab80892531d77978,d0b88c4ea04cca9bf9835f85df44ed32,2,1,a3ac6e509c62ac3d7796c8d8b75f72b2,91cf2a0dbffb2e7f14c03e72e704d3e5,right,left,unknown,4.68,hmm,337110,LINESTRING (-121.9118829091503 38.014152852369...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
3,ec573127f992ae2dd0fabaa6c6c70cbd,52a5e0b4800e7283accda73c74108762,ce8aa7e55b3c5f951154f22d1fb5efd0,8711d153fefd546c2845baeb0f67bd68,88acc59b5faa7dc7ab80892531d77978,d0b88c4ea04cca9bf9835f85df44ed32,2,2,a3ac6e509c62ac3d7796c8d8b75f72b2,91cf2a0dbffb2e7f14c03e72e704d3e5,right,left,unknown,4.68,hmm,337110,"LINESTRING (-121.9120067 38.0142604, -121.9121...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
4,b09838f5bba85e6f72a5d025c64f4c5f,cb0c7c67acc6c5ae4014a5e6a440e656,06b6e95d0de1a687c0aa3682a8c781b5,3c51d6bd57608d58ba09082b6b5ff904,dcaf838ca94ea539373fbe7d1a3d0e5d,2123da5e29f4836a23922e3725c206cd,2,1,fb2e2b5cf4f18ea282582ba908b35903,69df7691003f0dca1bf9acce7beb9030,left,right,unknown,7.87,hmm,489237,LINESTRING (-121.8990992349619 37.765437480739...,D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
5,79f5be81b9431c9b6ee5f3a861adf5b2,ffad91c6a5c5eac1803f14c6626da50d,3c51d6bd57608d58ba09082b6b5ff904,0eafc161d1bf63d09e8d17a11d46ee51,dcaf838ca94ea539373fbe7d1a3d0e5d,2123da5e29f4836a23922e3725c206cd,2,2,fb2e2b5cf4f18ea282582ba908b35903,69df7691003f0dca1bf9acce7beb9030,left,right,unknown,7.87,hmm,489237,"LINESTRING (-121.8991717 37.7655042, -121.8991...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
6,031396b53bd320bfa96fd88a8017985d,ffad91c6a5c5eac1803f14c6626da50d,0eafc161d1bf63d09e8d17a11d46ee51,3c51d6bd57608d58ba09082b6b5ff904,fa9f72d6a66c3a9aba647d8fe8c3b305,2123da5e29f4836a23922e3725c206cd,2,1,69df7691003f0dca1bf9acce7beb9030,fb2e2b5cf4f18ea282582ba908b35903,right,right,right,7.87,hmm,489237,"LINESTRING (-121.8992885 37.7656058, -121.8992...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
7,37f3e865c333e5a9a2390ee84d1eaaf7,cb0c7c67acc6c5ae4014a5e6a440e656,3c51d6bd57608d58ba09082b6b5ff904,06b6e95d0de1a687c0aa3682a8c781b5,fa9f72d6a66c3a9aba647d8fe8c3b305,2123da5e29f4836a23922e3725c206cd,2,2,69df7691003f0dca1bf9acce7beb9030,fb2e2b5cf4f18ea282582ba908b35903,right,right,right,7.87,hmm,489237,"LINESTRING (-121.8991717 37.7655042, -121.8991...",D:/MTC/data/interim/tomtom\bike_rules\1_tomtom...
8,456ba758c99bed5ceb9030e44ab36656,1c78571c094618c6be5512ab0ba28e69,c03538f1d44a953787e805d7153c538e,e8661658df8e8f57dfd4be76a73675e6,e9ec16de687222564740bb0f52369866,ecd5b61c731332f50dfbd3c8fe5446cd,1,1,1e21575f9699b6137e985d110b229e0b,bf07674675b8d0a0663694fde88e5e92,right,right,right,6.47,hmm,484729,LINESTRING (-122.4380561316604 38.187805173017...,D:/MTC/data/interim/tomtom\bike_rules\2_tomtom...
9,5541e1cbc5e7f96a016cc90d83d4bf7b,1c78571c094618c6be5512ab0ba28e69,e8661658df8e8f57dfd4be76a73675e6,c03538f1d44a953787e805d7153c538e,e5077e7b423d8cee96e9cebb98d8f367,ecd5b61c731332f50dfbd3c8fe5446cd,1,1,bf07674675b8d0a0663694fde88e5e92,1e21575f9699b6137e985d110b229e0b,left,left,left,6.47,hmm,484729,LINESTRING (-122.4380110968732 38.187749020001...,D:/MTC/data/interim/tomtom\bike_rules\2_tomtom...


In [20]:
tomtom_raw_gdf

Unnamed: 0,ID,FEATTYP,FT,F_JNCTID,F_JNCTTYP,T_JNCTID,T_JNCTTYP,PJ,METERS,FRC,...,ADA,TRANS,DYNSPEED,SPEEDCAT,NTHRUTRAF,ROUGHRD,PARTSTRUC,ORDER08,Shape_Length,geometry
0,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,1554.62,7,...,0,0,0,7,0,0,0,CA097,1989.817478,(LINESTRING (-122.8315739998535 38.62133700023...
1,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,551.09,7,...,0,0,0,6,0,0,0,CA097,704.628862,(LINESTRING (-122.83972099993 38.6229560004714...
2,6.840000e+13,4110,0,6.840001e+13,0,6.840002e+13,0,0,53.22,7,...,0,0,0,7,0,0,0,CA097,68.054041,(LINESTRING (-122.868480999434 38.626759999424...
3,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,45.20,7,...,0,0,0,7,0,0,0,CA097,58.007552,(LINESTRING (-122.8674820000745 38.63032300025...
4,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,30.24,7,...,0,0,0,7,0,0,0,CA097,38.659695,(LINESTRING (-122.8610680001998 38.64033899956...
5,6.840000e+13,4110,0,6.840002e+13,0,6.840001e+13,0,0,602.67,6,...,0,0,0,6,0,0,0,CA097,772.686674,(LINESTRING (-122.9378580002176 38.65743100040...
6,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,152.51,7,...,0,0,0,7,0,0,0,CA097,195.565558,(LINESTRING (-122.9092719998926 38.70904799989...
7,6.840000e+13,4110,0,6.840001e+13,0,6.840002e+13,0,0,466.16,7,...,0,0,0,6,0,0,0,CA097,598.825857,(LINESTRING (-122.9685940004959 38.77782800021...
8,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,80.25,6,...,0,0,0,7,0,0,0,CA097,102.906186,(LINESTRING (-123.0185530001317 38.78901899997...
9,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,127.11,7,...,0,0,0,7,0,0,0,CA097,162.910525,(LINESTRING (-123.0254980004591 38.79289899992...


In [528]:
tomtom_raw_gdf.TOLLRD.value_counts()

0     601466
11       480
21       114
Name: TOLLRD, dtype: int64

In [21]:
print(tomtom_raw_gdf.shape)
print(tomtom_match_gdf.shape)

print(tomtom_raw_gdf.columns)
print(tomtom_match_gdf.columns)

(602060, 53)
(1749759, 18)
Index(['ID', 'FEATTYP', 'FT', 'F_JNCTID', 'F_JNCTTYP', 'T_JNCTID', 'T_JNCTTYP',
       'PJ', 'METERS', 'FRC', 'NETCLASS', 'NETBCLASS', 'NET2CLASS', 'NAME',
       'NAMELC', 'SOL', 'NAMETYP', 'CHARGE', 'SHIELDNUM', 'RTETYP', 'RTEDIR',
       'RTEDIRVD', 'PROCSTAT', 'FOW', 'SLIPRD', 'FREEWAY', 'BACKRD', 'TOLLRD',
       'RDCOND', 'STUBBLE', 'PRIVATERD', 'CONSTATUS', 'ONEWAY', 'F_BP', 'T_BP',
       'F_ELEV', 'T_ELEV', 'KPH', 'MINUTES', 'POSACCUR', 'CARRIAGE', 'LANES',
       'RAMP', 'ADA', 'TRANS', 'DYNSPEED', 'SPEEDCAT', 'NTHRUTRAF', 'ROUGHRD',
       'PARTSTRUC', 'ORDER08', 'Shape_Length', 'geometry'],
      dtype='object')
Index(['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'gisReferenceId', 'gisGeometryId',
       'gisTotalSegments', 'gisSegmentIndex', 'gisFromIntersectionId',
       'gisToIntersectionId', 'startSideOfStreet', 'endSideOfStreet',
       'sideOfStreet', 'score', 'matchType', 'pp_tomtom_link_id', 'geome

In [329]:
# NAME, SHEILDNUM, RTEDIR
tomtom_raw_gdf.FREEWAY.value_counts()

0    586165
1     15895
Name: FREEWAY, dtype: int64

In [22]:
# Report ShSt match coverage: distinct matched TomTom link ids against all TomTom links.
# (A leading `pp_tomtom_link_id.max()` expression was dropped: it was not the last line of
# the cell, so its value was computed and never shown.)
matched_link_count = tomtom_match_gdf.pp_tomtom_link_id.nunique()
total_link_count = len(tomtom_raw_gdf)
print("Sharedstreets matched " + str(matched_link_count) + " out of " + str(total_link_count)
      + " total tomtom links.")

Sharedstreets matched 550620 out of 602060 total tomtom links.


In [330]:
# (Re)build the positional link handle so this cell does not depend on the earlier id cell.
tomtom_raw_gdf["tomtom_link_id"] = range(1, len(tomtom_raw_gdf)+1)

# TomTom attributes to carry onto each matched ShSt segment.
tomtom_attr_cols = [
    'tomtom_link_id', 'ID', 'F_JNCTID', 'T_JNCTID', 'LANES',
    "FRC", "NAME", "SHIELDNUM", "RTEDIR",
]

# Left join keeps every match row; the match's pp_tomtom_link_id is the handle that was
# written to the *.in.geojson files.
tomtom_gdf = pd.merge(
    tomtom_match_gdf,
    tomtom_raw_gdf[tomtom_attr_cols],
    how="left",
    left_on=['pp_tomtom_link_id'],
    right_on=['tomtom_link_id'],
)

print(tomtom_gdf.shape)

(1749759, 27)


# Join network with TomTom

In [331]:
# Join TomTom attributes onto the network links.
# A ShSt reference can appear in several match rows, so keep the first row per ShSt key
# to stop the left join below from multiplying link rows.
shst_key_cols = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']
unique_tomtom_match_gdf = tomtom_gdf.drop_duplicates(subset=shst_key_cols).copy()

# Prefix the TomTom attributes so they cannot be confused with the network's own columns.
tomtom_renames = {
    "ID": "tomtom_ID",
    "LANES": "tomtom_lanes",
    "FRC": "tomtom_FRC",
    "NAME": "tomtom_name",
    "SHIELDNUM": "tomtom_shieldnum",
    "RTEDIR": "tomtom_rtedir",
}
unique_tomtom_match_gdf.rename(columns=tomtom_renames, inplace=True)

# Columns brought over from the match: the join key plus the TomTom attributes.
tomtom_join_cols = shst_key_cols + [
    'pp_tomtom_link_id', "tomtom_ID", 'F_JNCTID', 'T_JNCTID',
    "tomtom_lanes", "tomtom_FRC", "tomtom_name", "tomtom_shieldnum",
    "tomtom_rtedir",
]

# Left join: every network link is kept, matched or not.
link_with_tomtom_gdf = pd.merge(
    link_gdf,
    unique_tomtom_match_gdf[tomtom_join_cols],
    how="left",
    on=['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId'],
)

In [332]:
# Sanity checks on the join. Only the last expression is rendered as the cell output;
# the two filters before it are evaluated and their results discarded.
# Links without a ShSt geometry id:
link_with_tomtom_gdf[link_with_tomtom_gdf.shstGeometryId.isnull()]
# Links whose ShSt geometry id differs from the link id:
link_with_tomtom_gdf[~(link_with_tomtom_gdf.shstGeometryId == link_with_tomtom_gdf.id)]
# Distribution of the network's own lanes attribute:
link_with_tomtom_gdf.lanes.value_counts()

nan                                  1359907
                                      134553
2                                      84235
[nan, nan]                             46733
3                                      13260
4                                      11417
[nan, nan, nan]                         9469
1                                       8812
5                                       4697
[, ]                                    4198
[2, 3]                                  2697
[2, 2]                                  1977
[nan, ]                                 1784
[, nan]                                 1177
[3, 3]                                  1043
[3, 4]                                   977
[, , ]                                   960
[nan, nan, nan, nan]                     960
[2, 2, 2]                                767
6                                        725
[2, nan]                                 723
[4, 4]                                   686
[3, 2]    

In [333]:
link_with_tomtom_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 40 columns):
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunnel                object
u                     int64
v                     int64
walk_access           int64
wayId                 object
width                 object
geometry      

In [334]:
link_with_tomtom_gdf.u.nunique()

661023

In [335]:
link_with_tomtom_gdf.fromIntersectionId.nunique()

661023

In [336]:
len(set(link_with_tomtom_gdf.u.tolist() + link_with_tomtom_gdf.v.tolist()))

661159

# Read third-party shst match results

In [386]:
# read TM2 non Marin conflation result and align the intersection id
# column names with the ones used on the network links

tm2nonMarin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_nonMarin/",
    "*tm2nonMarin.out.matched.geojson",
).rename(
    columns = {
        "shstFromIntersectionId" : "fromIntersectionId",
        "shstToIntersectionId" : "toIntersectionId",
    }
)


----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\10_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\11_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\12_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\13_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\14_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\1_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\2_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\3_tm2nonMarin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules

In [387]:
# read TM2 Marin conflation result and align the intersection id
# column names with the ones used on the network links

tm2marin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_Marin/",
    "*tm2Marin.out.matched.geojson",
).rename(
    columns = {
        "shstFromIntersectionId" : "fromIntersectionId",
        "shstToIntersectionId" : "toIntersectionId",
    }
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\10_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\11_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\12_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\13_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\14_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\1_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\2_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\3_tm2Marin.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\4_tm2Marin.out.matched.geojson
reading shst extrac

In [393]:
# read sfcta true shape conflation result (stored under "stclines/") and
# align the intersection id column names with the network links

sfcta_match_gdf = read_shst_extract(
    data_interim_dir + "stclines/",
    "*sfcta.out.matched.geojson",
).rename(
    columns = {
        "shstFromIntersectionId" : "fromIntersectionId",
        "shstToIntersectionId" : "toIntersectionId",
    }
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/stclines\car_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/stclines\ped_rules\sfcta.out.matched.geojson
----------finished reading shst extraction data-------------


In [394]:
# read sfcta stick network conflation result (stored under "sfcta/") and
# align the intersection id column names with the network links

sfcta_stick_match_gdf = read_shst_extract(
    data_interim_dir + "sfcta/",
    "*sfcta.out.matched.geojson",
).rename(
    columns = {
        "shstFromIntersectionId" : "fromIntersectionId",
        "shstToIntersectionId" : "toIntersectionId",
    }
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/sfcta\car_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\ped_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\reverse_dir\sfcta.out.matched.geojson
----------finished reading shst extraction data-------------


In [437]:
# read pems conflation result (station records already tagged with a shstReferenceId)

pems_match_path = data_interim_dir + "mtc/pems_conflation_result.geojson"
pems_match_gdf = gpd.read_file(pems_match_path)

In [204]:
# Distinct shst references matched by the TM2 non-Marin network.
# NOTE(review): same expression as the next cell; this one was executed
# (In [204]) before the latest read of tm2nonMarin_match_gdf (In [386]),
# so its recorded output is stale — consider removing the cell.
tm2nonMarin_match_gdf["shstReferenceId"].nunique()

759164

In [389]:
# Distinct shst references matched by the TM2 non-Marin network.
tm2nonMarin_match_gdf["shstReferenceId"].nunique()

758537

In [206]:
# Distinct shst references matched by the TM2 Marin network.
# NOTE(review): same expression as the next cell; this one was executed
# (In [206]) before the latest read of tm2marin_match_gdf (In [387]),
# so its recorded output is stale — consider removing the cell.
tm2marin_match_gdf["shstReferenceId"].nunique()

759156

In [390]:
# Distinct shst references matched by the TM2 Marin network.
tm2marin_match_gdf["shstReferenceId"].nunique()

758534

In [395]:
# Distinct shst references matched by the sfcta true shape network.
sfcta_match_gdf["shstReferenceId"].nunique()

37449

In [397]:
# Distinct shst references matched by the sfcta stick network.
sfcta_stick_match_gdf["shstReferenceId"].nunique()

34910

In [448]:
# Spot check: all records of a single station (one row per year and time period).
pems_match_gdf.loc[pems_match_gdf["station"].eq(401943)]

Unnamed: 0,station,district,route,direction,type,time_period,lanes,median_flow,avg_flow,sd_flow,...,abs_pm,latitude,longitude,year,shstReferenceId,tomtom_shieldnum,tomtom_rtedir,snap_distance,source,geometry
33050,401943,4,280,S,ML,AM,4,15090.5,14497.0,1815.697442,...,2.85,37.322361,-121.8978,2014,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
33051,401943,4,280,S,ML,EA,4,1129.0,1130.241379,59.19318,...,2.85,37.322361,-121.8978,2014,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
33052,401943,4,280,S,ML,EV,4,17841.5,17727.236842,1127.814879,...,2.85,37.322361,-121.8978,2014,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
33053,401943,4,280,S,ML,MD,4,21750.5,21328.181818,3004.852264,...,2.85,37.322361,-121.8978,2014,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
33054,401943,4,280,S,ML,PM,4,25372.0,24350.5,3805.566827,...,2.85,37.322361,-121.8978,2014,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
40307,401943,4,280,S,ML,AM,4,14900.5,14798.814815,839.78288,...,2.85,37.322361,-121.8978,2015,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
40308,401943,4,280,S,ML,EA,4,1141.0,1146.9375,49.355164,...,2.85,37.322361,-121.8978,2015,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
40309,401943,4,280,S,ML,EV,4,17769.5,17732.175,716.354764,...,2.85,37.322361,-121.8978,2015,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
40310,401943,4,280,S,ML,MD,4,21372.0,21209.777778,976.87763,...,2.85,37.322361,-121.8978,2015,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)
40311,401943,4,280,S,ML,PM,4,23156.0,22938.693878,886.817089,...,2.85,37.322361,-121.8978,2015,005586c0c986e59d4dcd979513f7e8e6,280,S,4.06128,nearest,POINT (-2086117.780704529 4546249.146790181)


In [449]:
# List the pems columns; "shstReferenceId", "type", "lanes", "year" and
# "station" are the ones used to derive pems_lanes_df below.
pems_match_gdf.columns

Index(['station', 'district', 'route', 'direction', 'type', 'time_period',
       'lanes', 'median_flow', 'avg_flow', 'sd_flow', 'median_speed',
       'avg_speed', 'sd_speed', 'median_occup', 'avg_occup', 'sd_occupancy',
       'days_observed', 'state_pm', 'abs_pm', 'latitude', 'longitude', 'year',
       'shstReferenceId', 'tomtom_shieldnum', 'tomtom_rtedir', 'snap_distance',
       'source', 'geometry'],
      dtype='object')

In [455]:
# Record count per station type; these type codes become the columns of the
# lanes pivot built in the next cell.
pems_match_gdf['type'].value_counts()

ML    60319
OR     9309
FR     5733
FF      666
HV       30
Name: type, dtype: int64

In [483]:
# Several pems stations can sit on one link, so keep, for each link and
# station type, the lane count reported by the most records (the mode).

pems_lanes_df = (
    pems_match_gdf[pems_match_gdf.year.isin([2014,2015,2016])]
    .groupby(["shstReferenceId", "type", "lanes"])["station"]
    .count()
    .sort_values(ascending = False)
    .reset_index()
    # after the descending sort, the first row per (link, type) has the highest count
    .drop_duplicates(subset = ['shstReferenceId', 'type'])
)

# One row per link with one lanes column per station type; types that are
# absent on a link are filled with 0.
pems_lanes_df = (
    pems_lanes_df
    .pivot_table(index = ["shstReferenceId"], values = "lanes", columns = "type")
    .fillna(0)
    .reset_index()
)

In [488]:
# join match result with source data

def _attach_source_attributes(match_gdf, source_df, source_columns, renamed_columns):
    """Attach attributes of a third-party network to its shst match result.

    The match result is first reduced to one row per
    (shstReferenceId, shstGeometryId, fromIntersectionId, toIntersectionId),
    then left-joined to ``source_df`` on the node pair
    (pp_a, pp_b) == (A, B), and finally the joined source columns are renamed.

    Parameters
    ----------
    match_gdf : DataFrame or GeoDataFrame
        shst match output carrying ``pp_a`` / ``pp_b`` columns.
    source_df : DataFrame or GeoDataFrame
        Third-party link table carrying ``A`` / ``B`` columns.
    source_columns : list of str
        Columns of ``source_df`` to bring over (must include "A" and "B").
    renamed_columns : dict
        Mapping from source column name to its source-prefixed output name.

    Returns
    -------
    DataFrame or GeoDataFrame
        De-duplicated match rows with the renamed source attributes.
    """
    unique_match_gdf = match_gdf.drop_duplicates(
        subset = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']).copy()

    unique_match_gdf = pd.merge(unique_match_gdf,
                                source_df[source_columns],
                                how = "left",
                                left_on = ['pp_a', "pp_b"],
                                right_on = ['A', "B"])

    return unique_match_gdf.rename(columns = renamed_columns)


# TM2 non Marin
unique_tm2nonMarin_match_gdf = _attach_source_attributes(
    tm2nonMarin_match_gdf,
    tm2_link_roadway_gdf,
    ['A', 'B', "LANES", "FT", "ASSIGNABLE"],
    {"A" : "TM2_A", "B" : "TM2_B", "LANES" : "TM2_LANES",
     "FT" : "TM2_FT", "ASSIGNABLE" : "TM2_ASSIGNABLE"},
)

# TM2 Marin
unique_tm2marin_match_gdf = _attach_source_attributes(
    tm2marin_match_gdf,
    tm2_marin_link_roadway_gdf,
    ['A', 'B', "LANES", "FT", "ASSIGNABLE"],
    {"A" : "TM2Marin_A", "B" : "TM2Marin_B", "LANES" : "TM2Marin_LANES",
     "FT" : "TM2Marin_FT", "ASSIGNABLE" : "TM2Marin_ASSIGNABLE"},
)

# sfcta stick network only; the true shape match (sfcta_match_gdf) is not joined here.
# NOTE(review): sfcta_stick_gdf must already be loaded by an earlier cell. The
# only load visible in this cell,
#   gpd.read_file("../../data/external/sfcta/SanFrancisco_links.shp"),
# sat inside a disabled triple-quoted block and was never executed here — confirm
# the frame is defined before this point on a fresh Restart & Run All.
unique_sfcta_match_gdf = _attach_source_attributes(
    sfcta_stick_match_gdf,
    sfcta_stick_gdf,
    ["A", "B", "FT", "STREETNAME", "LANE_AM", "LANE_OP", "LANE_PM"],
    {"A" : "sfcta_A", "B" : "sfcta_B", "FT" : "sfcta_FT", "STREETNAME" : "sfcta_STREETNAME",
     "LANE_AM" : "sfcta_LANE_AM", "LANE_OP" : "sfcta_LANE_OP", "LANE_PM" : "sfcta_LANE_PM"},
)

# pems: prefix the per-station-type lane columns produced by the pivot
pems_lanes_df = pems_lanes_df.rename(
    columns = {"FF" : "pems_lanes_FF", "FR" : "pems_lanes_FR", "HV" : "pems_lanes_HV",
               "ML" : "pems_lanes_ML", "OR" : "pems_lanes_OR"})

In [489]:
# Confirm the pems lane columns carry the "pems_lanes_" prefix after the rename.
pems_lanes_df.columns

Index(['shstReferenceId', 'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV',
       'pems_lanes_ML', 'pems_lanes_OR'],
      dtype='object', name='type')

In [507]:
# Number of de-duplicated sfcta stick match rows.
unique_sfcta_match_gdf["shstReferenceId"].shape

(34910,)

In [508]:
# Count the matched sfcta rows that actually received a lane attribute.
# The pre-merge frame is checked here because link_all_conflated_gdf is only
# created in the "Join network with third party sources" section below;
# referencing it at this point raises NameError on a fresh Restart & Run All.
unique_sfcta_match_gdf.sfcta_LANE_AM.notnull().sum()

34906

# Join network with third party sources

In [509]:
# Attach the TM2 non-Marin attributes to every network link (links without a
# match keep NaN in the TM2_* columns).
link_all_conflated_gdf = pd.merge(
    link_with_tomtom_gdf,
    unique_tm2nonMarin_match_gdf[['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT', 'TM2_ASSIGNABLE']],
    how = "left",
    on = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"]
)

# A left join keeps one row per link unless the match table has duplicate
# keys; fail loudly instead of silently duplicating links.
assert len(link_all_conflated_gdf) == len(link_with_tomtom_gdf), \
    "TM2 non-Marin join changed the number of links"

In [510]:
# Attach the TM2 Marin attributes to every network link.
# Re-running this cell without re-running the previous merge would join the
# same columns twice (pandas then adds _x/_y suffixes).
n_links_before_merge = len(link_all_conflated_gdf)

link_all_conflated_gdf = pd.merge(
    link_all_conflated_gdf,
    unique_tm2marin_match_gdf[['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES', 'TM2Marin_FT',
       'TM2Marin_ASSIGNABLE']],
    how = "left",
    on = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"]
)

# A left join keeps one row per link unless the match table has duplicate
# keys; fail loudly instead of silently duplicating links.
assert len(link_all_conflated_gdf) == n_links_before_merge, \
    "TM2 Marin join changed the number of links"

In [511]:
# Attach the sfcta stick network attributes to every network link.
# Re-running this cell without re-running the previous merges would join the
# same columns twice (pandas then adds _x/_y suffixes).
n_links_before_merge = len(link_all_conflated_gdf)

link_all_conflated_gdf = pd.merge(
    link_all_conflated_gdf,
    unique_sfcta_match_gdf[['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'sfcta_A', 'sfcta_B', 'sfcta_FT', 'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP',
       'sfcta_LANE_PM']],
    how = "left",
    on = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"]
)

# A left join keeps one row per link unless the match table has duplicate
# keys; fail loudly instead of silently duplicating links.
assert len(link_all_conflated_gdf) == n_links_before_merge, \
    "sfcta join changed the number of links"

In [512]:
# Attach the pems lane counts (one column per station type) by shst reference.
# Re-running this cell without re-running the previous merges would join the
# same columns twice (pandas then adds _x/_y suffixes).
n_links_before_merge = len(link_all_conflated_gdf)

link_all_conflated_gdf = pd.merge(
    link_all_conflated_gdf,
    pems_lanes_df,
    how = "left",
    on = "shstReferenceId"
)

# A left join keeps one row per link unless pems_lanes_df has duplicate
# shstReferenceId values; fail loudly instead of silently duplicating links.
assert len(link_all_conflated_gdf) == n_links_before_merge, \
    "pems join changed the number of links"

In [513]:
# Sanity check: the row count should still equal the number of links in
# link_with_tomtom_gdf, since only left joins were applied.
link_all_conflated_gdf.shape

(1705772, 62)

In [514]:
# List all columns after the tomtom, TM2, sfcta and pems attributes were joined.
link_all_conflated_gdf.columns

Index(['access', 'area', 'bike_access', 'bridge', 'drive_access', 'est_width',
       'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse',
       'lanes', 'link', 'maxspeed', 'name', 'oneWay', 'ref', 'roadway',
       'roundabout', 'service', 'shstGeometryId', 'shstReferenceId',
       'toIntersectionId', 'tunnel', 'u', 'v', 'walk_access', 'wayId', 'width',
       'geometry', 'pp_tomtom_link_id', 'tomtom_ID', 'F_JNCTID', 'T_JNCTID',
       'tomtom_lanes', 'tomtom_FRC', 'tomtom_name', 'tomtom_shieldnum',
       'tomtom_rtedir', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
       'TM2_ASSIGNABLE', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
       'TM2Marin_FT', 'TM2Marin_ASSIGNABLE', 'sfcta_A', 'sfcta_B', 'sfcta_FT',
       'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM',
       'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
       'pems_lanes_OR'],
      dtype='object')

In [515]:
# Spot check: the `lanes` value of row 19 is a sequence; show its second element.
link_all_conflated_gdf["lanes"].iloc[19][1]

'2'

# Write out standard link json and link feather

In [516]:
# Columns of the standard link table; only these go into link.json / link.feather.
link_json_columns = list(link_df.columns)

In [526]:
# Show the standard link columns that will be exported.
link_json_columns

['access',
 'area',
 'bike_access',
 'bridge',
 'drive_access',
 'est_width',
 'fromIntersectionId',
 'highway',
 'id',
 'junction',
 'key',
 'landuse',
 'lanes',
 'link',
 'maxspeed',
 'name',
 'oneWay',
 'ref',
 'roadway',
 'roundabout',
 'service',
 'shstGeometryId',
 'shstReferenceId',
 'toIntersectionId',
 'tunnel',
 'u',
 'v',
 'walk_access',
 'wayId',
 'width']

In [517]:
# NOTE(review): repeats the column listing shown a few cells above; kept as-is.
link_all_conflated_gdf.columns

Index(['access', 'area', 'bike_access', 'bridge', 'drive_access', 'est_width',
       'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse',
       'lanes', 'link', 'maxspeed', 'name', 'oneWay', 'ref', 'roadway',
       'roundabout', 'service', 'shstGeometryId', 'shstReferenceId',
       'toIntersectionId', 'tunnel', 'u', 'v', 'walk_access', 'wayId', 'width',
       'geometry', 'pp_tomtom_link_id', 'tomtom_ID', 'F_JNCTID', 'T_JNCTID',
       'tomtom_lanes', 'tomtom_FRC', 'tomtom_name', 'tomtom_shieldnum',
       'tomtom_rtedir', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
       'TM2_ASSIGNABLE', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
       'TM2Marin_FT', 'TM2Marin_ASSIGNABLE', 'sfcta_A', 'sfcta_B', 'sfcta_FT',
       'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM',
       'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
       'pems_lanes_OR'],
      dtype='object')

In [518]:
# Names of all object-dtype columns of the conflated links.
# NOTE(review): object_col is recomputed from link_feather in the feather cell
# below before it is used, so this assignment looks redundant — confirm.
object_col = link_all_conflated_gdf.select_dtypes(include = ['object']).columns.tolist()

In [519]:
print("-------write out link json---------")

# Export only the standard link attributes (link_json_columns); the conflated
# third-party columns are not part of link.json.
link_prop = link_json_columns

link_json_path = data_interim_dir + 'step4_conflate_with_tomtom/link.json'
out = link_all_conflated_gdf[link_prop].to_json(orient = "records")

with open(link_json_path, 'w') as link_json_file:
    link_json_file.write(out)

-------write out link json---------


In [520]:
print("-------write out link feather---------")

link_feather = link_all_conflated_gdf[link_json_columns].copy()

# Cast every object-dtype column to str before writing (presumably because
# columns such as `lanes` mix lists and scalars, which feather cannot store
# as-is — confirm).
object_col = link_feather.select_dtypes(include = ['object']).columns.tolist()
link_feather = link_feather.astype({column: str for column in object_col})

link_feather.to_feather(data_interim_dir + 'step4_conflate_with_tomtom/link.feather')

-------write out link feather---------


In [316]:
# Every non-geometry column becomes a GeoJSON feature property.
# NOTE(review): this cell's execution count (In [316]) is lower than the
# merge cells above, so the file on disk may predate the current frame —
# re-run it after the joins.
link_all_conflated_gdf_prop = link_all_conflated_gdf.columns.drop("geometry")
link_all_conflated_geojson = link_df_to_geojson(link_all_conflated_gdf, link_all_conflated_gdf_prop)

link_geojson_path = data_interim_dir + 'step4_conflate_with_tomtom/link.geojson'
with open(link_geojson_path, "w") as link_geojson_file:
    link_geojson_file.write(json.dumps(link_all_conflated_geojson))

# Write out conflation result database

In [521]:
# tomtom FRC dictionary: the position in the list is the FRC code (0-8)

tomtom_FRC_dict = dict(enumerate([
    "0-Motorway, Freeway, or Other Major Road",
    "1-Major Road Less Important than a Motorway",
    "2-Other Major Road",
    "3-Secondary Road",
    "4-Local Connecting Road",
    "5-Local Road of High Importance",
    "6-Local Road",
    "7-Local Road of Minor Importance",
    "8-Other Road",
]))

# readable label next to the numeric code; codes not in the dictionary map to NaN
link_all_conflated_gdf["tomtom_FRC_def"] = link_all_conflated_gdf["tomtom_FRC"].map(tomtom_FRC_dict)

In [525]:
# Number of links per tomtom FRC label.
link_all_conflated_gdf["tomtom_FRC_def"].value_counts()

6-Local Road                                   345262
7-Local Road of Minor Importance               325150
5-Local Road of High Importance                 83747
4-Local Connecting Road                         71881
8-Other Road                                    35400
3-Secondary Road                                14483
1-Major Road Less Important than a Motorway      1851
0-Motorway, Freeway, or Other Major Road         1217
2-Other Major Road                                408
Name: tomtom_FRC_def, dtype: int64

In [522]:
# TM2 FT dictionary: the position in the list is the FT code (0-7)
TM2_FT_dict = dict(enumerate([
    "0-Connector",
    "1-Freeway to Freeway",
    "2-Freeway",
    "3-Expressway",
    "4-Collector",
    "5-Ramp",
    "6-Special Facility",
    "7-Major Arterial",
]))

# readable label next to the numeric code; codes not in the dictionary map to NaN
link_all_conflated_gdf["TM2_FT_def"] = link_all_conflated_gdf["TM2_FT"].map(TM2_FT_dict)

link_all_conflated_gdf["TM2_FT_def"].value_counts()

4-Collector             540491
7-Major Arterial        141340
0-Connector              56409
3-Expressway             13888
2-Freeway                 3271
5-Ramp                    2168
1-Freeway to Freeway       156
Name: TM2_FT_def, dtype: int64

In [527]:
# write conflation database: one row per link with the attributes from every source

tableau_fields = [
    # shst / osm
    "shstReferenceId", "roadway", "lanes", "drive_access", "bike_access", "walk_access",
    # tomtom
    "tomtom_FRC", "tomtom_FRC_def", "tomtom_lanes", "pp_tomtom_link_id",
    'tomtom_name', 'tomtom_shieldnum', 'tomtom_rtedir',
    # TM2 Marin
    'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_FT', 'TM2Marin_LANES', 'TM2Marin_ASSIGNABLE',
    # TM2 non Marin
    'TM2_A', 'TM2_B', 'TM2_FT', "TM2_FT_def", 'TM2_LANES', 'TM2_ASSIGNABLE',
    # sfcta
    "sfcta_A", 'sfcta_B', "sfcta_STREETNAME", 'sfcta_FT', 'sfcta_LANE_AM', 'sfcta_LANE_OP',
    'sfcta_LANE_PM',
    # pems
    'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML', 'pems_lanes_OR',
]

# clearer names for the two columns whose source is not obvious from the name
tableau_column_names = {"lanes" : "lanes_osm",
                        "pp_tomtom_link_id" : "tomtom_unique_id"}

conflation_result_df = link_all_conflated_gdf[tableau_fields].rename(columns = tableau_column_names)
conflation_result_df.to_csv(data_interim_dir + "conflation_result.csv", index = False)