In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import requests
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
import fiona
from shapely.geometry import Point

In [2]:
from methods import read_shst_extract
from methods import link_df_to_geojson
from methods import point_df_to_geojson

In [3]:
# Input (external) and pipeline-output (interim) folders, both resolved
# relative to the directory this notebook is run from.
_data_root = "../../data/"
data_external_dir = _data_root + "external/"
data_interim_dir = _data_root + "interim/"

# Read base network from step 3

In [4]:
%%time
# Load the step-3 link attributes (JSON records) and attach each link's
# geometry from the step-3 shape file via the shared "id" column.
step3_dir = data_interim_dir + "step3_join_shst_extraction_with_osm/"

link_file = step3_dir + "link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

shape_gdf = gpd.read_file(step3_dir + "shape.geojson")

# Left join: every link is kept, even if no shape carries the same id.
link_gdf = gpd.GeoDataFrame(
    pd.merge(link_df, shape_gdf[["id", "geometry"]], how="left", on="id"),
    crs="EPSG:4326",
)

Wall time: 1min 56s


In [5]:
# Report the row count and column names of the base network.
print(f"{link_gdf.shape[0]} links with columns: \n{link_gdf.columns}")

1705772 links with columns: 
Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'u', 'v', 'link', 'oneWay', 'roundabout', 'wayId',
       'access', 'area', 'bridge', 'est_width', 'highway', 'junction', 'key',
       'landuse', 'lanes', 'maxspeed', 'name', 'ref', 'service', 'tunnel',
       'width', 'roadway', 'drive_access', 'walk_access', 'bike_access',
       'geometry'],
      dtype='object')


# TomTom conflation

In [8]:
# Read tomtom ShSt match result

# Drop the "shst" prefix on the intersection id columns so they carry the same
# names as the base network's fromIntersectionId / toIntersectionId.
intersection_rename = {"shstFromIntersectionId": "fromIntersectionId",
                       "shstToIntersectionId": "toIntersectionId"}

tomtom_match_gdf = read_shst_extract(
    data_interim_dir + "tomtom/", "tomtom_*.out.matched.geojson"
).rename(columns=intersection_rename)

print(f"\n {tomtom_match_gdf.shape[0]} rows with columns: \n{tomtom_match_gdf.columns}")

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_1.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_10.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_11.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_12.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_13.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_14.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_2.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_3.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomtom\bike_rules\tomtom_4.out.matched.geojson
reading shst extraction data :  ../../data/interim/tomt

In [10]:
# NOTE(review): `tomtom_raw_gdf` is only assigned in the cell that follows this
# one (this cell has execution count 10, that one 9). On a fresh kernel this cell
# must be run after it, otherwise it raises NameError.
matched_link_count = tomtom_match_gdf.pp_tomtom_link_id.nunique()
total_link_count = len(tomtom_raw_gdf)
print(f"Sharedstreets matched {matched_link_count} out of {total_link_count} total tomtom links.")

Sharedstreets matched 491883 out of 959969 total tomtom links.


In [9]:
# Read tomtom raw data
tomtom_raw_path = data_external_dir + 'tomtom/tomtom_raw.geojson'
tomtom_raw_gdf = gpd.read_file(tomtom_raw_path)

print(f"\n {tomtom_raw_gdf.shape[0]} rows with columns: \n{tomtom_raw_gdf.columns}")


 959969 rows with columns: 
Index(['ID', 'FEATTYP', 'FT', 'F_JNCTID', 'F_JNCTTYP', 'T_JNCTID', 'T_JNCTTYP',
       'PJ', 'METERS', 'FRC', 'NETCLASS', 'NETBCLASS', 'NET2CLASS', 'NAME',
       'NAMELC', 'SOL', 'NAMETYP', 'CHARGE', 'SHIELDNUM', 'RTETYP', 'RTEDIR',
       'RTEDIRVD', 'PROCSTAT', 'FOW', 'SLIPRD', 'FREEWAY', 'BACKRD', 'TOLLRD',
       'RDCOND', 'STUBBLE', 'PRIVATERD', 'CONSTATUS', 'ONEWAY', 'F_BP', 'T_BP',
       'F_ELEV', 'T_ELEV', 'KPH', 'MINUTES', 'POSACCUR', 'CARRIAGE', 'LANES',
       'RAMP', 'ADA', 'TRANS', 'DYNSPEED', 'SPEEDCAT', 'NTHRUTRAF', 'ROUGHRD',
       'PARTSTRUC', 'Shape_Length', 'tomtom_link_id', 'geometry'],
      dtype='object')


In [11]:
# Attach the raw tomtom attributes to every ShSt match record: the match's
# pp_tomtom_link_id is joined to the raw table's tomtom_link_id (left join, so
# match records without a raw counterpart are kept).
tomtom_attribute_columns = ['tomtom_link_id', 'ID', 'F_JNCTID', 'T_JNCTID', 'LANES',
                            "FRC", "NAME", "SHIELDNUM", "RTEDIR"]

tomtom_gdf = pd.merge(
    left=tomtom_match_gdf,
    right=tomtom_raw_gdf[tomtom_attribute_columns],
    how="left",
    left_on=['pp_tomtom_link_id'],
    right_on=['tomtom_link_id'],
)

# NOTE(review): the number printed is the row count of the merged match table,
# not a count of base-network links (the recorded output is larger than the
# number of links loaded from step 3) -- consider rewording the message.
print(f"{tomtom_gdf.shape[0]} base network links have tomtom attributes")

2477984 base network links have tomtom attributes


In [12]:
# join tomtom with network

shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']

tomtom_rename = {"ID": "tomtom_ID", "LANES": "tomtom_lanes", "FRC": "tomtom_FRC",
                 "NAME": "tomtom_name", "SHIELDNUM": "tomtom_shieldnum",
                 "RTEDIR": "tomtom_rtedir"}

# Keep the first match record for each ShSt link key, then give the tomtom
# attribute columns a "tomtom_" prefix.
unique_tomtom_match_gdf = (
    tomtom_gdf
    .drop_duplicates(subset=shst_link_keys)
    .rename(columns=tomtom_rename)
)

tomtom_join_columns = shst_link_keys + [
    'pp_tomtom_link_id', "tomtom_ID", 'F_JNCTID', 'T_JNCTID',
    "tomtom_lanes", "tomtom_FRC", "tomtom_name", "tomtom_shieldnum",
    "tomtom_rtedir",
]

# Left join keeps every base-network link; links without a tomtom match get NaN.
link_with_tomtom_gdf = pd.merge(
    left=link_gdf,
    right=unique_tomtom_match_gdf[tomtom_join_columns],
    how="left",
    on=shst_link_keys,
)

In [13]:
# Show the index range and per-column dtypes of the tomtom-joined network.
link_with_tomtom_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 40 columns):
 #   Column              Dtype   
---  ------              -----   
 0   shstReferenceId     object  
 1   id                  object  
 2   shstGeometryId      object  
 3   fromIntersectionId  object  
 4   toIntersectionId    object  
 5   u                   int64   
 6   v                   int64   
 7   link                object  
 8   oneWay              object  
 9   roundabout          object  
 10  wayId               object  
 11  access              object  
 12  area                object  
 13  bridge              object  
 14  est_width           object  
 15  highway             object  
 16  junction            object  
 17  key                 object  
 18  landuse             object  
 19  lanes               object  
 20  maxspeed            object  
 21  name                object  
 22  ref                 object  
 23  service             obje

# Read third party shst match results

In [14]:
# read TM2 non Marin conflation result

# Rename the ShSt intersection id columns to the names used by the base network.
tm2nonMarin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_nonMarin/", "tm2nonMarin_*.out.matched.geojson"
).rename(
    columns={"shstFromIntersectionId": "fromIntersectionId",
             "shstToIntersectionId": "toIntersectionId"}
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_1.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_10.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_11.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_12.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_13.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_14.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_2.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules\tm2nonMarin_3.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_nonMarin\car_rules

In [17]:
# read TM2 Marin conflation result

# Rename the ShSt intersection id columns to the names used by the base network.
tm2marin_match_gdf = read_shst_extract(
    data_interim_dir + "tm2_Marin/", "tm2Marin_*.out.matched.geojson"
).rename(
    columns={"shstFromIntersectionId": "fromIntersectionId",
             "shstToIntersectionId": "toIntersectionId"}
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_1.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_10.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_11.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_12.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_13.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_14.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_2.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_3.out.matched.geojson
reading shst extraction data :  ../../data/interim/tm2_Marin\car_rules\tm2Marin_4.out.matched.geojson
reading shst extrac

In [None]:
# # read sfcta true shape conflation result

# sfcta_match_gdf = read_shst_extract(data_interim_dir + "stclines/", "*sfcta.out.matched.geojson")

# sfcta_match_gdf.rename(columns = {"shstFromIntersectionId" : "fromIntersectionId",
#                                    "shstToIntersectionId" : "toIntersectionId"},
#                        inplace = True)

In [18]:
# read sfcta stick network conflation result

# Rename the ShSt intersection id columns to the names used by the base network.
sfcta_stick_match_gdf = read_shst_extract(
    data_interim_dir + "sfcta/", "*sfcta.out.matched.geojson"
).rename(
    columns={"shstFromIntersectionId": "fromIntersectionId",
             "shstToIntersectionId": "toIntersectionId"}
)

----------start reading shst extraction data-------------
reading shst extraction data :  ../../data/interim/sfcta\car_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\ped_rules\sfcta.out.matched.geojson
reading shst extraction data :  ../../data/interim/sfcta\reverse_dir\sfcta.out.matched.geojson
----------finished reading shst extraction data-------------


In [25]:
# read source data (the raw third-party networks that were fed to the matcher)

tm2_nonmarin_raw_path = data_external_dir + "TM2_nonMarin/tm2nonMarin_raw.geojson"
tm2_marin_raw_path = data_external_dir + "TM2_Marin/tm2Marin_raw.geojson"
sfcta_raw_path = data_external_dir + "sfcta/sfcta_raw.geojson"

# TM2 non-Marin
tm2_link_roadway_gdf = gpd.read_file(tm2_nonmarin_raw_path)

# TM2 Marin
tm2_marin_link_roadway_gdf = gpd.read_file(tm2_marin_raw_path)

# SFCTA stick
sfcta_stick_gdf = gpd.read_file(sfcta_raw_path)

In [26]:
# join match result with source data

def _join_match_with_source(match_gdf, source_gdf, source_columns, rename_map):
    """Attach raw source-network attributes to de-duplicated ShSt match records.

    All three third-party networks handled in this cell follow the same recipe,
    so it is written once here instead of being copy-pasted per source.

    Parameters
    ----------
    match_gdf : DataFrame
        ShSt match records. Must contain the four ShSt link key columns
        (shstReferenceId, shstGeometryId, fromIntersectionId, toIntersectionId)
        and the ``pp_a`` / ``pp_b`` columns used as the join key.
    source_gdf : DataFrame
        Raw source network with ``A`` and ``B`` columns.
    source_columns : list of str
        Columns of ``source_gdf`` to carry over; must include ``A`` and ``B``.
    rename_map : dict
        Source column name -> source-prefixed output column name.

    Returns
    -------
    DataFrame
        The first match record per ShSt link key, left-joined to the source
        attributes on (pp_a, pp_b) == (A, B) and with the source columns
        renamed. Match records without a source row keep NaN attributes.
    """
    shst_link_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']

    # drop_duplicates keeps the first record seen for each ShSt link key.
    unique_match_gdf = match_gdf.drop_duplicates(subset=shst_link_keys)

    joined_gdf = pd.merge(
        unique_match_gdf,
        source_gdf[source_columns],
        how="left",
        left_on=["pp_a", "pp_b"],
        right_on=["A", "B"],
    )
    return joined_gdf.rename(columns=rename_map)


# TM2 non Marin
unique_tm2nonMarin_match_gdf = _join_match_with_source(
    tm2nonMarin_match_gdf,
    tm2_link_roadway_gdf,
    ['A', 'B', "NUMLANES", "FT", "ASSIGNABLE"],
    {"A": "TM2_A", "B": "TM2_B", "NUMLANES": "TM2_LANES",
     "FT": "TM2_FT", "ASSIGNABLE": "TM2_ASSIGNABLE"},
)

# TM2 Marin
unique_tm2marin_match_gdf = _join_match_with_source(
    tm2marin_match_gdf,
    tm2_marin_link_roadway_gdf,
    ['A', 'B', "NUMLANES", "FT", "ASSIGNABLE"],
    {"A": "TM2Marin_A", "B": "TM2Marin_B", "NUMLANES": "TM2Marin_LANES",
     "FT": "TM2Marin_FT", "ASSIGNABLE": "TM2Marin_ASSIGNABLE"},
)

# sfcta stick network (only the stick match result is used in this cell)
unique_sfcta_match_gdf = _join_match_with_source(
    sfcta_stick_match_gdf,
    sfcta_stick_gdf,
    ["A", "B", "FT", "STREETNAME", "LANE_AM", "LANE_OP", "LANE_PM"],
    {"A": "sfcta_A", "B": "sfcta_B", "FT": "sfcta_FT", "STREETNAME": "sfcta_STREETNAME",
     "LANE_AM": "sfcta_LANE_AM", "LANE_OP": "sfcta_LANE_OP", "LANE_PM": "sfcta_LANE_PM"},
)

In [20]:
# read pems conflation result
pems_conflation_path = data_interim_dir + "mtc/pems_conflation_result.geojson"
pems_match_gdf = gpd.read_file(pems_conflation_path)

# Number of match records per value of the "type" column.
station_type_counts = pems_match_gdf['type'].value_counts()
print(station_type_counts)

ML    60319
OR     9309
FR     5733
FF      666
HV       30
Name: type, dtype: int64


In [21]:
# A link can have several PeMS stations on it, so take the most frequent
# (mode) lane count per link and station type.

# Restrict to the 2014-2016 records.
pems_recent = pems_match_gdf[pems_match_gdf.year.isin([2014, 2015, 2016])]

# Count records for every (link, type, lanes) combination, most frequent first.
lane_counts = (
    pems_recent
    .groupby(["shstReferenceId", "type", "lanes"])["station"]
    .count()
    .sort_values(ascending=False)
    .reset_index()
)

# After the descending sort, the first row per (link, type) is the mode.
pems_lanes_df = lane_counts.drop_duplicates(subset=['shstReferenceId', 'type'])

# One row per link and one column per station type; a type with no record on
# the link is filled with 0.
pems_lanes_df = (
    pems_lanes_df
    .pivot_table(index=["shstReferenceId"], values="lanes", columns="type")
    .fillna(0)
    .reset_index()
)
display(pems_lanes_df)

type,shstReferenceId,FF,FR,HV,ML,OR
0,000619909cfedfdc3ae846759247e09f,0.0,0.0,0.0,3.0,0.0
1,001faa1f8bf0fafb4298b7438a83b506,0.0,0.0,0.0,3.0,0.0
2,003a921cd366f424ba0d42e1635f85c2,0.0,0.0,0.0,2.0,0.0
3,00441ada4f77ea0e20dd17acd688e289,0.0,0.0,0.0,3.0,0.0
4,005586c0c986e59d4dcd979513f7e8e6,0.0,0.0,0.0,4.0,0.0
...,...,...,...,...,...,...
2008,ff7acac14c0bb77fcdacf4ccd6966517,0.0,0.0,0.0,5.0,0.0
2009,ff8f4bf33f498176b2a6e2fe4d078ff4,0.0,0.0,0.0,2.0,0.0
2010,ff90d3fb5034f779cc6c4841f2fbcfc5,0.0,0.0,0.0,2.0,0.0
2011,ff9ba0a399cb6a07872339ed82edea56,0.0,0.0,0.0,4.0,0.0


In [23]:
# Collect, for every link, the list of distinct PeMS station ids seen on it
# in the 2014-2016 records.
pems_recent_stations = pems_match_gdf[
    pems_match_gdf.year.isin([2014, 2015, 2016])
].drop_duplicates(subset=["shstReferenceId", "station"])

link_pems_df = (
    pems_recent_stations
    .groupby(["shstReferenceId"])["station"]
    .apply(list)
    .reset_index()
    .rename(columns={"station": "PEMSID"})
)

display(link_pems_df)

Unnamed: 0,shstReferenceId,PEMSID
0,000619909cfedfdc3ae846759247e09f,"[404433, 407341]"
1,001faa1f8bf0fafb4298b7438a83b506,[400615]
2,003a921cd366f424ba0d42e1635f85c2,"[409481, 409485]"
3,00441ada4f77ea0e20dd17acd688e289,[404649]
4,005586c0c986e59d4dcd979513f7e8e6,"[401943, 402067, 403412]"
...,...,...
2008,ff7acac14c0bb77fcdacf4ccd6966517,"[402575, 402577, 402579]"
2009,ff8f4bf33f498176b2a6e2fe4d078ff4,[400108]
2010,ff90d3fb5034f779cc6c4841f2fbcfc5,"[408152, 402096]"
2011,ff9ba0a399cb6a07872339ed82edea56,[400703]


In [24]:
# Add the per-link station id list to the per-link lane table, then prefix the
# station-type columns with "pems_lanes_".
pems_lane_rename = {"FF": "pems_lanes_FF", "FR": "pems_lanes_FR", "HV": "pems_lanes_HV",
                    "ML": "pems_lanes_ML", "OR": "pems_lanes_OR"}

pems_lanes_df = pd.merge(
    left=pems_lanes_df,
    right=link_pems_df,
    how="left",
    on="shstReferenceId",
).rename(columns=pems_lane_rename)

# Join network with third party sources

In [28]:
# Start the fully conflated table from the tomtom-joined network and add the
# TM2 non-Marin attributes (left join on the four ShSt link keys).
tm2_nonmarin_columns = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
                        'toIntersectionId', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
                        'TM2_ASSIGNABLE']

link_all_conflated_gdf = pd.merge(
    left=link_with_tomtom_gdf,
    right=unique_tm2nonMarin_match_gdf[tm2_nonmarin_columns],
    how="left",
    on=['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"],
)

In [29]:
# Add the TM2 Marin attributes (left join on the four ShSt link keys).
tm2_marin_columns = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
                     'toIntersectionId', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
                     'TM2Marin_FT', 'TM2Marin_ASSIGNABLE']

link_all_conflated_gdf = pd.merge(
    left=link_all_conflated_gdf,
    right=unique_tm2marin_match_gdf[tm2_marin_columns],
    how="left",
    on=['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"],
)

In [30]:
# Add the SFCTA attributes (left join on the four ShSt link keys).
sfcta_columns = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
                 'toIntersectionId', 'sfcta_A', 'sfcta_B', 'sfcta_FT', 'sfcta_STREETNAME',
                 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM']

link_all_conflated_gdf = pd.merge(
    left=link_all_conflated_gdf,
    right=unique_sfcta_match_gdf[sfcta_columns],
    how="left",
    on=['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', "toIntersectionId"],
)

In [31]:
# Add the PeMS lane counts and station id lists; this table is keyed by
# shstReferenceId only.
link_all_conflated_gdf = pd.merge(
    left=link_all_conflated_gdf,
    right=pems_lanes_df,
    how="left",
    on="shstReferenceId",
)

In [32]:
# Report the row count and column names after all sources have been joined.
conflated_link_count = link_all_conflated_gdf.shape[0]
print(f"after conflation, {conflated_link_count} links with the follow columns: \n{link_all_conflated_gdf.columns}")

after conflation, 1705772 links with the follow columns: 
Index(['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'u', 'v', 'link', 'oneWay', 'roundabout', 'wayId',
       'access', 'area', 'bridge', 'est_width', 'highway', 'junction', 'key',
       'landuse', 'lanes', 'maxspeed', 'name', 'ref', 'service', 'tunnel',
       'width', 'roadway', 'drive_access', 'walk_access', 'bike_access',
       'geometry', 'pp_tomtom_link_id', 'tomtom_ID', 'F_JNCTID', 'T_JNCTID',
       'tomtom_lanes', 'tomtom_FRC', 'tomtom_name', 'tomtom_shieldnum',
       'tomtom_rtedir', 'TM2_A', 'TM2_B', 'TM2_LANES', 'TM2_FT',
       'TM2_ASSIGNABLE', 'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_LANES',
       'TM2Marin_FT', 'TM2Marin_ASSIGNABLE', 'sfcta_A', 'sfcta_B', 'sfcta_FT',
       'sfcta_STREETNAME', 'sfcta_LANE_AM', 'sfcta_LANE_OP', 'sfcta_LANE_PM',
       'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
       'pems_lanes_OR', 'PEMSID'],
      dtype='o

# Write out standard link json and link feather

In [33]:
# The standard link outputs carry exactly the columns of the step-3 link table.
link_json_columns = list(link_df.columns)
print(link_json_columns)

['shstReferenceId', 'id', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId', 'u', 'v', 'link', 'oneWay', 'roundabout', 'wayId', 'access', 'area', 'bridge', 'est_width', 'highway', 'junction', 'key', 'landuse', 'lanes', 'maxspeed', 'name', 'ref', 'service', 'tunnel', 'width', 'roadway', 'drive_access', 'walk_access', 'bike_access']


In [34]:
print("-------write out link json---------")

# Only the standard link columns are written; the conflated attributes are not.
link_prop = link_json_columns
link_json_path = data_interim_dir + 'step4_conflate_with_tomtom/link.json'

# Serialize first so the output file is only opened once the JSON text exists.
out = link_all_conflated_gdf[link_prop].to_json(orient="records")

with open(link_json_path, 'w') as f:
    f.write(out)

-------write out link json---------


In [35]:
print("-------write out link feather---------")

link_feather = link_all_conflated_gdf[link_json_columns].copy()

# Cast every object-dtype column to str before writing.
# NOTE(review): presumably needed so mixed-type object columns serialize to
# feather -- confirm.
object_col = list(link_feather.select_dtypes(['object']).columns)
link_feather = link_feather.astype({col: str for col in object_col})

link_feather.to_feather(data_interim_dir + 'step4_conflate_with_tomtom/link.feather')

-------write out link feather---------


In [37]:
print("-------write out link geojson---------")

# Every column except the geometry becomes a feature property. Dropping the
# label from the column Index avoids copying the whole frame just to list its
# remaining column names.
link_all_conflated_gdf_prop = link_all_conflated_gdf.columns.drop("geometry")
link_all_conflated_geojson = link_df_to_geojson(link_all_conflated_gdf, link_all_conflated_gdf_prop)

with open(data_interim_dir + 'step4_conflate_with_tomtom/link.geojson', "w") as f:
    json.dump(link_all_conflated_geojson, f)

-------write out link geojson---------


# Write out conflation result database

In [38]:
# tomtom FRC dictionary: numeric FRC code -> "<code>-<description>" label

tomtom_FRC_dict = {
    0: "0-Motorway, Freeway, or Other Major Road",
    1: "1-Major Road Less Important than a Motorway",
    2: "2-Other Major Road",
    3: "3-Secondary Road",
    4: "4-Local Connecting Road",
    5: "5-Local Road of High Importance",
    6: "6-Local Road",
    7: "7-Local Road of Minor Importance",
    8: "8-Other Road",
}

# Codes that are missing or not in the dictionary map to NaN.
tomtom_frc_codes = link_all_conflated_gdf["tomtom_FRC"]
link_all_conflated_gdf["tomtom_FRC_def"] = tomtom_frc_codes.map(tomtom_FRC_dict)

In [39]:
# Number of links per tomtom FRC label (links with no label are not counted).
link_all_conflated_gdf.tomtom_FRC_def.value_counts()

7-Local Road of Minor Importance               194543
6-Local Road                                   162898
4-Local Connecting Road                         60807
5-Local Road of High Importance                 51673
8-Other Road                                    17417
3-Secondary Road                                14542
1-Major Road Less Important than a Motorway      6381
0-Motorway, Freeway, or Other Major Road         5104
2-Other Major Road                                811
Name: tomtom_FRC_def, dtype: int64

In [40]:
# TM2 FT dictionary: numeric FT code -> "<code>-<description>" label
TM2_FT_dict = {
    0: "0-Connector",
    1: "1-Freeway to Freeway",
    2: "2-Freeway",
    3: "3-Expressway",
    4: "4-Collector",
    5: "5-Ramp",
    6: "6-Special Facility",
    7: "7-Major Arterial",
}

# Codes that are missing or not in the dictionary map to NaN.
tm2_ft_codes = link_all_conflated_gdf["TM2_FT"]
link_all_conflated_gdf["TM2_FT_def"] = tm2_ft_codes.map(TM2_FT_dict)

# Number of links per TM2 FT label.
link_all_conflated_gdf.TM2_FT_def.value_counts()

4-Collector             549075
7-Major Arterial        143229
0-Connector              57792
3-Expressway             14180
2-Freeway                 3357
5-Ramp                    2207
1-Freeway to Freeway       159
Name: TM2_FT_def, dtype: int64

In [41]:
# write conflation database: one CSV row per link with the attributes from
# every conflated source side by side

tableau_fields = [
    # base network
    "shstReferenceId", "roadway", "lanes", "drive_access", "bike_access", "walk_access",
    # tomtom
    "tomtom_FRC", "tomtom_FRC_def", "tomtom_lanes", "pp_tomtom_link_id", 'F_JNCTID', 'T_JNCTID',
    'tomtom_name', 'tomtom_shieldnum', 'tomtom_rtedir',
    # TM2 Marin
    'TM2Marin_A', 'TM2Marin_B', 'TM2Marin_FT', 'TM2Marin_LANES', 'TM2Marin_ASSIGNABLE',
    # TM2 non-Marin
    'TM2_A', 'TM2_B', 'TM2_FT', "TM2_FT_def", 'TM2_LANES', 'TM2_ASSIGNABLE',
    # SFCTA
    "sfcta_A", 'sfcta_B', "sfcta_STREETNAME", 'sfcta_FT', 'sfcta_LANE_AM', 'sfcta_LANE_OP',
    'sfcta_LANE_PM',
    # PeMS
    'PEMSID', 'pems_lanes_FF', 'pems_lanes_FR', 'pems_lanes_HV', 'pems_lanes_ML',
    'pems_lanes_OR',
]

# Two columns are renamed in the CSV only; the in-memory frame is left unchanged.
conflation_result_df = link_all_conflated_gdf[tableau_fields].rename(
    columns={"lanes": "lanes_osm", "pp_tomtom_link_id": "tomtom_unique_id"}
)

conflation_result_df.to_csv(data_interim_dir + "conflation_result.csv", index=False)