# This notebook reads in TomTom raw data and prepares data for conflation

In [2]:
import fiona
import geopandas as gpd
import pandas as pd

In [None]:
# Input locations (relative to the notebook's working directory).
tomtom_input_folder = "../data/external/tomtom/"
# NOTE(review): this path is rooted at ../external_data/ rather than ../data/external/
# like the TomTom input above -- confirm it is intentional.
county_data_folder = "../external_data/county/"

# Output location for the intermediate files written by this notebook.
data_interim_folder = "../data/interim/"

# reading tomtom data

In [3]:
# Point at the TomTom file geodatabase and list every layer it contains.

gdb_file = f"{tomtom_input_folder}Network_region.gdb"
layers = fiona.listlayers(gdb_file)

In [5]:
# Read the street link layer from the geodatabase into a GeoDataFrame.

link_layer = "mn_nw_BayArea"
link_gdf = gpd.read_file(gdb_file, layer=link_layer)

In [8]:
# Display the attribute columns available on the street link layer.

link_gdf.columns

Index(['ID', 'FEATTYP', 'FT', 'F_JNCTID', 'F_JNCTTYP', 'T_JNCTID', 'T_JNCTTYP',
       'PJ', 'METERS', 'FRC', 'NETCLASS', 'NETBCLASS', 'NET2CLASS', 'NAME',
       'NAMELC', 'SOL', 'NAMETYP', 'CHARGE', 'SHIELDNUM', 'RTETYP', 'RTEDIR',
       'RTEDIRVD', 'PROCSTAT', 'FOW', 'SLIPRD', 'FREEWAY', 'BACKRD', 'TOLLRD',
       'RDCOND', 'STUBBLE', 'PRIVATERD', 'CONSTATUS', 'ONEWAY', 'F_BP', 'T_BP',
       'F_ELEV', 'T_ELEV', 'KPH', 'MINUTES', 'POSACCUR', 'CARRIAGE', 'LANES',
       'RAMP', 'ADA', 'TRANS', 'DYNSPEED', 'SPEEDCAT', 'NTHRUTRAF', 'ROUGHRD',
       'PARTSTRUC', 'ORDER08', 'Shape_Length', 'geometry'],
      dtype='object')

In [32]:
# Reproject the links to EPSG:4326 (lat-lon) and display the resulting CRS.
# The CRS is given as `epsg=4326` instead of the `{'init': 'epsg:4326'}` dict,
# because pyproj has deprecated the `init` syntax.

link_gdf = link_gdf.to_crs(epsg=4326)
link_gdf.crs

{'init': 'epsg:4326'}

# There is no existing unique TomTom handle for the Bay Area links, so we need to create one

In [40]:
# Total number of TomTom links (rows) and number of columns.

link_gdf.shape

(602060, 54)

In [47]:
# Count the distinct (ID, F_JNCTID, T_JNCTID) combinations among all links.

handle_columns = ["ID", "F_JNCTID", "T_JNCTID"]
links_per_handle = link_gdf.groupby(handle_columns).count()
len(links_per_handle)

601846

In [11]:
# Give every link a sequential, 1-based id to use as its unique handle.

num_links = len(link_gdf)
link_gdf["tomtom_link_id"] = range(1, num_links + 1)

In [19]:
# Write out the TomTom link id and geometry for the shst match, split into
# several GeoJSON files: tomtom0.in.geojson, tomtom1.in.geojson, ...

chunk_size = 100000  # maximum number of links per output file

# One iteration per block of `chunk_size` rows; the last block may be shorter.
# The chunk starts are derived from the frame length so every link is written.
for i, start in enumerate(range(0, len(link_gdf), chunk_size)):
    link_gdf[["tomtom_link_id", "geometry"]].iloc[start:start + chunk_size].to_file(
        data_interim_folder + "tomtom" + str(i) + ".in.geojson",
        driver="GeoJSON",
    )

In [48]:
# Frequency of each value of the LANES attribute across all links.
link_gdf["LANES"].value_counts()

0     448466
2      66145
3      34110
1      21904
4      20391
5       9004
6       1682
7        321
8         21
9          9
10         5
17         2
Name: LANES, dtype: int64

# SF county: the combination of ["ID", "F_JNCTID", "T_JNCTID"] can serve as a unique handle

In [24]:
# Load the county boundary file and display its CRS.

boundary_path = f"{county_data_folder}boundary_4.geojson"
sf_boundry = gpd.read_file(boundary_path)
sf_boundry.crs

{'init': 'epsg:4326'}

In [43]:
# Keep only the TomTom links that intersect the SF boundary.
# All boundary geometries are merged into a single shape before testing intersection.
sf_area = sf_boundry.geometry.unary_union
touches_sf = link_gdf.intersects(sf_area)
sf_link_gdf = link_gdf[touches_sf].copy()

In [39]:
# Number of SF links (rows) and number of columns.
sf_link_gdf.shape

(31805, 54)

In [46]:
# Count the distinct (ID, F_JNCTID, T_JNCTID) combinations among the SF links.
sf_handle_columns = ["ID", "F_JNCTID", "T_JNCTID"]
sf_links_per_handle = sf_link_gdf.groupby(sf_handle_columns).count()
len(sf_links_per_handle)

31805

In [44]:
# Save the SF links with all of their TomTom attributes as GeoJSON.

sf_full_path = f"{data_interim_folder}tomtom_sf.geojson"
sf_link_gdf.to_file(sf_full_path, driver="GeoJSON")

In [None]:
# Save only the handle columns and geometry of the SF links as input for the SHST match.

shst_columns = ["ID", "F_JNCTID", "T_JNCTID", "geometry"]
sf_link_gdf[shst_columns].to_file(
    f"{data_interim_folder}tomtom_sf.in.geojson", driver="GeoJSON"
)