In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import requests
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
import fiona

In [2]:
from methods import read_shst_extract
from methods import link_df_to_geojson
from methods import point_df_to_geojson

In [46]:
# Root folder for intermediate pipeline data; later cells append sub-folder and file names to it.
data_interim_dir = "../../data/interim/"

# Prepare tomtom for conflation

In [14]:
# Load the raw TomTom street network from the file geodatabase.

gdb_file = "../../data/external/tomtom/Network_region.gdb"

# List all layers in the .gdb file (useful for picking the layer name used below).

layers = fiona.listlayers(gdb_file)

# Street link layer.

tomtom_raw_gdf = gpd.read_file(gdb_file, layer = 'mn_nw_BayArea')

# Reproject to EPSG:4326 (lat-lon). The EPSG code is passed directly instead of the
# {'init': 'epsg:4326'} dict: the '+init' syntax is deprecated in pyproj 2+, and the
# `epsg=` keyword is accepted by both old and new geopandas releases.

tomtom_raw_gdf = tomtom_raw_gdf.to_crs(epsg=4326)
tomtom_raw_gdf.crs

{'init': 'epsg:4326'}

In [39]:
# TomTom provides no unique link handle for the Bay Area, so we need to create one.

In [15]:
# Total number of TomTom links (rows) and number of columns.

tomtom_raw_gdf.shape

(602060, 53)

In [43]:
# Number of distinct (ID, F_JNCTID, T_JNCTID) combinations. If this is smaller than
# the row count, the combination cannot serve as a unique link key.

tomtom_handle_groups = tomtom_raw_gdf.groupby(["ID", "F_JNCTID", "T_JNCTID"])
len(tomtom_handle_groups.size())

601846

In [None]:
# Create a unique handle for every TomTom link: a 1-based running number that
# follows the current row order of the frame.

tomtom_link_count = len(tomtom_raw_gdf)
tomtom_raw_gdf["tomtom_link_id"] = range(1, tomtom_link_count + 1)

In [51]:
# Print every part of the multi-part geometry of link 97874.
# Iterate through `.geoms`: looping over a multi-part geometry object directly is
# deprecated in Shapely 1.8 and no longer supported in Shapely 2.0.
sample_geometry = tomtom_raw_gdf.loc[tomtom_raw_gdf.tomtom_link_id == 97874, "geometry"].iloc[0]
for line in sample_geometry.geoms:
    print(line)

LINESTRING (-122.4323109994277 37.76753700037561, -122.4334150001658 37.76747600031828)


# Partition tomtom by county for shst match

In [44]:
# Write one TomTom input file per boundary file (boundary_1 ... boundary_14).
for boundary_number in range(1, 15):
    boundary_tag = str(boundary_number)

    boundary_gdf = gpd.read_file("../../data/external/county_boundaries/boundary_" + boundary_tag + ".geojson")

    # Keep every link that intersects the merged geometry of this boundary file.
    boundary_shape = boundary_gdf.geometry.unary_union
    inside_boundary = tomtom_raw_gdf.intersects(boundary_shape)
    sub_tomtom_gdf = tomtom_raw_gdf[inside_boundary].copy()

    # Only the link handle and the geometry are exported.
    sub_tomtom_gdf[["tomtom_link_id", "geometry"]].to_file(
        "../../data/external/tomtom/tomtom" + boundary_tag + ".in.geojson",
        driver = "GeoJSON")

In [None]:
# Preview the first three TomTom rows.
tomtom_raw_gdf.head(3)

In [None]:
# Show the rows whose geometry type is anything other than MultiLineString.
not_multiline = tomtom_raw_gdf.geom_type != 'MultiLineString'
tomtom_raw_gdf[not_multiline]

# Read network

In [6]:
# Load the link attributes (JSON) and link shapes (GeoJSON) from the step-3 folder,
# then attach each link's geometry by "id".
step3_dir = data_interim_dir + "step3_join_shst_extraction_with_osm/"

link_file = step3_dir + "link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

# Built from `data_interim_dir`: the previously used name `data_interim_folder`
# is not defined anywhere in the notebook and raised a NameError.
shape_gdf = gpd.read_file(step3_dir + "shape.geojson")

# Left join keeps every link even when no shape with a matching id exists.
link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                    how = "left",
                    on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf,
                            crs="EPSG:4326")

In [49]:
# Summarise the columns and dtypes of the network link table.
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 31 columns):
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunnel                object
u                     int64
v                     int64
walk_access           int64
wayId                 object
width                 object
geometry      

# Read TomTom ShSt Match result

In [19]:
# Load the TomTom ShSt match output and give its intersection-id columns the same
# names the network link table uses, so both tables can be joined on them.

tomtom_match_gdf = read_shst_extract(data_interim_dir + "tomtom/", "*_tomtom.out.matched.geojson")

shst_to_network_names = {"shstFromIntersectionId" : "fromIntersectionId",
                         "shstToIntersectionId" : "toIntersectionId"}

tomtom_match_gdf = tomtom_match_gdf.rename(columns = shst_to_network_names)

----------start reading shst extraction data-------------
reading shst extraction data :  D:/MTC/data/interim/tomtom\bike_rules\1_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\bike_rules\2_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\10_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\11_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\12_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\13_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\14_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\1_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/tomtom\car_rules\2_tomtom.out.matched.geojson
reading shst extraction data :  D:/MTC/data/interim/t

In [20]:
# Preview only the first rows instead of rendering the whole frame.
tomtom_raw_gdf.head(10)

Unnamed: 0,ID,FEATTYP,FT,F_JNCTID,F_JNCTTYP,T_JNCTID,T_JNCTTYP,PJ,METERS,FRC,...,ADA,TRANS,DYNSPEED,SPEEDCAT,NTHRUTRAF,ROUGHRD,PARTSTRUC,ORDER08,Shape_Length,geometry
0,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,1554.62,7,...,0,0,0,7,0,0,0,CA097,1989.817478,(LINESTRING (-122.8315739998535 38.62133700023...
1,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,551.09,7,...,0,0,0,6,0,0,0,CA097,704.628862,(LINESTRING (-122.83972099993 38.6229560004714...
2,6.840000e+13,4110,0,6.840001e+13,0,6.840002e+13,0,0,53.22,7,...,0,0,0,7,0,0,0,CA097,68.054041,(LINESTRING (-122.868480999434 38.626759999424...
3,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,45.20,7,...,0,0,0,7,0,0,0,CA097,58.007552,(LINESTRING (-122.8674820000745 38.63032300025...
4,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,30.24,7,...,0,0,0,7,0,0,0,CA097,38.659695,(LINESTRING (-122.8610680001998 38.64033899956...
5,6.840000e+13,4110,0,6.840002e+13,0,6.840001e+13,0,0,602.67,6,...,0,0,0,6,0,0,0,CA097,772.686674,(LINESTRING (-122.9378580002176 38.65743100040...
6,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,152.51,7,...,0,0,0,7,0,0,0,CA097,195.565558,(LINESTRING (-122.9092719998926 38.70904799989...
7,6.840000e+13,4110,0,6.840001e+13,0,6.840002e+13,0,0,466.16,7,...,0,0,0,6,0,0,0,CA097,598.825857,(LINESTRING (-122.9685940004959 38.77782800021...
8,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,80.25,6,...,0,0,0,7,0,0,0,CA097,102.906186,(LINESTRING (-123.0185530001317 38.78901899997...
9,6.840000e+13,4110,0,6.840001e+13,0,6.840001e+13,0,0,127.11,7,...,0,0,0,7,0,0,0,CA097,162.910525,(LINESTRING (-123.0254980004591 38.79289899992...


In [21]:
# Compare the raw TomTom table with the ShSt match table: shapes first, then columns.
frames_to_compare = (tomtom_raw_gdf, tomtom_match_gdf)

for frame in frames_to_compare:
    print(frame.shape)

for frame in frames_to_compare:
    print(frame.columns)

(602060, 53)
(1749759, 18)
Index(['ID', 'FEATTYP', 'FT', 'F_JNCTID', 'F_JNCTTYP', 'T_JNCTID', 'T_JNCTTYP',
       'PJ', 'METERS', 'FRC', 'NETCLASS', 'NETBCLASS', 'NET2CLASS', 'NAME',
       'NAMELC', 'SOL', 'NAMETYP', 'CHARGE', 'SHIELDNUM', 'RTETYP', 'RTEDIR',
       'RTEDIRVD', 'PROCSTAT', 'FOW', 'SLIPRD', 'FREEWAY', 'BACKRD', 'TOLLRD',
       'RDCOND', 'STUBBLE', 'PRIVATERD', 'CONSTATUS', 'ONEWAY', 'F_BP', 'T_BP',
       'F_ELEV', 'T_ELEV', 'KPH', 'MINUTES', 'POSACCUR', 'CARRIAGE', 'LANES',
       'RAMP', 'ADA', 'TRANS', 'DYNSPEED', 'SPEEDCAT', 'NTHRUTRAF', 'ROUGHRD',
       'PARTSTRUC', 'ORDER08', 'Shape_Length', 'geometry'],
      dtype='object')
Index(['shstReferenceId', 'shstGeometryId', 'fromIntersectionId',
       'toIntersectionId', 'gisReferenceId', 'gisGeometryId',
       'gisTotalSegments', 'gisSegmentIndex', 'gisFromIntersectionId',
       'gisToIntersectionId', 'startSideOfStreet', 'endSideOfStreet',
       'sideOfStreet', 'score', 'matchType', 'pp_tomtom_link_id', 'geome

In [22]:
# Report how many distinct TomTom links appear in the ShSt match output.
# The leading bare `.max()` expression was removed: it was not the last statement
# of the cell, so its value was computed and silently discarded.
matched_link_count = tomtom_match_gdf.pp_tomtom_link_id.nunique()
total_link_count = len(tomtom_raw_gdf)

print("Sharedstreets matched "+ str(matched_link_count) +" out of " + str(total_link_count)
      + " total tomtom links.")

Sharedstreets matched 550620 out of 602060 total tomtom links.


In [23]:
# (Re)assign the 1-based running link handle, then bring selected TomTom attributes
# onto every ShSt match row through that handle.
tomtom_raw_gdf["tomtom_link_id"] = range(1, len(tomtom_raw_gdf)+1)

tomtom_attribute_columns = ['tomtom_link_id', 'ID', 'F_JNCTID', 'T_JNCTID', 'LANES', "FRC"]
tomtom_attributes = tomtom_raw_gdf[tomtom_attribute_columns]

# Left join: every match row is kept, whether or not its handle is found.
tomtom_gdf = pd.merge(tomtom_match_gdf,
                      tomtom_attributes,
                      how = "left",
                      left_on = ['pp_tomtom_link_id'],
                      right_on = ['tomtom_link_id'])

print(tomtom_gdf.shape)

(1749759, 24)


# Join network with TomTom

In [50]:
# Join the TomTom attributes onto the network links.

shst_join_keys = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId']
tomtom_attribute_keys = ['pp_tomtom_link_id', "ID", 'F_JNCTID', 'T_JNCTID', "LANES", "FRC"]

# Keep the first match row per key combination so the left join below
# cannot multiply network rows.
unique_tomtom_match_gdf = tomtom_gdf.drop_duplicates(subset = shst_join_keys).copy()

tomtom_columns_to_join = unique_tomtom_match_gdf[shst_join_keys + tomtom_attribute_keys]

link_with_tomtom_gdf = pd.merge(link_gdf,
                                tomtom_columns_to_join,
                                how = "left",
                                on = shst_join_keys)

In [51]:
# Sanity checks on the joined table. They read `link_with_tomtom_gdf`, the join result;
# the previously used name `link_with_tomtom_df` is not defined by any visible cell and
# fails on a fresh kernel.
# Only the last expression of a cell is displayed, so the two filters below are
# evaluated but not shown.
link_with_tomtom_gdf[link_with_tomtom_gdf.shstGeometryId.isnull()]
link_with_tomtom_gdf[~(link_with_tomtom_gdf.shstGeometryId == link_with_tomtom_gdf.id)]
link_with_tomtom_gdf.lanes.value_counts()

nan                                  1359907
                                      134553
2                                      84235
[nan, nan]                             46733
3                                      13260
4                                      11417
[nan, nan, nan]                         9469
1                                       8812
5                                       4697
[, ]                                    4198
[2, 3]                                  2697
[2, 2]                                  1977
[nan, ]                                 1784
[, nan]                                 1177
[3, 3]                                  1043
[3, 4]                                   977
[, , ]                                   960
[nan, nan, nan, nan]                     960
[2, 2, 2]                                767
6                                        725
[2, nan]                                 723
[4, 4]                                   686
[3, 2]    

In [61]:
# Summarise the columns and dtypes of the joined link table.
link_with_tomtom_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 37 columns):
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunnel                object
u                     int64
v                     int64
walk_access           int64
wayId                 object
width                 object
geometry      

In [31]:
# Distinct values in the `u` column of the joined table (`link_with_tomtom_df` is not
# defined by any visible cell, so the join result is used).
link_with_tomtom_gdf.u.nunique()

661023

In [32]:
# Distinct values in `fromIntersectionId` of the joined table (`link_with_tomtom_df` is
# not defined by any visible cell, so the join result is used).
link_with_tomtom_gdf.fromIntersectionId.nunique()

661023

In [33]:
# Distinct values across the `u` and `v` columns combined (`link_with_tomtom_df` is not
# defined by any visible cell, so the join result is used).
len(set(link_with_tomtom_gdf.u.tolist() + link_with_tomtom_gdf.v.tolist()))

661159

# Write out

In [62]:
print("-------write out link json---------")

# Export every column except the geometry.
link_prop = link_with_tomtom_gdf.drop("geometry", axis = 1).columns.tolist()

# Serialise the join result itself. The previous code read from `link_with_tomtom_df`,
# which no visible cell defines, so on a fresh kernel it raised a NameError (and in a
# long-lived kernel it could write stale data).
# Wrapping in pd.DataFrame guarantees the pandas record-oriented `to_json` is called
# rather than GeoDataFrame.to_json, which produces GeoJSON and has a different signature.
out = pd.DataFrame(link_with_tomtom_gdf[link_prop]).to_json(orient = "records")

with open(data_interim_dir + 'step4_conflate_with_tomtom/link.json', 'w') as f:
    f.write(out)

-------write out link json---------
