Conflate travel time reliability data to Sun Cloud routes.

Define workspace.

In [None]:
import arcpy
arcpy.env.overwriteOutput = True

# authentication to access secured data
from arcgis.gis import GIS
from arcgis.features import FeatureLayer

# Connect through the 'pro' option of the arcgis GIS class (secured layers need a signed-in session).
gis = GIS('pro')
# Chained assignment: the geodatabase path becomes both `db` and the arcpy workspace.
db = arcpy.env.workspace = './data/sun_cloud_ttr_revision2.gdb'
# NOTE(review): this value is never read in the visible cells — `source` is rebound to
# 'ttr_project' before its first use, and the projection cell reads 'inrix_2021' directly.
# Confirm which INRIX year is intended.
source = 'inrix_2022'
# Describe the workspace; the notebook displays the returned object.
arcpy.Describe(db)

Reusable functions.

In [None]:
def save_fl(db, url, outname):
    """Query the feature layer at *url* and save the result in *db* as *outname*.

    The layer is queried with default arguments and the returned feature set
    is written with its ``save`` method.
    """
    FeatureLayer(url).query().save(db, outname)

def project(in_data, target_sr, out_name):
    """Project *in_data* to *target_sr* and write the result to *out_name*.

    The first entry returned by ``arcpy.ListTransformations`` for the
    source/target pair is used as the datum transformation. When no
    transformation is listed, an empty string is passed instead.

    Args:
        in_data: Dataset to project; its spatial reference is read with
            ``arcpy.Describe``.
        target_sr: Spatial reference to project to.
        out_name: Name of the projected output dataset.
    """
    in_sr = arcpy.Describe(in_data).spatialReference

    # Look the transformations up once and reuse the list.
    transformations = arcpy.ListTransformations(in_sr, target_sr)
    datum_conversion = transformations[0] if transformations else ''

    arcpy.Project_management(
        in_dataset=in_data,
        out_dataset=out_name,
        out_coor_system=target_sr,
        transform_method=datum_conversion)
# filter snapped points via fuzzy string match
import re
# Tokens ignored when comparing a route ID with a road name: road-type words,
# directionals and other generic terms. Built once instead of on every call.
# Entries containing '-' can never match because '-' is a token separator;
# they are kept unchanged from the original list.
_STOP_WORDS = frozenset([
    'avenue', 'ave',
    'boulevard', 'blvd',
    'drive', 'dr',
    'freeway', 'frwy', 'fwy',
    'lane', 'ln',
    'parkway', 'pkwy',
    'road', 'rd',
    'route', 'rte',
    'street', 'st',
    'trail', 'tr',
    'way',

    'railroad', 'chn', 'drainage', 'lake', 'siphons', 'track', 'bnsf',
    'place', 'pl',
    'l', 'n', 's', 'e', 'w', 'irr', 'i', 'us', 'to', '-', 'from',
    'sl', 'sr', 'loop', 'lp', 'frtg',
    'sb', 'wb', 'eb', 'nb', 'direct', 'hov', 'ramp', 'n-w', 'w-s', 'n-e',
])

# Token separators: apostrophe, whitespace, ';', ',', '/', '-', '(' and ')'.
_TOKEN_SEPARATORS = re.compile(r"['\s;,/\-()]")


def strip_stop_words(x):
    """Return the significant lower-case tokens of the name *x*.

    The name is lower-cased and split on the separators above. Stop words are
    dropped, leading zeros are stripped from the remaining tokens, and tokens
    that end up empty are removed.

    Args:
        x: Name to tokenize. ``None`` or an empty string (for example a null
            attribute value) yields an empty list instead of raising.

    Returns:
        list[str]: The remaining tokens, in their original order.
    """
    if not x:
        return []

    tokens = _TOKEN_SEPARATORS.split(x.lower())
    # Stop words are filtered before leading zeros are removed, as before.
    without_zeros = (t.lstrip('0') for t in tokens if t not in _STOP_WORDS)
    return [t for t in without_zeros if t.strip()]
def string_match(str1, str2):
    """Classify two names as 'match', 'non-match' or 'undetermined'.

    Returns 'undetermined' when either name is empty or None. Otherwise both
    names are tokenized with strip_stop_words, the token lists are printed,
    and the names match when they share at least one token.
    """
    if not (str1 and str2):
        return 'undetermined'

    rid = strip_stop_words(str1)
    road_name = strip_stop_words(str2)
    print(rid, road_name)

    shared_tokens = set(rid).intersection(road_name)
    return "match" if shared_tokens else "non-match"
def delete_if(fc, query):
    """Delete the features of *fc* that satisfy the where clause *query*.

    The number of selected features is always printed; when it is greater
    than zero a second message is printed and the selection is deleted.
    """
    matches = arcpy.SelectLayerByAttribute_management(fc, 'NEW_SELECTION', query)
    n_matches = int(arcpy.GetCount_management(matches)[0])
    print(n_matches)

    # Nothing selected: leave the data untouched.
    if n_matches <= 0:
        return

    print("deleting {} features".format(n_matches))
    arcpy.DeleteFeatures_management(matches)

    

### Prepare the data.

Save the routes locally.

In [None]:
# REST endpoint of the Sun Cloud routes feature layer (layer 92 of the feature service).
routes_url = 'https://services6.arcgis.com/clPWQMwZfdWn4MQZ/ArcGIS/rest/services/Sun_Cloud_Routes/FeatureServer/92' 

# Copy the routes into the local geodatabase as 'sun_cloud_routes'.
save_fl(db, routes_url, 'sun_cloud_routes')

Project data.

In [None]:
# Common spatial reference (WKID 2223) for the routes and the source data.
# Later cells give distances in feet — presumably this is a foot-based system; confirm.
target_sr = arcpy.SpatialReference(2223)
project('sun_cloud_routes', target_sr, 'sun_cloud_routes_project')
# NOTE(review): the workspace cell sets source = 'inrix_2022', but this projects
# 'inrix_2021' — confirm which dataset is intended.
project('inrix_2021', target_sr, 'ttr_project')

Delete the source segments that have no geometry or no road name. Segments with no road name do not coincide with the Sun Cloud routes.

In [None]:
# Drop source segments with zero length or a blank road name. This uses the
# shared delete_if helper rather than repeating its select/count/delete steps.
# NOTE(review): RoadName = ' ' compares against a single space; NULL road names
# are not selected by this clause — confirm that is intended.
sql = "Shape_Length = 0 or RoadName = ' '"
delete_if('ttr_project', sql)

Create end points from the source layer

In [None]:
# Write a point for both ends of every source segment.
arcpy.management.FeatureVerticesToPoints("ttr_project", "ttr_end_pts", "BOTH_ENDS")

# delete overlapping end points (rows whose Shape is identical)
arcpy.management.DeleteIdentical("ttr_end_pts", "Shape", None, 0)

Snap end points to the routes.

In [None]:
# Snap the end points to the edges of the projected routes, within 50 feet.
# Snap edits ttr_end_pts in place.
arcpy.edit.Snap("ttr_end_pts", "sun_cloud_routes_project EDGE '50 Feet'")

Split destination to match the source segments.

In [None]:
# Split the projected routes at the snapped end points (50-foot search radius)
# and write the pieces to 'routes_split'.
fc="routes_split"
arcpy.management.SplitLineAtPoint("sun_cloud_routes_project", "ttr_end_pts", fc, "50 Feet")

Add new fields to the routes layer.

In [None]:
# add fields
fc="routes_split"
# Each entry is [field name, field type, field alias]; 'notes' is added without an alias.
fields = [
['xd_segment_id', 'TEXT', 'XD Segment ID'],
['road_name', 'TEXT', 'Road Name'],
['miles', 'DOUBLE', 'Miles'],
['free_flow_speed', 'DOUBLE', 'Free Flow Speed'],
['average_speed', 'DOUBLE', 'Average Speed'],
['peak_lottr', 'DOUBLE', 'Peak Period LOTTR'],
['daily_delay', 'DOUBLE', 'Daily Delay (Minutes)'],
['peak_tti', 'DOUBLE', 'Peak Period TTI'],
['peak_pti', 'DOUBLE', 'Peak Period PTI'],
['notes','TEXT']]

# Add all fields to routes_split in a single call.
arcpy.AddFields_management(fc, fields)


### Conflation - Match ID
- For each segment where road_name is null, create a buffer.
- Intersect the buffer with the TTR source layer. 
  - Validate routeId vs. road name and functional class.
  - Delete out the bad matches. 
  - Find the longest match if there are multiple matches for a segment. 
- Join the routes layer to the longest valid match and attribute the key fields.
  
Review the result and repeat the process while increasing the buffer distance. 

Create a buffer around each segment whose road name is null.

In [None]:
# buffer distance in feet (the Buffer call below formats it as "<dist> Feet")

query_string = "road_name is null"
dist =  75# feet
# NOTE(review): the output name ends in 'm' although the distance is in feet.
out_buffer = "{0}_{1}m".format('routes_split', dist)

# Rebuild the layer of segments that still have no road name.
arcpy.Delete_management('null_lyr')
arcpy.MakeFeatureLayer_management('routes_split', 'null_lyr', query_string)

# Flat-ended, full-width planar buffer around every unmatched segment; no dissolve.
arcpy.analysis.Buffer("null_lyr", out_buffer, "{} Feet".format(dist), "FULL", "FLAT", "NONE", None, "PLANAR")

Intersect the buffer with the source layer.

In [None]:
# create an intersect table
intersect = 'intersect'
# Rebinds `source` to the projected TTR layer.
source = 'ttr_project'
# Intersect the buffers with the source layer, keeping all attributes of both inputs.
arcpy.analysis.PairwiseIntersect(
    in_features="{0};{1}".format(out_buffer, source),
    out_feature_class=intersect,
    join_attributes="ALL",
    cluster_tolerance=None,
    output_type="INPUT"
)

Add a field to hold string match result. 

In [None]:
# Add a text field for the string-match result.
# NOTE(review): the visible cells write the keep/delete result to 'notes', not to
# this field — confirm whether it is still needed.
arcpy.AddField_management('intersect', 'string_match', 'text')

Pare down the intersect.
- Delete intersect segments shorter than 2ft.

In [None]:
# Delete intersect pieces shorter than 2 (Shape_Length units). This uses the
# shared delete_if helper rather than repeating its select/count/delete steps.
delete_if("intersect", "Shape_Length < 2")

String match -  mark the rows if names don't match.

In [None]:
import collections
fc = 'intersect'

# Code block evaluated by Calculate Field. It calls strip_stop_words(), which is
# defined in the "Reusable functions" cell of this notebook.
custom_code = """
def string_match(f0, f1):
    # A null or empty route ID / road name cannot be compared: treat it as a non-match.
    if not f0 or not f1:
        return "delete"
    a = strip_stop_words(f0)
    b = strip_stop_words(f1)
    # The names match when they share at least one significant token.
    if set(a) & set(b):
        return "keep"
    return "delete"
"""


# Write "keep"/"delete" into 'notes' for every intersect row.
arcpy.management.CalculateField(
    in_table=fc,
    field="notes",
    expression="string_match(!route_id!,!RoadName!)",
    expression_type="PYTHON3",
    code_block=custom_code,
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)


Note whether there is more than one match for each Sun Cloud route segment ID.

In [None]:
# Collect the ORIG_FID value of every row in `fc` (set to 'intersect' in the previous cell).
orig_fids_all = [r[0] for r in arcpy.da.SearchCursor(fc, ['ORIG_FID'])]

In [None]:

from collections import Counter

fc = 'intersect'

# Count the rows per ORIG_FID once, so the per-row check below is a dictionary
# lookup instead of a full list scan for every row.
with arcpy.da.SearchCursor(fc, ['ORIG_FID']) as cursor:
    orig_fid_counts = Counter(row[0] for row in cursor)

# The code block reads `orig_fid_counts` from the notebook namespace.
custom_code = """

def is_unique(field):
    if orig_fid_counts[field] == 1:
        return "unique"
    else:
        return "dupe"

"""
# Write "unique"/"dupe" into 'is_unique' for every intersect row.
arcpy.management.CalculateField(
    in_table=fc,
    field="is_unique",
    expression="is_unique(!ORIG_FID!)",
    expression_type="PYTHON3",
    code_block=custom_code,
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

- A single spatial match: keep
- Multiple spatial match: keep if string match is true

In [None]:
# Remove rows flagged 'delete', but only where the row is also flagged 'dupe'.
delete_if('intersect', "notes = 'delete' And is_unique = 'dupe'")

In [None]:
# Remove rows that repeat the same ORIG_FID and Shape_Length combination.
arcpy.management.DeleteIdentical(
    in_dataset="intersect",
    fields="ORIG_FID;Shape_Length",
    xy_tolerance=None,
    z_tolerance=0
)

Keep the longest segment for each ORIG_FID.

In [None]:
from collections import Counter

fc = 'intersect'
orig_fids_all = [r[0] for r in arcpy.da.SearchCursor(fc, ['ORIG_FID'])]

# ORIG_FID values that occur on more than one intersect row. Counting with a
# Counter replaces the call to unique_values(), which no visible cell defines.
dupe_ids = [fid for fid, n_rows in Counter(orig_fids_all).items() if n_rows > 1]
print(len(dupe_ids))

In [None]:
from collections import Counter

fc = 'intersect'

# collect ids of multiple matches
# (a Counter replaces the call to unique_values(), which no visible cell defines)
orig_fids_all = [r[0] for r in arcpy.da.SearchCursor(fc, ['ORIG_FID'])]
dupe_ids = [fid for fid, n_rows in Counter(orig_fids_all).items() if n_rows > 1]
print(len(dupe_ids))

# The layer does not depend on the ORIG_FID being processed, so build it once.
arcpy.MakeFeatureLayer_management(fc, 'intersect_lyr')

# For each duplicated ORIG_FID, flag the longest segment with notes = 'long'.
# Nothing is deleted here; the flagged rows are exported in a later cell.
for oid in dupe_ids:
    print('Processing oid = {}'.format(oid))

    # Candidate rows for this ORIG_FID, longest first.
    candidates = [row for row in arcpy.da.SearchCursor(
            in_table='intersect_lyr',
            field_names=['OID@', 'Shape_Length'],
            where_clause='ORIG_FID={}'.format(oid),
            sql_clause=('', 'ORDER BY Shape_Length DESC'))]

    # oid of the longest segment
    max_oid = candidates[0][0]
    print(max_oid)

    with arcpy.da.UpdateCursor(fc, ['notes'], 'OBJECTID = {}'.format(max_oid)) as cur:
        for row in cur:
            row[0] = 'long'
            cur.updateRow(row)

Export out the long segments. 

In [None]:
# Copy the rows flagged notes='long' into their own feature class.
arcpy.conversion.ExportFeatures(
    in_features="intersect",
    out_features="long_intersects",
    where_clause="notes='long'"
)

Run the identify dupes code and delete the 'dupe'.

Join the tables and attribute the ID. 

In [None]:
join_tb = 'intersect'

# Join each split route to its intersect rows (routes_split.OBJECTID = ORIG_FID),
# keeping only the routes that have a match.
joined = arcpy.management.AddJoin(
    in_layer_or_view="routes_split",
    in_field="OBJECTID",
    join_table=join_tb,
    join_field="ORIG_FID",
    join_type="KEEP_COMMON",
    index_join_fields="INDEX_JOIN_FIELDS"
)

# Copy the source segment ID onto the matched routes.
arcpy.management.CalculateField(
    in_table=joined,
    field="routes_split.xd_segment_id",
    expression="!{}.XDSegID!".format(join_tb),
    expression_type="PYTHON3",
    code_block="",
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

# The join is only needed for the calculation above. Remove it so later cells
# do not stack further joins on it and can use unqualified field names.
arcpy.management.RemoveJoin(joined)

In [None]:
join_tb = 'long_intersects'

# Join each split route to its longest match (routes_split.OBJECTID = ORIG_FID),
# keeping only the routes that have one.
joined = arcpy.management.AddJoin(
    in_layer_or_view="routes_split",
    in_field="OBJECTID",
    join_table=join_tb,
    join_field="ORIG_FID",
    join_type="KEEP_COMMON",
    index_join_fields="INDEX_JOIN_FIELDS"
)

# Overwrite the segment ID with the one from the longest match.
arcpy.management.CalculateField(
    in_table=joined,
    field="routes_split.xd_segment_id",
    expression="!{}.XDSegID!".format(join_tb),
    expression_type="PYTHON3",
    code_block="",
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

# The join is only needed for the calculation above. Remove it so later cells
# do not stack further joins on it and can use unqualified field names.
arcpy.management.RemoveJoin(joined)

In cases where multiple candidates exist for a segment match but name values aren't available, use the method below to find the link.

- Nullify where xd_segment_id IS NOT NULL And Shape_Length < 50.022172038305


In [None]:
def fill_short(distance, field, fc):
    """Fill *field* on short, unattributed segments from a touching neighbour.

    A segment is a candidate when its Shape_Length is below *distance* and
    *field* is null. Each candidate takes the *field* value of a segment that
    already has one, touches the candidate's boundary and has the same
    route_id. When several neighbours qualify, the first one returned is used.

    Args:
        distance: Upper bound (exclusive) on Shape_Length for candidates.
        field: Name of the field to fill.
        fc: Feature class or layer to update.
    """
    # Drop layers left over from an earlier call.
    for stale_layer in ('short_lyr', 'full_lyr'):
        arcpy.Delete_management(stale_layer)

    short_query = 'Shape_Length <{0} and {1} is null'.format(distance, field)
    print(short_query)
    arcpy.management.MakeFeatureLayer(fc, 'short_lyr', short_query)
    print(arcpy.GetCount_management('short_lyr')[0])

    # Segments that already carry a value act as donors.
    arcpy.management.MakeFeatureLayer(fc, 'full_lyr', '{} is not null'.format(field))

    cursor_fields = ['SHAPE@', 'OID@', 'route_id', field]
    with arcpy.da.UpdateCursor('short_lyr', cursor_fields) as cursor:
        for row in cursor:
            shape, route_id = row[0], row[2]

            # Donors whose boundary touches this short segment ...
            touching = arcpy.SelectLayerByLocation_management(
                'full_lyr', 'BOUNDARY_TOUCHES', shape, '', 'NEW_SELECTION')
            # ... narrowed to the ones on the same route.
            same_route = arcpy.SelectLayerByAttribute_management(
                touching, 'SUBSET_SELECTION', "route_id = '{}'".format(route_id))

            donor_values = [r[0] for r in arcpy.da.SearchCursor(same_route, [field])]
            if donor_values:
                print(donor_values)
                # Several donors may qualify; take the first one.
                row[3] = donor_values[0]

            cursor.updateRow(row)

In [None]:
# Fill xd_segment_id on unattributed segments with Shape_Length below 50.
fill_short(50, 'xd_segment_id', 'routes_split')

Run the "Transfer Attributes" tool (75-foot search distance) to attribute the ID where name comparison is not suitable. (Takes roughly 13–15 minutes.)

In [None]:
# takes ~ 15 minutes
# Transfer XDSegID from the source segments to the split routes within 75 feet.
# NOTE(review): the following cells read the transferred value from 'XDSegID_1' — confirm
# that is the field name this call produces on routes_split.
arcpy.edit.TransferAttributes(
    source_features="ttr_project",
    target_features="routes_split",
    transfer_fields="XDSegID",
    search_distance="75 Feet",
    match_fields=None,
    out_match_table=None,
    transfer_rule_fields=None
)

In [None]:
# select the new matches: rows with no ID yet but a transferred XDSegID_1 value
arcpy.management.SelectLayerByAttribute(
    in_layer_or_view="routes_split",
    selection_type="NEW_SELECTION",
    where_clause="xd_segment_id IS NULL And XDSegID_1 IS NOT NULL",
    invert_where_clause=None
)

In [None]:
# transfer the ID (applies to the rows selected on routes_split by the previous cell)
arcpy.management.CalculateField(
    in_table="routes_split",
    field="xd_segment_id",
    expression="!XDSegID_1!",
    expression_type="PYTHON3",
    code_block="",
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

# Clear the selection so the following cells (fill_short builds its layers from
# routes_split) work on every row again, not just the rows selected here.
arcpy.management.SelectLayerByAttribute(
    in_layer_or_view="routes_split",
    selection_type="CLEAR_SELECTION"
)

Run fill short again. 

In [None]:
# Second pass: the attribute transfer may have created new donors for short segments.
fill_short(50, 'xd_segment_id', 'routes_split')

Identify Sun Cloud segments with no source.

In [None]:
# Buffer every source segment by 200 feet (planar, no dissolve).
# NOTE(review): the output is named 'ttr_project_75' although the distance is 200 feet.
arcpy.gapro.CreateBuffers(
    input_layer="ttr_project",
    out_feature_class="ttr_project_75",
    method="PLANAR",
    buffer_type="DISTANCE",
    buffer_field=None,
    buffer_distance="200 Feet",
    buffer_expression=None,
    dissolve_option="NONE",
    dissolve_fields=None,
    summary_fields=None,
    multipart=None
)

In [None]:
# select segments not within the buffer
# WITHIN combined with INVERT selects the route segments that are not within a source buffer.
selected = arcpy.management.SelectLayerByLocation(
    in_layer="routes_split",
    overlap_type="WITHIN",
    select_features="ttr_project_75",
    search_distance=None,
    selection_type="NEW_SELECTION",
    invert_spatial_relationship="INVERT"
)

In [None]:
# sub-select: of the segments outside the buffer, keep those that still have no ID
sub_sel = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=selected,
    selection_type="SUBSET_SELECTION",
    where_clause="xd_segment_id IS NULL",
    invert_where_clause=None
)

In [None]:
# attribute as 'no_source' (applies to the sub-selection from the previous cell)
arcpy.management.CalculateField(
    in_table=sub_sel,
    field="xd_segment_id",
    expression="'no_source'",
    expression_type="PYTHON3",
    code_block="",
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

# Clear the selection so the following cells (fill_short builds its layers from
# routes_split) work on every row again, not just the rows selected here.
arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=sub_sel,
    selection_type="CLEAR_SELECTION"
)

In [None]:
# Fill remaining unattributed segments with Shape_Length below 202.
# NOTE(review): 202 is presumably chosen to sit just above the 200-foot buffer — confirm.
fill_short(202, 'xd_segment_id', 'routes_split')

Review the intersections.

In [None]:
# Merge the lines of the projected routes into 'unsplit_route'.
# NOTE(review): no visible later cell reads 'unsplit_route'; the Intersect cell uses
# 'sun_cloud_routes_project' directly — confirm which input is intended there.
route_raw = 'sun_cloud_routes_project'
arcpy.management.UnsplitLine(route_raw, "unsplit_route")

In [None]:
# Intersect the projected routes with themselves and write the result as points.
arcpy.analysis.Intersect("sun_cloud_routes_project", "major_intersection", "ALL", None, "POINT")

In [None]:
# Break multipart intersection features into single-part features.
arcpy.management.MultipartToSinglepart("major_intersection", "major_intersection_single")

Check the intersection points and verify that each match link is correct.

### Conflation: Transfer key attributes.

Source data calculations.

In [None]:
# daily_delay = sum (FF_DelayAM, FF_DelayMid, FF_DelayPM)
# NOTE(review): a null in any of the three fields would make the Python expression
# fail for that row — confirm the inputs are never null.
arcpy.management.CalculateField(
    in_table="ttr_project",
    field="daily_delay",
    expression="!FF_DelayAM!+!FF_DelayMid!+!FF_DelayPM!",
    expression_type="PYTHON3",
    code_block="",
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

In [None]:
# peak_tti = max(TTI_AM, TTI_MID, TTI_PM)
# Written to a DOUBLE field named 'peak_tti' on the source layer.

arcpy.management.CalculateField(
    in_table="ttr_project",
    field="peak_tti",
    expression="max(!TTI_AM!, !TTI_MID!, !TTI_PM!)",
    expression_type="PYTHON3",
    code_block="",
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS"
)

In [None]:
# peak_pti = max(!PTI_AM!, !PTI_MID!, !PTI_PM!)
# Written to a DOUBLE field named 'peak_pti' on the source layer.
arcpy.management.CalculateField(
    in_table="ttr_project",
    field="peak_pti",
    expression="max(!PTI_AM!, !PTI_MID!, !PTI_PM!)",
    expression_type="PYTHON3",
    code_block="",
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS"
)


Join by ID and calculate fields.

In [None]:
# Join the split routes to the source layer on the segment ID
# (routes_split.xd_segment_id = ttr_project.XDSegID), keeping only matched routes.
joined = arcpy.management.AddJoin(
    in_layer_or_view="routes_split",
    in_field="xd_segment_id",
    join_table="ttr_project",
    join_field="XDSegID",
    join_type="KEEP_COMMON",
    index_join_fields="INDEX_JOIN_FIELDS"
)

In [None]:
# List the field names of the joined layer (the notebook displays the result).
[f.name for f in arcpy.ListFields(joined)]

In [None]:
# (destination field on routes_split, source field on ttr_project) pairs.
key_fields = [('road_name', 'RoadName'),
('miles', 'Miles'),
('free_flow_speed', 'FF85'),
('average_speed', 'SpeedAVG'),
('peak_lottr', 'WORST_LOTTR'),
('daily_delay', 'daily_delay'),
('peak_tti', 'peak_tti'),
('peak_pti', 'peak_pti')]

Calculate all key values.

In [None]:
# transfer over all the key field values
destination = 'routes_split'
source = 'ttr_project'
for destination_field, source_field in key_fields:
    # Field names are table-qualified because `joined` carries the join.
    arcpy.management.CalculateField(
        in_table=joined,
        field="{0}.{1}".format(destination, destination_field),
        expression="!{0}.{1}!".format(source, source_field),
        expression_type="PYTHON3")

# The join is only needed for the transfer above. Remove it so the clean-up
# cells that follow (field renames and deletions) work on routes_split alone.
arcpy.management.RemoveJoin(joined)

### Clean up.

Decode the functional class.

In [None]:
# (current field name, new field name, new alias) for each field to rename.
alter_fields = [
    ('functional_class', 'functional_classification_code', 'Functional Classification Code'),
]
fc = 'routes_split'
# Rename each listed field and set its alias.
for current, new, alias in alter_fields:
    arcpy.management.AlterField(
        in_table=fc,
        field=current, 
        new_field_name=new, 
        new_field_alias=alias)

In [None]:
fc = 'routes_split'
# Functional classification code -> description.
fc_dict = {
    1 : 'Interstate',
    2 : 'Other Freeways and Expressways',
    3 : 'Other Principal Arterial',
    4 : 'Minor Arterial',
    5 : 'Major Collector',
    6 : 'Minor Collector',
    7:  'Local'}


# Write the description into a TEXT field. The expression reads `fc_dict` from the
# notebook namespace; .get() returns None for codes that are not in the dictionary.
# NOTE(review): the keys are integers — confirm functional_classification_code is numeric.
arcpy.CalculateField_management(
    in_table=fc,
    field="functional_classification",
    expression="fc_dict.get(!functional_classification_code!)",
    expression_type="PYTHON3",
    field_type='TEXT'
)

In [None]:
# List the field names of `fc` (the notebook displays the result).
[f.name for f in arcpy.ListFields(fc)]

Delete unused fields.

In [None]:
# Fields to keep on the final layer.
final_fields = ['route_id',
 'functional_classification_code',
 'Shape_Length',
 'xd_segment_id',
 'road_name',
 'miles',
 'free_flow_speed',
 'average_speed',
 'peak_lottr',
 'daily_delay',
 'peak_tti',
 'peak_pti',
 'functional_classification']
# Fields that arcpy reports as required are always kept.
req_fields = [f.name for f in arcpy.ListFields(fc) if f.required]
final_fields = final_fields + req_fields

all_fields = [f.name for f in arcpy.ListFields(fc)]
# Existing fields that are not in the keep list. A plain set difference is used
# instead of the symmetric difference: the latter would also return keep-list
# names that do not exist on `fc`, and deleting those would fail.
delete_fields = set(all_fields) - set(final_fields)
delete_fields


In [62]:
# Delete the unused fields one at a time, printing each name first.
for f in delete_fields:
    print(f)
    arcpy.management.DeleteField(fc, f)

XDSegID_1
ORIG_SEQ
type
notes
ORIG_FID
XDSegID


In [None]:
# Merge adjoining split segments that agree on every listed attribute into the
# final layer. The input was an empty string, which is not a valid dataset, so
# it now names the attributed routes. The dissolve list uses
# 'functional_classification_code' (the field 'functional_class' was renamed
# earlier) and also carries the decoded 'functional_classification'.
arcpy.management.UnsplitLine("routes_split", "sun_cloud_travel_time", "route_id;functional_classification_code;functional_classification;road_name;miles;free_flow_speed;average_speed;peak_lottr;daily_delay;peak_tti;peak_pti;xd_segment_id", None, '')