In [4]:
import geopandas as gpd
from geopandas.tools import sjoin
from match_classes import MatchableFeature, TraceSnapOptions
from match_traces import get_trace_matches

# input: GeoJSON file with the features (traces) to match; loaded with gpd.read_file further down
input_to_match_file = "data/macon-osm-traces-combined.geojson"
# input: GeoJSON file with the overture transportation features that supply the match candidates
input_overture_file = "data/overture-transportation-macon.geojson"
# output: text file that receives the match results, one json line per feature to match
output_file = "data/.output-macon-osm-traces-combined-match-results.txt"

In [5]:

# load the features to match from the input file
to_match_gdf = gpd.read_file(input_to_match_file)
# label the coordinates as WGS84 lon/lat. set_crs(..., allow_override=True) only (re)labels the CRS, it never
# reprojects; it replaces a direct assignment to `.crs`, which geopandas flags as unsafe when a CRS is already set
to_match_gdf = to_match_gdf.set_crs("epsg:4326", allow_override=True)

# add column "id_to_match"; this will be used to group all match candidates for a feature, so it must be unique within your data set.
# if your data doesn't have an id column, you can use to_match_gdf.index for example
to_match_gdf["id_to_match"] = to_match_gdf["id"]

# combine properties into a single column: one dict per row holding every column except "geometry", "id" and "type"
# (this includes the "id_to_match" column added above)
property_columns = to_match_gdf.columns.difference(["geometry", "id", "type"])
to_match_gdf["properties"] = to_match_gdf[property_columns].apply(lambda x: x.to_dict(), axis=1)

# construct a MatchableFeature object for each feature
to_match_gdf["feature_to_match"] = to_match_gdf.apply(lambda row: MatchableFeature(row.id, row.geometry, row.properties), axis=1)

to_match_gdf.head(3)

Unnamed: 0,id,filename,track.number,track.link,track.name,track.segment.number,track.segment.split.number,track.description,times,geometry,id_to_match,properties,feature_to_match
0,trace#0,osm-traces-page-0.gpx,0,/user/sunnypilot/traces/7824504,2023_06_05T12_07_14.093431Z.gpx,0,0,Routes from sunnypilot 2022.11.13 (HYUNDAI SON...,"[2023-06-05 12:07:14+00:00, 2023-06-05 12:07:1...","LINESTRING (-83.63079 32.85085, -83.63041 32.8...",trace#0,"{'filename': 'osm-traces-page-0.gpx', 'id_to_m...","{""id"": ""trace#0"", ""geometry"": ""LINESTRING (-83..."
1,trace#1,osm-traces-page-1.gpx,1,/user/sunnypilot/traces/7805545,2023_06_04T21_01_48.584136Z.gpx,0,0,Routes from sunnypilot 2022.11.13 (HYUNDAI ELA...,"[2023-06-04 21:01:50+00:00, 2023-06-04 21:01:5...","LINESTRING (-83.58567 32.81775, -83.58611 32.8...",trace#1,"{'filename': 'osm-traces-page-1.gpx', 'id_to_m...","{""id"": ""trace#1"", ""geometry"": ""LINESTRING (-83..."
2,trace#2,osm-traces-page-10.gpx,1,/user/sunnypilot/traces/7579456,2023_05_20T01_24_29.316387Z.gpx,0,0,Routes from sunnypilot 2023.03.29 (KIA EV6 2022).,"[2023-05-20 01:25:05+00:00, 2023-05-20 01:25:5...","LINESTRING (-83.64841 32.86189, -83.64795 32.8...",trace#2,"{'filename': 'osm-traces-page-10.gpx', 'id_to_...","{""id"": ""trace#2"", ""geometry"": ""LINESTRING (-83..."


In [6]:
# load the overture transportation features from the input file
overture_gdf = gpd.read_file(input_overture_file)
# label the coordinates as WGS84 lon/lat. set_crs(..., allow_override=True) only (re)labels the CRS, it never
# reprojects; it replaces a direct assignment to `.crs`, which geopandas flags as unsafe when a CRS is already set
overture_gdf = overture_gdf.set_crs("epsg:4326", allow_override=True)

# combine properties into a single column: one dict per row holding every column except "geometry", "id" and "type"
property_columns = overture_gdf.columns.difference(["geometry", "id", "type"])
overture_gdf["properties"] = overture_gdf[property_columns].apply(lambda x: x.to_dict(), axis=1)

# construct a MatchableFeature object for each feature
overture_gdf["candidate_feature"] = overture_gdf.apply(lambda row: MatchableFeature(row.id, row.geometry, row.properties), axis=1)

overture_gdf.head(3)

Unnamed: 0,id,theme,type,level,road,subType,connectors,geometry,properties,candidate_feature
0,8f44c0a3295d494-13ff6831c22569b6,transportation,connector,,,,,POINT (-83.61940 32.85803),"{'connectors': nan, 'level': nan, 'road': nan,...","{""id"": ""8f44c0a3295d494-13ff6831c22569b6"", ""ge..."
1,8f44c0a328622e0-179f6831ca420442,transportation,connector,,,,,POINT (-83.61940 32.85954),"{'connectors': nan, 'level': nan, 'road': nan,...","{""id"": ""8f44c0a328622e0-179f6831ca420442"", ""ge..."
2,8844c0a329fffff-13d76831cd6aa403,transportation,segment,,{'class': 'residential'},road,"[8f44c0a328622e0-179f6831ca420442, 8f44c0a3295...","LINESTRING (-83.61940 32.85803, -83.61940 32.8...",{'connectors': ['8f44c0a328622e0-179f6831ca420...,"{""id"": ""8844c0a329fffff-13d76831cd6aa403"", ""ge..."


In [7]:
# filter the overture features that we want to match against - in this case road segments
candidates_gdf = overture_gdf[overture_gdf["type"] == "segment"]

# project to UTM 17N to get distances in meters for the spatial join
# (to_crs returns a new GeoDataFrame, so the lon/lat frames stay untouched without an explicit copy)
candidates_utm_gdf = candidates_gdf.to_crs(epsg=32617)
to_match_utm_gdf = to_match_gdf.to_crs(epsg=32617)

# backup original geometry, we might need it later
to_match_utm_gdf["original_geometry"] = to_match_utm_gdf["geometry"].copy(deep=True)

# add 20 meters buffer to the features to match, to account for the fact that the overture features are not perfectly aligned with the features to be matched
# (cap_style=2 is the flat cap style, so the buffer is not extended beyond the ends of a line)
to_match_utm_gdf["geometry"] = to_match_utm_gdf["original_geometry"].buffer(20, cap_style=2)

# spatial join between the buffered features to match and the road segments: every segment that intersects a
# feature's 20 meter buffer becomes a match candidate for it, giving one output row per feature/segment pair.
# how="left" keeps features that have no intersecting segment; their right-hand columns are left empty (NaN)
joined_gdf = sjoin(to_match_utm_gdf, candidates_utm_gdf, how="left", predicate="intersects")
joined_gdf.head(3)

Unnamed: 0,id_left,filename,track.number,track.link,track.name,track.segment.number,track.segment.split.number,track.description,times,geometry,...,index_right,id_right,theme,type,level,road,subType,connectors,properties_right,candidate_feature
0,trace#0,osm-traces-page-0.gpx,0,/user/sunnypilot/traces/7824504,2023_06_05T12_07_14.093431Z.gpx,0,0,Routes from sunnypilot 2022.11.13 (HYUNDAI SON...,"[2023-06-05 12:07:14+00:00, 2023-06-05 12:07:1...","POLYGON ((253843.595 3637795.083, 253843.700 3...",...,38065,8544c0bbfffffff-17976b4158ac1b2f,transportation,segment,,"{'class': 'motorway', 'surface': 'paved', 'fla...",road,"[8f44c0b85358892-13b7dce1a9fb1680, 8f44c0a36c0...",{'connectors': ['8f44c0b85358892-13b7dce1a9fb1...,"{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""ge..."
0,trace#0,osm-traces-page-0.gpx,0,/user/sunnypilot/traces/7824504,2023_06_05T12_07_14.093431Z.gpx,0,0,Routes from sunnypilot 2022.11.13 (HYUNDAI SON...,"[2023-06-05 12:07:14+00:00, 2023-06-05 12:07:1...","POLYGON ((253843.595 3637795.083, 253843.700 3...",...,37976,8544c0bbfffffff-13ff778bddf4a9fa,transportation,segment,,"{'class': 'motorway', 'surface': 'paved', 'fla...",road,"[8f44c0b8535cd53-139f9caef15c4a49, 8f44c0a36c6...",{'connectors': ['8f44c0b8535cd53-139f9caef15c4...,"{""id"": ""8544c0bbfffffff-13ff778bddf4a9fa"", ""ge..."
0,trace#0,osm-traces-page-0.gpx,0,/user/sunnypilot/traces/7824504,2023_06_05T12_07_14.093431Z.gpx,0,0,Routes from sunnypilot 2022.11.13 (HYUNDAI SON...,"[2023-06-05 12:07:14+00:00, 2023-06-05 12:07:1...","POLYGON ((253843.595 3637795.083, 253843.700 3...",...,38293,8744c0a36ffffff-13d7af21a18fd5b7,transportation,segment,,"{'class': 'motorway_link', 'surface': 'paved',...",road,"[8f44c0a3612ec01-17df38ba18201188, 8f44c0a36c0...",{'connectors': ['8f44c0a3612ec01-17df38ba18201...,"{""id"": ""8744c0a36ffffff-13d7af21a18fd5b7"", ""ge..."


In [8]:
# group join results by id_to_match, and aggregate the candidate features into a list
# - "feature_to_match" is the same on every row of a group, so the first value is kept
# - the spatial join is a left join, so a feature without any intersecting segment has an empty (NaN)
#   candidate_feature; those placeholders are dropped so that such a feature ends up with an empty candidate
#   list instead of a list containing NaN
grouped_df = (
    joined_gdf.groupby("id_to_match")
    .agg({"feature_to_match": "first", "candidate_feature": lambda candidates: candidates.dropna().tolist()})
    .reset_index()
)
grouped_df.head(3)

Unnamed: 0,id_to_match,feature_to_match,candidate_feature
0,trace#0,"{""id"": ""trace#0"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g..."
1,trace#1,"{""id"": ""trace#1"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g..."
2,trace#10,"{""id"": ""trace#10"", ""geometry"": ""LINESTRING (-8...","[{""id"": ""8544c0b3fffffff-17befb26dde99d29"", ""g..."


In [9]:
# matching options shared by all features
# NOTE(review): max_point_to_road_distance is presumably in meters - confirm against TraceSnapOptions
options = TraceSnapOptions(max_point_to_road_distance=10)


def _match_trace_row(row):
    """Run the trace matching algorithm for one row: its feature to match against its candidate features."""
    return get_trace_matches(row.feature_to_match, row.candidate_feature, options)


# one match result per feature to match, stored next to its inputs
grouped_df["match_result"] = grouped_df.apply(_match_trace_row, axis=1)
grouped_df.head(3)

Unnamed: 0,id_to_match,feature_to_match,candidate_feature,match_result
0,trace#0,"{""id"": ""trace#0"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g...","{""id"": ""trace#0"", ""elapsed"": 1.066741599992383..."
1,trace#1,"{""id"": ""trace#1"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g...","{""id"": ""trace#1"", ""elapsed"": 0.321062799979699..."
2,trace#10,"{""id"": ""trace#10"", ""geometry"": ""LINESTRING (-8...","[{""id"": ""8544c0b3fffffff-17befb26dde99d29"", ""g...","{""id"": ""trace#10"", ""elapsed"": 0.02354870000272..."


In [10]:
# save the results to a file, result as one json line per feature to match
import numpy as np

# serialize every match result to its json string (results_df is a Series with one string per feature)
results_df = grouped_df.apply(lambda x: x.match_result.to_json(), axis=1)
# fmt='%s' writes each string as-is on its own line; the encoding is pinned to UTF-8 so the written file does
# not depend on the platform's default text encoding
np.savetxt(output_file, results_df.values, fmt='%s', encoding="utf-8")

In [11]:
# add some of the metrics from the result object as columns for analysis
# maps each new column name to the way its value is read from a row's match_result;
# the columns are added in the order listed here
metric_readers = {
    "source_length": lambda result: result.source_length,
    "route_length": lambda result: result.route_length,
    "points": lambda result: len(result.points),
    "points_with_matches": lambda result: result.points_with_matches,
    "avg_dist_to_road": lambda result: result.avg_dist_to_road,
    "sequence_breaks": lambda result: result.sequence_breaks,
    "revisited_via_points": lambda result: result.revisited_via_points,
    "revisited_segments": lambda result: result.revisited_segments,
    "candidates_count": lambda result: result.target_candidates_count,
    "matched_segments": lambda result: len(result.matched_target_ids),
    "elapsed": lambda result: result.elapsed,
}
for metric_column, read_metric in metric_readers.items():
    # the row-wise apply runs immediately, so it always uses the reader of the current iteration
    grouped_df[metric_column] = grouped_df.apply(lambda row: read_metric(row.match_result), axis=1)
grouped_df.head(3)

Unnamed: 0,id_to_match,feature_to_match,candidate_feature,match_result,source_length,route_length,points,points_with_matches,avg_dist_to_road,sequence_breaks,revisited_via_points,revisited_segments,candidates_count,matched_segments,elapsed
0,trace#0,"{""id"": ""trace#0"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g...","{""id"": ""trace#0"", ""elapsed"": 1.066741599992383...",5165.3,2896.77,101,58,2.21,3,0,0,31,6,1.066742
1,trace#1,"{""id"": ""trace#1"", ""geometry"": ""LINESTRING (-83...","[{""id"": ""8544c0bbfffffff-17976b4158ac1b2f"", ""g...","{""id"": ""trace#1"", ""elapsed"": 0.321062799979699...",5165.4,5167.13,101,101,3.12,0,0,0,18,4,0.321063
2,trace#10,"{""id"": ""trace#10"", ""geometry"": ""LINESTRING (-8...","[{""id"": ""8544c0b3fffffff-17befb26dde99d29"", ""g...","{""id"": ""trace#10"", ""elapsed"": 0.02354870000272...",3383.8,3210.86,37,35,4.06,0,0,0,1,1,0.023549


In [12]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns in grouped_df
grouped_df.describe()

Unnamed: 0,source_length,route_length,points,points_with_matches,avg_dist_to_road,sequence_breaks,revisited_via_points,revisited_segments,candidates_count,matched_segments,elapsed
count,157.0,157.0,157.0,157.0,150.0,157.0,157.0,157.0,157.0,157.0,157.0
mean,3964.767771,3631.048854,65.242038,59.66879,2.9142,0.305732,0.044586,0.0,36.318471,8.292994,0.46132
std,1884.553842,1983.310331,29.280343,30.617656,1.25305,0.730844,0.207053,0.0,61.676268,13.659908,0.832454
min,518.05,0.0,11.0,0.0,0.23,0.0,0.0,0.0,1.0,0.0,0.00576
25%,2807.68,2098.5,38.0,36.0,2.05,0.0,0.0,0.0,2.0,1.0,0.033741
50%,3429.54,3399.45,67.0,66.0,2.81,0.0,0.0,0.0,9.0,2.0,0.068895
75%,5165.47,5165.92,101.0,89.0,3.69,0.0,0.0,0.0,41.0,7.0,0.482268
max,9143.78,8810.84,101.0,101.0,7.58,4.0,1.0,0.0,329.0,64.0,4.586015
