In [1]:
import geopandas as gpd
from geopandas.tools import sjoin
from match_classes import MatchableFeature
from match_generic import get_matches

# --- Configuration: file locations, relative to the notebook's working directory ---

# Overture transportation extract (GeoJSON) that supplies the match candidates.
input_overture_file = "data/overture-transportation-macon.geojson"

# Features to be matched (CSV; read below with its geometry taken from the "wkt" column).
input_to_match_file = "data/macon-line-segments.csv"

# Path reserved for the matched output; it is not written by the cells shown here.
output_file = "data/.output-macon-line-segments-matched.csv"

In [2]:
# Load the features to match and declare their coordinates as WGS84 (EPSG:4326).
# set_crs() is the documented way to (re)declare a CRS; allow_override=True keeps the
# unconditional behaviour of the former `to_match_gdf.crs = ...` assignment.
to_match_gdf = gpd.read_file(input_to_match_file, geometry="wkt")
to_match_gdf = to_match_gdf.set_crs("epsg:4326", allow_override=True)

# Combine every column except the geometry, the id and "type" into one dict per row.
# The column list is computed before the helper columns below are added, so those
# helper columns never end up inside "properties".
property_columns = to_match_gdf.columns.difference(["geometry", "my_id", "type"])
to_match_gdf["properties"] = to_match_gdf[property_columns].apply(lambda row: row.to_dict(), axis=1)

# add column "id_to_match"; this will be used to group all match candidates for a feature, so it must be unique within your data set.
# if your data doesn't have an id column, you can use to_match_gdf.index for example
to_match_gdf["id_to_match"] = to_match_gdf["my_id"]

# Construct a MatchableFeature object for each feature. It is keyed by "id_to_match"
# (not "my_id" directly) so that changing the id source on the line above is the only
# edit needed when adapting this notebook to another data set.
to_match_gdf["feature_to_match"] = to_match_gdf.apply(
    lambda row: MatchableFeature(row.id_to_match, row.geometry, row.properties),
    axis=1,
)

# Preview only the first rows so the output stays small for larger inputs.
to_match_gdf.head()

Unnamed: 0,my_id,wkt,payload,geometry,properties,id_to_match,feature_to_match
0,ID100A,"LINESTRING (-83.6411184 32.8294379, -83.641547...","mock payload1 - small segment, suburb","LINESTRING (-83.64112 32.82944, -83.64155 32.8...","{'payload': 'mock payload1 - small segment, su...",ID100A,"{""id"": ""ID100A"", ""geometry"": ""LINESTRING (-83...."
1,ID100B,"LINESTRING (-83.6499179 32.8333279, -83.649365...",mock payload2,"LINESTRING (-83.64992 32.83333, -83.64937 32.8...","{'payload': 'mock payload2', 'wkt': 'LINESTRIN...",ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83...."
2,ID100C,"LINESTRING (-83.6457156 32.8290998, -83.645183...","mock payload3 - spans two segments, suburb","LINESTRING (-83.64572 32.82910, -83.64518 32.8...",{'payload': 'mock payload3 - spans two segment...,ID100C,"{""id"": ""ID100C"", ""geometry"": ""LINESTRING (-83...."
3,ID100D,"LINESTRING (-83.6449266 32.8558541, -83.646460...",mock payload4 - spans multiple small intersect...,"LINESTRING (-83.64493 32.85585, -83.64646 32.8...",{'payload': 'mock payload4 - spans multiple sm...,ID100D,"{""id"": ""ID100D"", ""geometry"": ""LINESTRING (-83...."


In [3]:
# Load the Overture transportation features and declare their coordinates as WGS84 (EPSG:4326).
# A GeoJSON read may already carry a CRS, and overriding an existing CRS by assigning to
# `.crs` is deprecated in geopandas; set_crs(..., allow_override=True) is the supported
# way to do the same thing.
overture_gdf = gpd.read_file(input_overture_file)
overture_gdf = overture_gdf.set_crs("epsg:4326", allow_override=True)

# Combine every column except the geometry, the id and "type" into a single dict column.
property_columns = overture_gdf.columns.difference(["geometry", "id", "type"])
overture_gdf["properties"] = overture_gdf[property_columns].apply(lambda row: row.to_dict(), axis=1)

# Wrap each Overture feature in a MatchableFeature so it can be handed to the matcher later.
overture_gdf["candidate_feature"] = overture_gdf.apply(
    lambda row: MatchableFeature(row.id, row.geometry, row.properties),
    axis=1,
)

overture_gdf.head(3)

Unnamed: 0,id,theme,type,level,road,subType,connectors,geometry,properties,candidate_feature
0,66292841,transportation,connector,,,,,POINT (-83.61940 32.85803),"{'connectors': nan, 'level': nan, 'road': nan,...","{""id"": ""66292841"", ""geometry"": ""POINT (-83.619..."
1,66292844,transportation,connector,,,,,POINT (-83.61940 32.85954),"{'connectors': nan, 'level': nan, 'road': nan,...","{""id"": ""66292844"", ""geometry"": ""POINT (-83.619..."
2,9024666,transportation,segment,,{'class': 'residential'},road,"[66292844, 66292841]","LINESTRING (-83.61940 32.85803, -83.61940 32.8...","{'connectors': ['66292844', '66292841'], 'leve...","{""id"": ""9024666"", ""geometry"": ""LINESTRING (-83..."


In [4]:
# Tunable parameters for candidate selection.
UTM_EPSG = 32617    # UTM zone 17N: a projected CRS, so buffer distances are in meters
BUFFER_METERS = 20  # tolerance around each feature to match

# filter the overture features that we want to match against - in this case road segments
candidates_gdf = overture_gdf[overture_gdf["type"] == "segment"]

# Project both sides to UTM so the buffer below is expressed in meters.
# to_crs() returns a new frame, so the WGS84 frames are left untouched.
candidates_utm_gdf = candidates_gdf.to_crs(epsg=UTM_EPSG)
to_match_utm_gdf = to_match_gdf.to_crs(epsg=UTM_EPSG)

# Keep the unbuffered (projected) geometry in its own column before replacing it.
to_match_utm_gdf["original_geometry"] = to_match_utm_gdf["geometry"].copy(deep=True)

# Buffer the features to match, to account for the fact that the overture features
# are not perfectly aligned with them. cap_style=2 requests flat end caps.
to_match_utm_gdf["geometry"] = to_match_utm_gdf["original_geometry"].buffer(BUFFER_METERS, cap_style=2)

# Spatial join: every overture segment that intersects a buffered feature becomes a
# match candidate for it. how="left" keeps features to match that have no candidate at all
# (their right-hand columns are then empty).
joined_gdf = sjoin(to_match_utm_gdf, candidates_utm_gdf, how="left", predicate="intersects")

joined_gdf.head(3)

Unnamed: 0,my_id,wkt,payload,geometry,properties_left,id_to_match,feature_to_match,original_geometry,index_right,id,theme,type,level,road,subType,connectors,properties_right,candidate_feature
0,ID100A,"LINESTRING (-83.6411184 32.8294379, -83.641547...","mock payload1 - small segment, suburb","POLYGON ((252741.919 3635404.132, 252710.494 3...","{'payload': 'mock payload1 - small segment, su...",ID100A,"{""id"": ""ID100A"", ""geometry"": ""LINESTRING (-83....","LINESTRING (252767.739 3635469.246, 252726.206...",14286,100000000420,transportation,segment,,{'class': 'residential'},road,"[66310597, 66310592]","{'connectors': ['66310597', '66310592'], 'leve...","{""id"": ""100000000420"", ""geometry"": ""LINESTRING..."
1,ID100B,"LINESTRING (-83.6499179 32.8333279, -83.649365...",mock payload2,"POLYGON ((252017.089 3635901.304, 251993.858 3...","{'payload': 'mock payload2', 'wkt': 'LINESTRIN...",ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","LINESTRING (251954.610 3635921.312, 252005.474...",14675,100000000640,transportation,segment,,"{'class': 'tertiary', 'surface': 'paved'}",road,"[66314949, 1619461805]","{'connectors': ['66314949', '1619461805'], 'le...","{""id"": ""100000000640"", ""geometry"": ""LINESTRING..."
1,ID100B,"LINESTRING (-83.6499179 32.8333279, -83.649365...",mock payload2,"POLYGON ((252017.089 3635901.304, 251993.858 3...","{'payload': 'mock payload2', 'wkt': 'LINESTRIN...",ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","LINESTRING (251954.610 3635921.312, 252005.474...",4702,148887776,transportation,segment,,"{'class': 'tertiary', 'surface': 'paved', 'res...",road,"[66393574, 66314949]","{'connectors': ['66393574', '66314949'], 'leve...","{""id"": ""148887776"", ""geometry"": ""LINESTRING (-..."


In [5]:
# Collapse the join output to one row per feature to match:
#   - feature_to_match: identical on every row of a group, so keep the first one
#   - candidate_feature: gather all joined candidates of the group into a list
# NOTE(review): with the left join above, a feature without any candidate presumably
# ends up with a list holding a single missing value - confirm get_matches copes with that.
candidate_aggregation = {
    "feature_to_match": "first",
    "candidate_feature": list,
}
grouped_df = joined_gdf.groupby(["id_to_match"]).agg(candidate_aggregation).reset_index()
grouped_df.head()

Unnamed: 0,id_to_match,feature_to_match,candidate_feature
0,ID100A,"{""id"": ""ID100A"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000420"", ""geometry"": ""LINESTRIN..."
1,ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000640"", ""geometry"": ""LINESTRIN..."
2,ID100C,"{""id"": ""ID100C"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000010934"", ""geometry"": ""LINESTRIN..."
3,ID100D,"{""id"": ""ID100D"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000013111"", ""geometry"": ""LINESTRIN..."


In [6]:
def _match_row(row):
    """Run `get_matches` for one grouped row: its feature to match against its candidate list."""
    # NOTE(review): the MatchableFeature objects passed here were built from EPSG:4326
    # geometries, so buffer=0.0001 is presumably in degrees - confirm against get_matches.
    return get_matches(
        row["feature_to_match"],
        row["candidate_feature"],
        buffer=0.0001,
        min_buffered_overlap_ratio=0.3,
    )


# One match result per feature to match, stored alongside its inputs.
grouped_df["match_result"] = grouped_df.apply(_match_row, axis=1)
grouped_df.head()

Unnamed: 0,id_to_match,feature_to_match,candidate_feature,match_result
0,ID100A,"{""id"": ""ID100A"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000420"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100A"", ""source_wkt"": ""LINESTRING (-8..."
1,ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000640"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100B"", ""source_wkt"": ""LINESTRING (-8..."
2,ID100C,"{""id"": ""ID100C"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000010934"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100C"", ""source_wkt"": ""LINESTRING (-8..."
3,ID100D,"{""id"": ""ID100D"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000013111"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100D"", ""source_wkt"": ""LINESTRING (-8..."


In [7]:
# Pull the per-feature fields off each match result.
grouped_df["source_geometry"] = grouped_df["match_result"].apply(lambda result: result.source_feature.geometry)
grouped_df["elapsed"] = grouped_df["match_result"].apply(lambda result: result.elapsed)
grouped_df["matched_feature"] = grouped_df["match_result"].apply(lambda result: result.matched_features)

# One row per (feature to match, matched feature) pair.
# NOTE(review): explode() yields a missing value for an empty list; if matched_features
# can be empty, the attribute lookups below would fail on that row - confirm.
exploded_df = grouped_df.explode("matched_feature")

# Flatten the attributes of each matched feature into plain columns.
exploded_df["matched_gers_id"] = exploded_df["matched_feature"].apply(lambda match: match.id)
exploded_df["matched_geometry"] = exploded_df["matched_feature"].apply(lambda match: match.matched_feature.geometry)
exploded_df["matched_overlapping_geometry"] = exploded_df["matched_feature"].apply(lambda match: match.overlapping_geometry)
exploded_df["score"] = exploded_df["matched_feature"].apply(lambda match: match.score)
exploded_df["source_lr"] = exploded_df["matched_feature"].apply(lambda match: match.source_lr)
exploded_df["candidate_lr"] = exploded_df["matched_feature"].apply(lambda match: match.candidate_lr)

# Make "source_geometry" the active geometry. set_geometry() returns a new frame by
# default, so calling it without keeping the result (as this cell used to) has no effect.
# Going through the GeoDataFrame constructor also covers the case where the frame has
# become a plain pandas DataFrame after the groupby/explode steps.
# The source geometries come from the EPSG:4326 input frame, hence the CRS.
exploded_df = gpd.GeoDataFrame(exploded_df, geometry="source_geometry", crs="epsg:4326")
exploded_df.head(3)

Unnamed: 0,id_to_match,feature_to_match,candidate_feature,match_result,source_geometry,elapsed,matched_feature,matched_gers_id,matched_geometry,matched_overlapping_geometry,score,source_lr,candidate_lr
0,ID100A,"{""id"": ""ID100A"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000420"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100A"", ""source_wkt"": ""LINESTRING (-8...","LINESTRING (-83.6411184 32.8294379, -83.641547...",0.001637,"{""id"": ""100000000420"", ""candidate_wkt"": ""LINES...",100000000420,"LINESTRING (-83.640883 32.829695, -83.64130800...","LINESTRING (-83.6411184 32.8294379, -83.641547...",0.7637037,"[0.0, 1.0]","[0.23, 0.67]"
1,ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000640"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100B"", ""source_wkt"": ""LINESTRING (-8...","LINESTRING (-83.6499179 32.8333279, -83.649365...",0.003721,"{""id"": ""148887776"", ""candidate_wkt"": ""LINESTRI...",148887776,"LINESTRING (-83.649354 32.832986000000005, -83...","LINESTRING (-83.6499179 32.8333279, -83.649365...",0.8283938,"[0.0, 1.0]","[0.03, 1.0]"
1,ID100B,"{""id"": ""ID100B"", ""geometry"": ""LINESTRING (-83....","[{""id"": ""100000000640"", ""geometry"": ""LINESTRIN...","{""id"": ""ID100B"", ""source_wkt"": ""LINESTRING (-8...","LINESTRING (-83.6499179 32.8333279, -83.649365...",0.003721,"{""id"": ""893232463"", ""candidate_wkt"": ""LINESTRI...",893232463,LINESTRING (-83.64978980000001 32.832982300000...,LINESTRING (-83.64991523455099 32.833326378195...,4.680556e-11,"[0.0, 0.28]","[0.54, 0.99]"
