In [1]:
import os
import sys
import pandas as pd
import geopandas as gpd
import getpass

# Lower-cased login name; reused further down to build the Box data path.
user = getpass.getuser().lower()

# Local mappymatch checkout. Set the MAPPYMATCH_PATH environment variable to
# point somewhere else; the fallback is the original macOS GitHub folder layout.
MAPPYMATCH_PATH = os.environ.get(
    "MAPPYMATCH_PATH", f"/Users/{user}/Documents/GitHub/mappymatch"
)
# Guard so that re-running this cell does not stack duplicate sys.path entries.
if MAPPYMATCH_PATH not in sys.path:
    sys.path.insert(0, MAPPYMATCH_PATH)

from mappymatch import package_root
from mappymatch.constructs.trace import Trace
from mappymatch.utils.plot import plot_trace
from mappymatch.utils.plot import plot_geofence
from mappymatch.constructs.geofence import Geofence
from mappymatch.maps.nx.nx_map import NxMap, NetworkType
from mappymatch.utils.plot import plot_map
from mappymatch.matchers.lcss.lcss import LCSSMatcher
from mappymatch.utils.plot import plot_matches
from mappymatch.utils.plot import plot_path

# !pip install mappymatch

## Define functions

In [17]:
# create a batch process function to create a list of traces

def create_batch_traces(df, trip_id_column, xy=True):
    """Build one mappymatch Trace, plus point and line GeoDataFrames, per trip.

    Args:
        df (Pandas Dataframe): Point records with ``lon`` and ``lat`` columns,
            read as EPSG:4326 coordinates, and a trip id column.
        trip_id_column (String): Name of the column holding the trip ids.
        xy (bool, optional): Forwarded to ``Trace.from_geo_dataframe``; per the
            original notes, True projects the trace to EPSG:3857. Defaults to True.

    Returns:
        List: One dictionary per trip id, in order of first appearance, with the
        keys "trip_id", "trace", "trace_gdf" (the trace points) and
        "trace_line_gdf" (the points joined into a single line). Rows whose
        trip id is missing are skipped because ``groupby`` drops NaN keys.
    """
    from shapely.geometry import LineString

    batch_traces = []
    # One groupby pass replaces re-filtering the whole frame once per trip, and
    # it groups on the caller's column rather than a hard-coded "trip_id".
    for trip_id, trip_points in df.groupby(trip_id_column, sort=False):
        gdf = gpd.GeoDataFrame(
            trip_points,
            geometry=gpd.points_from_xy(trip_points["lon"], trip_points["lat"]),
            crs=4326,
        )
        batch_trace = Trace.from_geo_dataframe(frame=gdf, xy=xy)

        # Work on a copy so the Trace's private frame is not modified.
        trace_gdf = batch_trace._frame.copy()
        trace_gdf["trip_id"] = trip_id

        # Join the trace points into one line. The CRS is taken from the trace
        # itself so the label stays correct when xy=False keeps the points
        # unprojected.
        # NOTE(review): a trip with a single point presumably cannot form a
        # LineString - confirm how such trips should be handled.
        line = LineString([(p.x, p.y) for p in batch_trace.coords])
        trace_line_gdf = gpd.GeoDataFrame([{"geometry": line}], crs=trace_gdf.crs)
        trace_line_gdf["trip_id"] = trip_id

        batch_traces.append(
            {
                "trip_id": trip_id,
                "trace": batch_trace,
                "trace_gdf": trace_gdf,
                "trace_line_gdf": trace_line_gdf,
            }
        )
    return batch_traces

In [18]:
# # create a geodataframe from coordinates.
# # import geopandas as gpd
# from shapely.geometry import LineString

# coords = [(p.x, p.y) for p in batch_traces[0]["trace"].coords]

# # create a linestring from coordinates

# line = LineString(coords)

# gdf = gpd.GeoDataFrame([{'geometry': line}], crs="EPSG:3857")

In [19]:
# create a function that takes a list of traces and batch processes them using the LCSS matcher


def batch_process_traces(traces, geofence_buffer=1000, network_type=NetworkType.DRIVE):
    """Batch process traces using the LCSS matcher.

    For every trace the function builds a geofence around it, builds an NxMap
    road network from that geofence, and matches the trace with LCSSMatcher.

    Args:
        traces (List): List of dictionaries that each hold at least the keys
            "trip_id" and "trace".
        geofence_buffer (int, optional): Passed as ``padding`` to
            ``Geofence.from_trace``; the original notes describe it as a buffer
            in meters. Defaults to 1000.
        network_type (Enumerator, optional): Network type passed to
            ``NxMap.from_geofence``. Defaults to NetworkType.DRIVE.

    Returns:
        List: The same dictionaries that were passed in (they are updated in
        place), each extended with:
            "matched_result": the object returned by ``match_trace``,
            "matched_gdf": GeoDataFrame of the per-point matches,
            "matched_path_gdf": GeoDataFrame of the matched path.
        Both GeoDataFrames carry a "trip_id" column and the edge attributes
        listed in ``edge_attrs`` below.
    """
    import networkx as nx

    # Edge attributes copied from the road graph onto the matched outputs.
    edge_attrs = ["ref", "name", "maxspeed", "highway", "bridge", "tunnel"]

    matched_traces = []
    for trace_dict in traces:
        trace = trace_dict["trace"]
        trip_id = trace_dict["trip_id"]

        # Road network limited to the padded area around this trace.
        geofence = Geofence.from_trace(trace, padding=geofence_buffer)
        nx_map = NxMap.from_geofence(geofence, network_type=network_type)

        # Match the trace to the map.
        match_result = LCSSMatcher(nx_map).match_trace(trace)

        # Per-point matches as a GeoDataFrame tagged with the trip id.
        matched_df = match_result.matches_to_dataframe()
        matched_df["trip_id"] = trip_id
        matched_gdf = gpd.GeoDataFrame(matched_df, geometry="geom", crs="EPSG:3857")

        # Matched path as a GeoDataFrame tagged with the trip id.
        matched_path_df = match_result.path_to_dataframe()
        matched_path_df["trip_id"] = trip_id
        matched_path_gdf = gpd.GeoDataFrame(
            matched_path_df, geometry="geom", crs="EPSG:3857"
        )

        for attr in edge_attrs:
            # Look the attribute up once per graph, then map it onto both
            # outputs through their "road_id" column.
            attr_by_edge = nx.get_edge_attributes(nx_map.g, attr)
            matched_gdf[attr] = matched_gdf["road_id"].map(attr_by_edge)
            matched_path_gdf[attr] = matched_path_gdf["road_id"].map(attr_by_edge)

        trace_dict["matched_result"] = match_result
        trace_dict["matched_gdf"] = matched_gdf
        trace_dict["matched_path_gdf"] = matched_path_gdf
        matched_traces.append(trace_dict)

    return matched_traces

In [20]:
# create a function that takes a list of dictionaries with matched trace geodataframes, concatenates them, and returns a single geodataframe

def concatenate_matched_gdfs(matched_traces, match_type="matched_gdf"):
    """Stack one frame from every trace dictionary into a single frame.

    Args:
        matched_traces (List): List of dictionaries that each store a
            (geo)dataframe under the key named by ``match_type``.
        match_type (String, optional): Dictionary key of the frame to collect,
            e.g. "matched_gdf", "matched_path_gdf" or "trace_gdf".
            Defaults to "matched_gdf".

    Returns:
        GeoDataFrame: The collected frames concatenated in list order; the
        original row indices are kept as they are.
    """
    frames = [entry[match_type] for entry in matched_traces]
    return pd.concat(frames)

## Prepare the data

In [21]:
## Survey tables to load
location_tbl = "location.csv"
trip_tbl = "trip.csv"

## Root of the locally synced Box folder
box_dir = os.path.join("/Users", user, "Library", "CloudStorage", "Box-Box")

## Travel diary survey folder on Box that holds the .csv tables read below
file_dir = os.path.join(
    box_dir,
    "Modeling and Surveys",
    "Surveys",
    "Travel Diary Survey",
    "Biennial Travel Diary Survey",
    "Data",
    "2023",
    "Full Unweighted 2023 Dataset",
)

## Full paths of the two tables
location_path, trip_path = (
    os.path.join(file_dir, tbl) for tbl in (location_tbl, trip_tbl)
)

In [22]:
# load the location and trip tables into dataframes
location_df, trip_df = (pd.read_csv(path) for path in (location_path, trip_path))

In [23]:
# attach the trip-level region flags and mode columns to every location record
trip_locations = location_df.merge(
    trip_df[
        [
            "trip_id",
            "o_in_region",
            "d_in_region",
            "mode_type",
            "mode_1",
            "mode_2",
            "mode_3",
            "mode_4",
        ]
    ],
    on="trip_id",
)

In [24]:
trip_locations.head()

Unnamed: 0,trip_id,collect_time,accuracy,bearing,speed,lat,lon,o_in_region,d_in_region,mode_type,mode_1,mode_2,mode_3,mode_4
0,2333407402022,2023-11-02T00:23:43Z,13.0,120.0,4.0,37.8527,-122.21255,1,1,2,2,995,995,995
1,2333407402022,2023-11-02T00:23:50Z,8.0,175.0,4.0,37.85227,-122.21236,1,1,2,2,995,995,995
2,2333407402022,2023-11-02T00:24:04Z,12.0,185.0,4.0,37.85163,-122.21239,1,1,2,2,995,995,995
3,2333407402022,2023-11-02T00:24:23Z,8.0,129.0,4.0,37.85092,-122.21197,1,1,2,2,995,995,995
4,2333407402022,2023-11-02T00:24:49Z,11.0,73.0,4.0,37.85138,-122.21071,1,1,2,2,995,995,995


In [25]:
# keep only location records of trips with mode_type 8 (car) whose origin and
# destination are both inside the region
car_trips = trip_locations.loc[
    trip_locations["mode_type"].eq(8)
    & trip_locations["o_in_region"].eq(1)
    & trip_locations["d_in_region"].eq(1)
]

In [26]:
# filter trips_locations to only include trips with mode 8 in mode_1 or mode_2 or mode_3 or mode_4 columns with origins and destinations in region

# car_trips = trip_locations[
#     (
#         (trip_locations["mode_1"] == 8)
#         | (trip_locations["mode_2"] == 8)
#         | (trip_locations["mode_3"] == 8)
#         | (trip_locations["mode_4"] == 8)
#     )
#     & (trip_locations["o_in_region"] == 1)
#     & (trip_locations["d_in_region"] == 1)
# ]

In [27]:
car_trips.head()

Unnamed: 0,trip_id,collect_time,accuracy,bearing,speed,lat,lon,o_in_region,d_in_region,mode_type,mode_1,mode_2,mode_3,mode_4
157,2333407402024,2023-11-02T02:30:10Z,14.0,-1.0,-1.0,37.85953,-122.26371,1,1,8,6,995,995,995
158,2333407402024,2023-11-02T02:30:52Z,40.0,-1.0,-1.0,37.85855,-122.2667,1,1,8,6,995,995,995
159,2333407402024,2023-11-02T02:30:58Z,21.0,242.0,1.0,37.85817,-122.26681,1,1,8,6,995,995,995
160,2333407402024,2023-11-02T02:31:00Z,4.0,262.0,2.0,37.85849,-122.2669,1,1,8,6,995,995,995
161,2333407402024,2023-11-02T02:31:38Z,14.0,239.0,2.0,37.85838,-122.26789,1,1,8,6,995,995,995


In [28]:
car_trips.shape

(4419119, 14)

In [29]:
# Hand-picked trip ids used as a small sample for trying out the matchers;
# the inline notes are the author's remarks about each trip.
test_list = [
    2304076901001, #highway
    2333407402028, #highway
    2304076901002, #highway
    2347455701047, #highway
    # 2333407402031, #might be too long
    2333407402037,
]

In [30]:
car_trips_test = car_trips[car_trips['trip_id'].isin(test_list)]

In [31]:
# create batch traces
batch_traces = create_batch_traces(car_trips_test, trip_id_column="trip_id", xy=True)
# batch_traces = create_batch_traces(car_trips, unique_ids, xy=True)

## Explore using the LCSS Matcher

In [35]:
match_result = batch_process_traces(
    traces=batch_traces, geofence_buffer=1000, network_type=NetworkType.DRIVE
)



In [21]:
test_df = concatenate_matched_gdfs(match_result, match_type="trace_gdf")

In [None]:
match_result[1]["matched_path_gdf"].explore()

In [None]:
match_result[0]['matched_result']

In [None]:
# batch_process_traces returns dictionaries; the matcher output sits under "matched_result"
match_result[0]["matched_result"].path_to_dataframe()

In [None]:
# each entry is a dictionary, so reach the path through its "matched_result" value
display(match_result[0]["matched_result"].path)

In [None]:
# plot the matched path of the first trip (entries are dictionaries, not match results)
plot_path(match_result[0]["matched_result"].path, crs=3857)

In [None]:
# per-point matches of the first trip, read from the dictionary's "matched_result" value
match_result[0]["matched_result"].matches_to_dataframe()

In [None]:
# matched path of the first trip as an interactive map; the matcher output is
# stored under the dictionary key "matched_result"
result_df = match_result[0]["matched_result"].path_to_dataframe()
gpd.GeoDataFrame(result_df, geometry="geom", crs="EPSG:3857").explore()

In [None]:
# plot the matched results (the matches live under the dictionary's "matched_result" value)
plot_matches(match_result[0]["matched_result"].matches)

In [None]:
# plot the trace, along with the estimated path through the network;
# both lists hold dictionaries, so pick out the "trace" and "matched_result" values
plot_trace(
    batch_traces[0]["trace"],
    point_color="blue",
    m=plot_path(match_result[0]["matched_result"].path, crs=3857),
)

In [None]:
# plot the matched results (the matches live under the dictionary's "matched_result" value)
plot_matches(match_result[1]["matched_result"].matches)

In [None]:
# plot the trace, along with the estimated path through the network;
# both lists hold dictionaries, so pick out the "trace" and "matched_result" values
plot_trace(
    batch_traces[1]["trace"],
    point_color="blue",
    m=plot_path(match_result[1]["matched_result"].path, crs=3857),
)

In [None]:
# plot the matched results (the matches live under the dictionary's "matched_result" value)
plot_matches(match_result[3]["matched_result"].matches)

In [None]:
# plot the trace, along with the estimated path through the network;
# both lists hold dictionaries, so pick out the "trace" and "matched_result" values
plot_trace(
    batch_traces[3]["trace"],
    point_color="blue",
    m=plot_path(match_result[3]["matched_result"].path, crs=3857),
)

In [None]:
# plot the matched results (the matches live under the dictionary's "matched_result" value)
plot_matches(match_result[4]["matched_result"].matches)

In [None]:
# plot the trace, along with the estimated path through the network;
# both lists hold dictionaries, so pick out the "trace" and "matched_result" values
plot_trace(
    batch_traces[4]["trace"],
    point_color="blue",
    m=plot_path(match_result[4]["matched_result"].path, crs=3857),
)

In [None]:
# result_df = match_result.matches_to_dataframe()
# result_df

In [None]:
# gpd.GeoDataFrame(result_df, geometry='geom', crs="EPSG:3857").explore()

## Explore the OSRM Matcher

In [None]:
from mappymatch.matchers.osrm import OsrmMatcher

In [None]:
matcher = OsrmMatcher()

In [None]:
# create batch traces in 4326 for the test trips only; the second argument of
# create_batch_traces is the trip id column name, not a list of ids
batch_traces_4326 = create_batch_traces(car_trips_test, trip_id_column="trip_id", xy=False)

In [None]:
# each entry of batch_traces_4326 is a dictionary; the Trace sits under "trace"
plot_trace(batch_traces_4326[0]["trace"], line_color="yellow", point_color="black")

In [None]:
# hand the matcher the Trace itself rather than the dictionary that wraps it
match_result = matcher.match_trace(batch_traces_4326[0]["trace"])

In [None]:
match_result.matches

In [None]:
# no geometry :( 

In [None]:
# plot_trace(trace, point_color="blue", m=plot_path(match_result.path, crs=3857))

## Explore using the Valhalla Matcher

In [None]:
from mappymatch.matchers.valhalla import ValhallaMatcher

In [None]:
matcher = ValhallaMatcher(attributes={'edge.length', 'edge.speed', 'edge.names', 'edge.road_class'})

In [None]:
# trace = Trace.from_geo_dataframe(gdf, xy=False)

In [None]:
# batch_traces holds dictionaries; pass only their Trace objects to the matcher
match_result = matcher.match_trace_batch([entry["trace"] for entry in batch_traces])

In [None]:
# match_result[9]

In [None]:
plot_matches(match_result[0].matches, crs="EPSG:4326")

In [None]:
# batch_traces entries are dictionaries; the Trace to plot sits under "trace"
plot_trace(
    batch_traces[0]["trace"],
    point_color="blue",
    m=plot_path(match_result[0].path, crs="EPSG:4326"),
)

In [None]:
plot_matches(match_result[1].matches, crs="EPSG:4326")

In [None]:
# batch_traces entries are dictionaries; the Trace to plot sits under "trace"
plot_trace(
    batch_traces[1]["trace"],
    point_color="blue",
    m=plot_path(match_result[1].path, crs="EPSG:4326"),
)

In [None]:
plot_matches(match_result[3].matches, crs="EPSG:4326")

In [None]:
# batch_traces entries are dictionaries; the Trace to plot sits under "trace"
plot_trace(
    batch_traces[3]["trace"],
    point_color="blue",
    m=plot_path(match_result[3].path, crs="EPSG:4326"),
)

In [None]:
plot_matches(match_result[4].matches, crs="EPSG:4326")

In [None]:
# batch_traces entries are dictionaries; the Trace to plot sits under "trace"
plot_trace(
    batch_traces[4]["trace"],
    point_color="blue",
    m=plot_path(match_result[4].path, crs="EPSG:4326"),
)