In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev
import pandas as pd

In [None]:
# For computation
import numpy as np
import math
import scipy.stats as stats
import matplotlib.pyplot as plt

In [None]:
import geopandas as gpd
import shapely as shp
import scipy.interpolate as spi

In [None]:
import arrow

In [None]:
# Presumably the mean Earth radius in metres (TODO confirm); used below as `R / 360`
# to scale distances computed on longitude/latitude coordinates.
R = 6371000

In [None]:
# Datastore endpoint and spec author shared by every evaluation spec loaded below.
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"

# One SpecDetails per evaluation spec id, constructed in the order la, sj, ucb.
sd_la, sd_sj, sd_ucb = [
    eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, spec_id)
    for spec_id in (
        "unimodal_trip_car_bike_mtv_la",
        "car_scooter_brex_san_jose",
        "train_bus_ebike_mtv_ucb",
    )
]

In [None]:
import importlib
importlib.reload(ezpv)

In [None]:
pv_la = eipv.PhoneView(sd_la)

In [None]:
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
pv_ucb = eipv.PhoneView(sd_ucb)

### Experimenting with creating temporal reference trajectories

####  `scipy.interpolate.interp1d`: Easy case (timestamp based)

Proposed algorithm:
- filter out points that are too far away from the spatial ground truth
- Next try two approaches:
    - map ts -> lat, and ts -> lng *for the filtered points only* for trajectories
        - or parts of trajectories (?) that have a reasonable density of filtered points
        - reconstitute reference trajectories for new timestamps based on the new functions
        - if both trajectories are sufficiently dense, can take mean? can pick one?
    - for the remaining points, map (lat,lng) -> ts *for the filtered points only* 
    - for the remaining points, map distance along line -> ts *for the filtered points only* 
        - and do the same things for both

In [None]:
# Both phones contribute the same slot of the LA phone view:
# evaluation range 1 -> trip range 0 -> section range 1.
android_trip = (
    pv_la.map()["android"]["ucb-sdb-android-1"]
    ["evaluation_ranges"][1]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][1]
)
ios_trip = (
    pv_la.map()["ios"]["ucb-sdb-ios-1"]
    ["evaluation_ranges"][1]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][1]
)

# Sensed locations recorded by each phone for that section.
android_accuracy_df, ios_accuracy_df = android_trip["location_df"], ios_trip["location_df"]

# Ground-truth leg and its route as a shapely LineString.
gt_leg = sd_la.get_ground_truth_for_leg(android_trip["trip_id_base"], ios_trip["trip_id_base"])
gt_linestring = shp.geometry.LineString(
    coordinates=gt_leg["route_coords"]["geometry"]["coordinates"]
)

In [None]:
def _row_to_point(loc_row):
    """Build a shapely Point (x=longitude, y=latitude) from one location row."""
    return shp.geometry.Point(loc_row.longitude, loc_row.latitude)

# Attach a point geometry to every sensed location of each phone.
android_control_gpdf = gpd.GeoDataFrame(
    android_accuracy_df,
    geometry=android_accuracy_df.apply(_row_to_point, axis=1),
)
ios_control_gpdf = gpd.GeoDataFrame(
    ios_accuracy_df,
    geometry=ios_accuracy_df.apply(_row_to_point, axis=1),
)

In [None]:
# Distance from every sensed point to the ground-truth line, in coordinate units, scaled by R / 360.
# NOTE(review): the length of one degree of arc is 2*pi*R/360, so R / 360 under-scales by 2*pi
# and the `<= 5` cut-offs applied to these values are wider than 5 m. Confirm before changing:
# the valid ratios discussed in this notebook depend on the current scaling.
android_gt_distances = (R / 360) * android_control_gpdf.distance(gt_linestring)
ios_gt_distances = (R / 360) * ios_control_gpdf.distance(gt_linestring)

In [None]:
filtered_android_control_gpdf = android_control_gpdf[android_gt_distances <= 5]
filtered_ios_control_gpdf = ios_control_gpdf[ios_gt_distances <= 5]

In [None]:
len(filtered_android_control_gpdf)/len(android_control_gpdf), len(filtered_ios_control_gpdf)/len(ios_control_gpdf)

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
android_control_gpdf.plot(ax=ax_array[0])
filtered_android_control_gpdf.plot(ax=ax_array[0])
ios_control_gpdf.plot(ax=ax_array[1])
filtered_ios_control_gpdf.plot(ax=ax_array[1])

In [None]:
len(android_control_gpdf), len(ios_control_gpdf)

In [None]:
import scipy.interpolate as sci

In [None]:
def _ts_interpolator(control_gpdf, column):
    """Return an interp1d mapping ts -> `column` that extrapolates outside the sampled ts range."""
    return sci.interp1d(x=control_gpdf.ts, y=control_gpdf[column], fill_value="extrapolate")

# ts -> latitude and ts -> longitude, fitted on the filtered points of each phone only.
android_lat_fn = _ts_interpolator(filtered_android_control_gpdf, "latitude")
android_lon_fn = _ts_interpolator(filtered_android_control_gpdf, "longitude")
ios_lat_fn = _ts_interpolator(filtered_ios_control_gpdf, "latitude")
ios_lon_fn = _ts_interpolator(filtered_ios_control_gpdf, "longitude")

In [None]:
new_ts_range = np.arange(android_trip["start_ts"], android_trip["end_ts"], 1)
new_fmt_time_range = [arrow.get(ts).to(sd_la.eval_tz) for ts in new_ts_range]

In [None]:
# Evaluate the four interpolators on the shared `new_ts_range` grid.
new_android_lat, new_android_lng = android_lat_fn(new_ts_range), android_lon_fn(new_ts_range)
new_ios_lat, new_ios_lng = ios_lat_fn(new_ts_range), ios_lon_fn(new_ts_range)

# One resampled frame per phone; geometry is Point(x=longitude, y=latitude).
new_android_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_android_lat,
    longitude=new_android_lng,
    ts=new_ts_range,
    fmt_time=new_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_android_lng, new_android_lat)),
))
new_ios_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_ios_lat,
    longitude=new_ios_lng,
    ts=new_ts_range,
    fmt_time=new_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_ios_lng, new_ios_lat)),
))

In [None]:
new_android_gpdf[["fmt_time", "geometry"]].head(), new_ios_gpdf[["fmt_time", "geometry"]].head()

In [None]:
distances = new_android_gpdf.distance(gt_linestring) * (R/360); distances.head()

In [None]:
valid_mask = distances <= 2; np.nonzero(np.logical_not(valid_mask))

In [None]:
filtered_new_android_gpdf = new_android_gpdf[new_android_gpdf.distance(gt_linestring) * (R/360) <= 5]; filtered_new_android_gpdf.head()
filtered_new_ios_gpdf = new_ios_gpdf[new_android_gpdf.distance(gt_linestring) * (R/360) <= 5]; filtered_new_ios_gpdf.head()

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
new_android_gpdf.plot(ax=ax_array[0])
filtered_new_android_gpdf.plot(ax=ax_array[0])
new_ios_gpdf.plot(ax=ax_array[1])
filtered_new_ios_gpdf.plot(ax=ax_array[1])

In [None]:
NAN = float("nan")
merged_gpdf = pd.merge(filtered_new_android_gpdf, filtered_new_ios_gpdf, on="ts", how="outer", suffixes=("_a", "_i")); merged_gpdf.head()

In [None]:
def merge_row(loc_row):
    # print("merging %s" % loc_row)
    if pd.isnull(loc_row.geometry_i):
        assert not np.isnull(loc_row.geometry_a)
        final_geom = loc_row.geometry_a
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        final_geom = loc_row.geometry_i
    else:
        assert not pd.isnull(loc_row.geometry_i) and not pd.isnull(loc_row.geometry_a)
        midpoint = shp.geometry.LineString(coordinates=[loc_row.geometry_a, loc_row.geometry_i]).interpolate(0.5, normalized=True)
        # print(midpoint)
        final_geom = midpoint
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom
    }

In [None]:
reference_gpdf = gpd.GeoDataFrame(data=list(merged_gpdf.apply(merge_row, axis=1))); reference_gpdf.head()

In [None]:
valid_ratio = len(filtered_new_android_gpdf)/len(new_android_gpdf)
assert valid_ratio == len(filtered_new_ios_gpdf)/len(new_ios_gpdf)
valid_ratio

In [None]:
import geojson as gj

In [None]:
import folium

In [None]:
curr_map = folium.Map()
# NOTE(review): gt_leg was looked up through sd_la but is converted through sd_ucb; presumably
# get_geojson_for_leg does not depend on the spec instance, but confirm.
gt_leg_gj = sd_ucb.get_geojson_for_leg(gt_leg)

# Red: interpolated android points that passed the filter. Longitude, latitude and ts are all
# taken from the same filtered frame so that each coordinate pair and its timestamp come from
# the same row (zipping columns of differently-sized frames pairs unrelated rows).
sensed_section_gj = gj.Feature(
    geometry=gj.LineString(coordinates=list(zip(filtered_new_android_gpdf.longitude, filtered_new_android_gpdf.latitude))),
    properties={"style": {"color": "red"}, "ts": list(filtered_new_android_gpdf.ts)})
# Yellow: raw android control points.
old_sensed_section_gj = gj.Feature(
    geometry=gj.LineString(coordinates=list(zip(android_control_gpdf.longitude, android_control_gpdf.latitude))),
    properties={"style": {"color": "yellow"}, "ts": list(android_control_gpdf.ts)})
# Blue: merged reference trajectory.
filtered_sensed_section_gj = gj.Feature(
    geometry=gj.LineString(coordinates=list(zip(reference_gpdf.longitude, reference_gpdf.latitude))),
    properties={"style": {"color": "blue"}, "ts": list(reference_gpdf.ts)})

# Line layers and point-marker layers for each trajectory.
gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name="ground_truth")
gt_leg_gj_points = ezpv.get_point_markers(gt_leg_gj[2], name="ground_truth_points", color="green")
sensed_leg_gj_feature = folium.GeoJson(sensed_section_gj, name="sensed_values")
old_sensed_leg_gj_feature = folium.GeoJson(old_sensed_section_gj, name="old sensed_values")
filtered_sensed_leg_gj_feature = folium.GeoJson(filtered_sensed_section_gj, name="filtered sensed_values")
sensed_leg_gj_points = ezpv.get_point_markers(sensed_section_gj, name="sensed_points", color="red", tz="America/Los_Angeles")
old_sensed_leg_gj_points = ezpv.get_point_markers(old_sensed_section_gj, name="old_sensed_points", color="yellow", tz="America/Los_Angeles")
filtered_sensed_leg_gj_points = ezpv.get_point_markers(filtered_sensed_section_gj, name="filtered_sensed_points", color="blue", tz="America/Los_Angeles")

curr_map.add_child(gt_leg_gj_feature)
curr_map.add_child(gt_leg_gj_points)
curr_map.add_child(sensed_leg_gj_feature)
curr_map.add_child(sensed_leg_gj_points)
curr_map.add_child(old_sensed_leg_gj_feature)
curr_map.add_child(old_sensed_leg_gj_points)
curr_map.add_child(filtered_sensed_leg_gj_feature)
curr_map.add_child(filtered_sensed_leg_gj_points)
curr_map.fit_bounds(sensed_leg_gj_feature.get_bounds())
folium.LayerControl().add_to(curr_map)
curr_map

####  `scipy.interpolate.interp1d`: Hard case (android locations are just really messed up)

- Works poorly with tolerance = 2m (25% valid ratio)
- Increasing tolerance to 5m bumps up the valid ratio to 50%, but then it introduces points that are not really close enough to ground truth (e.g. around loc 219)
- Increasing tolerance to 10m bumps up the valid ratio to 63%, but then it makes the ground truth even worse (e.g. around loc 250-268 and 390-393)

In [None]:
android_trip = pv_la.map()["android"]["ucb-sdb-android-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]
ios_trip = pv_la.map()["ios"]["ucb-sdb-ios-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]
android_accuracy_df = android_trip["location_df"]
ios_accuracy_df = ios_trip["location_df"]
gt_leg = sd_la.get_ground_truth_for_leg(android_trip["trip_id_base"], ios_trip["trip_id_base"])
gt_linestring = shp.geometry.LineString(coordinates=gt_leg["route_coords"]["geometry"]["coordinates"])

In [None]:
android_control_gpdf = gpd.GeoDataFrame(
        android_accuracy_df, geometry=android_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))
ios_control_gpdf = gpd.GeoDataFrame(
        ios_accuracy_df, geometry=ios_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))

In [None]:
android_gt_distances = android_control_gpdf.distance(gt_linestring) * (R / 360)
ios_gt_distances = ios_control_gpdf.distance(gt_linestring) * (R / 360)

In [None]:
filtered_android_control_gpdf = android_control_gpdf[android_gt_distances <= 5]
filtered_ios_control_gpdf = ios_control_gpdf[ios_gt_distances <= 5]

In [None]:
len(filtered_android_control_gpdf)/len(android_control_gpdf), len(filtered_ios_control_gpdf)/len(ios_control_gpdf)

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
android_control_gpdf.plot(ax=ax_array[0])
filtered_android_control_gpdf.plot(ax=ax_array[0])
ios_control_gpdf.plot(ax=ax_array[1])
filtered_ios_control_gpdf.plot(ax=ax_array[1])

In [None]:
len(android_control_gpdf), len(ios_control_gpdf)

In [None]:
import scipy.interpolate as sci

In [None]:
android_lat_fn = sci.interp1d(x=filtered_android_control_gpdf.ts, y=filtered_android_control_gpdf.latitude, fill_value="extrapolate")
android_lon_fn = sci.interp1d(x=filtered_android_control_gpdf.ts, y=filtered_android_control_gpdf.longitude, fill_value="extrapolate")
ios_lat_fn = sci.interp1d(x=filtered_ios_control_gpdf.ts, y=filtered_ios_control_gpdf.latitude, fill_value="extrapolate")
ios_lon_fn = sci.interp1d(x=filtered_ios_control_gpdf.ts, y=filtered_ios_control_gpdf.longitude, fill_value="extrapolate")

In [None]:
new_ts_range = np.arange(android_trip["start_ts"], android_trip["end_ts"], 1)
new_fmt_time_range = [arrow.get(ts).to(sd_la.eval_tz) for ts in new_ts_range]

In [None]:
new_android_lat = android_lat_fn(new_ts_range)
new_android_lng = android_lon_fn(new_ts_range)
new_ios_lat = ios_lat_fn(new_ts_range)
new_ios_lng = ios_lon_fn(new_ts_range)
new_android_gpdf = gpd.GeoDataFrame({
    "latitude": new_android_lat,
    "longitude": new_android_lng,
    "ts": new_ts_range,
    "fmt_time": new_fmt_time_range,
    "geometry": [shp.geometry.Point(x, y) for x, y in zip(new_android_lng, new_android_lat)]
})
new_ios_gpdf = gpd.GeoDataFrame({
    "latitude": new_ios_lat,
    "longitude": new_ios_lng,
    "ts": new_ts_range,
    "fmt_time": new_fmt_time_range,
    "geometry": [shp.geometry.Point(x, y) for x, y in zip(new_ios_lng, new_ios_lat)]
})

In [None]:
new_android_gpdf[["fmt_time", "geometry"]].head(), new_ios_gpdf[["fmt_time", "geometry"]].head()

In [None]:
distances = new_android_gpdf.distance(gt_linestring) * (R/360); distances.head()

In [None]:
valid_mask = distances <= 2; np.nonzero(np.logical_not(valid_mask))

In [None]:
new_android_gpdf["distance_along"] = new_android_gpdf.geometry.apply(lambda p: gt_linestring.project(p))
new_ios_gpdf["distance_along"] = new_ios_gpdf.geometry.apply(lambda p: gt_linestring.project(p))

In [None]:
filtered_new_android_gpdf = new_android_gpdf[new_android_gpdf.distance(gt_linestring) * (R/360) <= 5]; filtered_new_android_gpdf.head()
filtered_new_ios_gpdf = new_ios_gpdf[new_ios_gpdf.distance(gt_linestring) * (R/360) <= 5]; filtered_new_ios_gpdf.head()

In [None]:
ezpv.display_map_detail_from_df(curr_map=None, sel_location_df=filtered_new_android_gpdf[filtered_new_android_gpdf.distance_along.diff() < 0])

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
# new_android_gpdf.plot(ax=ax_array[0])
filtered_new_android_gpdf.plot(ax=ax_array[0])
# new_ios_gpdf.plot(ax=ax_array[1])
filtered_new_ios_gpdf.plot(ax=ax_array[1])

In [None]:
NAN = float("nan")
merged_gpdf = pd.merge(filtered_new_android_gpdf, filtered_new_ios_gpdf, on="ts", how="outer", suffixes=("_a", "_i")).sort_values(by="ts", axis="index"); merged_gpdf.head()

In [None]:
def merge_row(loc_row):
    """Collapse one row of the outer-merged android/iOS frame into a single reference point.

    If only one phone has a point at this timestamp, that point is used. If both
    do, the point with the smaller distance to ``gt_linestring`` is used.

    :param loc_row: one row of ``merged_gpdf``; reads ``ts``, ``geometry_a`` and ``geometry_i``
    :return: dict with ``ts``, ``fmt_time``, ``longitude``, ``latitude`` and ``geometry``
    :raises AssertionError: if neither phone has a geometry for the row
    """
    if pd.isnull(loc_row.geometry_i):
        # pandas provides the null check; numpy has no `isnull`, so np.isnull would raise
        # AttributeError as soon as a row with only an android point is merged.
        assert not pd.isnull(loc_row.geometry_a)
        final_geom = loc_row.geometry_a
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        final_geom = loc_row.geometry_i
    else:
        assert not pd.isnull(loc_row.geometry_i) and not pd.isnull(loc_row.geometry_a)
        # Index 0 is the android point, index 1 the iOS point; keep the one nearer the ground truth.
        choice_series = gpd.GeoSeries([loc_row.geometry_a, loc_row.geometry_i])
        closer_idx = choice_series.distance(gt_linestring).idxmin()
        final_geom = choice_series.loc[closer_idx]
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom
    }

In [None]:
# Reset the module-level accumulator that merge_row_with_distance reads and updates row by row.
# NOTE(review): merge_row_with_distance is only defined in a later cell of this notebook, so on a
# fresh kernel (Restart & Run All) this cell raises NameError unless that definition is run first.
distance_so_far = 0
reference_gpdf = gpd.GeoDataFrame(data=list(merged_gpdf.apply(merge_row_with_distance, axis=1))); reference_gpdf.head()

In [None]:
reference_gpdf = reference_gpdf[reference_gpdf.latitude.notnull()]; len(reference_gpdf)/len(new_ts_range)

This looks good in this small view, but looking at it in a map, we can see some weirdnesses at the end and at the curve on Miramonte

In [None]:
import arrow

In [None]:
import folium

In [None]:
importlib.reload(ezpv)

In [None]:
def _line_feature(loc_df, color):
    """GeoJSON LineString over loc_df's (longitude, latitude) pairs, tagged with a colour and the ts list."""
    return gj.Feature(
        geometry=gj.LineString(coordinates=list(zip(loc_df.longitude, loc_df.latitude))),
        properties={"style": {"color": color}, "ts": list(loc_df.ts)})

curr_map = folium.Map()
gt_leg_gj = sd_ucb.get_geojson_for_leg(gt_leg)

# One line feature per trajectory: filtered android, filtered iOS, raw android, merged reference.
sensed_section_gj = _line_feature(filtered_new_android_gpdf, "red")
i_sensed_section_gj = _line_feature(filtered_new_ios_gpdf, "purple")
old_sensed_section_gj = _line_feature(android_control_gpdf, "yellow")
filtered_sensed_section_gj = _line_feature(reference_gpdf, "blue")

# Line layers.
gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name="ground_truth")
sensed_leg_gj_feature = folium.GeoJson(sensed_section_gj, name="sensed_values (android)")
i_sensed_leg_gj_feature = folium.GeoJson(i_sensed_section_gj, name="sensed_values (ios)")
old_sensed_leg_gj_feature = folium.GeoJson(old_sensed_section_gj, name="old sensed_values")
filtered_sensed_leg_gj_feature = folium.GeoJson(filtered_sensed_section_gj, name="filtered sensed_values")

# Point-marker layers.
gt_leg_gj_points = ezpv.get_point_markers(gt_leg_gj[2], name="ground_truth_points", color="green")
sensed_leg_gj_points = ezpv.get_point_markers(sensed_section_gj, name="sensed_points(android) ", color="red", tz="America/Los_Angeles")
i_sensed_leg_gj_points = ezpv.get_point_markers(i_sensed_section_gj, name="sensed_points (ios)", color="purple", tz="America/Los_Angeles")
old_sensed_leg_gj_points = ezpv.get_point_markers(old_sensed_section_gj, name="old_sensed_points", color="yellow", tz="America/Los_Angeles")
filtered_sensed_leg_gj_points = ezpv.get_point_markers(filtered_sensed_section_gj, name="filtered_sensed_points", color="blue", tz="America/Los_Angeles")

# Add each line layer followed by its markers, in the same order as the layer control lists them.
for map_layer in (
    gt_leg_gj_feature, gt_leg_gj_points,
    sensed_leg_gj_feature, sensed_leg_gj_points,
    i_sensed_leg_gj_feature, i_sensed_leg_gj_points,
    old_sensed_leg_gj_feature, old_sensed_leg_gj_points,
    filtered_sensed_leg_gj_feature, filtered_sensed_leg_gj_points,
):
    curr_map.add_child(map_layer)

curr_map.fit_bounds(sensed_leg_gj_feature.get_bounds())
folium.LayerControl().add_to(curr_map)
curr_map

In [None]:
merged_gpdf.loc[919:953].sort_values(by="ts", axis="index")

In [None]:
reference_gpdf[reference_gpdf.distance_along < 0]

####  `scipy.interpolate.interp1d`: Temporal accuracy check

Now let's check this for the use case that motivated it: temporal accuracy.
Note that with `merge_row`, this returns the kinds of zigzags that we see in the raw data.
I wonder if, at least on android, fused location collection uses some kind of closeness distance metric like we do, which fails like we do here.

In [None]:
android_trip = pv_ucb.map()["android"]["ucb-sdb-android-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][5]
ios_trip = pv_ucb.map()["ios"]["ucb-sdb-ios-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][5]
android_accuracy_df = android_trip["location_df"]
ios_accuracy_df = ios_trip["location_df"]
gt_leg = sd_ucb.get_ground_truth_for_leg("mtv_to_berkeley_sf_bart", "subway_underground")
gt_linestring = shp.geometry.LineString(coordinates=gt_leg["route_coords"]["geometry"]["coordinates"])

In [None]:
android_control_gpdf = gpd.GeoDataFrame(
        android_accuracy_df, geometry=android_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))
ios_control_gpdf = gpd.GeoDataFrame(
        ios_accuracy_df, geometry=ios_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))

In [None]:
android_gt_distances = android_control_gpdf.distance(gt_linestring) * (R / 360)
ios_gt_distances = ios_control_gpdf.distance(gt_linestring) * (R / 360)

In [None]:
filtered_android_control_gpdf = android_control_gpdf[android_gt_distances <= 5]
filtered_ios_control_gpdf = ios_control_gpdf[ios_gt_distances <= 5]

In [None]:
len(filtered_android_control_gpdf)/len(android_control_gpdf), len(filtered_ios_control_gpdf)/len(ios_control_gpdf)

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
android_control_gpdf.plot(ax=ax_array[0])
filtered_android_control_gpdf.plot(ax=ax_array[0])
ios_control_gpdf.plot(ax=ax_array[1])
filtered_ios_control_gpdf.plot(ax=ax_array[1])

In [None]:
filtered_android_control_gpdf.ts.iloc[0], filtered_android_control_gpdf.ts.iloc[-1], filtered_ios_control_gpdf.ts.iloc[0], filtered_ios_control_gpdf.ts.iloc[-1]

In [None]:
len(android_control_gpdf), len(ios_control_gpdf)

In [None]:
import scipy.interpolate as sci

In [None]:
android_lat_fn = sci.interp1d(x=filtered_android_control_gpdf.ts, y=filtered_android_control_gpdf.latitude, fill_value="extrapolate")
android_lon_fn = sci.interp1d(x=filtered_android_control_gpdf.ts, y=filtered_android_control_gpdf.longitude, fill_value="extrapolate")
ios_lat_fn = sci.interp1d(x=filtered_ios_control_gpdf.ts, y=filtered_ios_control_gpdf.latitude, fill_value="extrapolate")
ios_lon_fn = sci.interp1d(x=filtered_ios_control_gpdf.ts, y=filtered_ios_control_gpdf.longitude, fill_value="extrapolate")

In [None]:
# Regular timestamp grid spanning the android section, with a step of 1.
new_ts_range = np.arange(android_trip["start_ts"], android_trip["end_ts"], 1)
# This section is read from the UCB phone view, so format times with the UCB spec's
# timezone instead of the LA spec's.
new_fmt_time_range = [arrow.get(ts).to(sd_ucb.eval_tz) for ts in new_ts_range]

In [None]:
# Resample both phones on the shared `new_ts_range` grid.
new_android_lat = android_lat_fn(new_ts_range)
new_android_lng = android_lon_fn(new_ts_range)
new_ios_lat = ios_lat_fn(new_ts_range)
new_ios_lng = ios_lon_fn(new_ts_range)
new_android_gpdf = gpd.GeoDataFrame({
    "latitude": new_android_lat,
    "longitude": new_android_lng,
    "ts": new_ts_range,
    "fmt_time": new_fmt_time_range,
    "geometry": [shp.geometry.Point(x, y) for x, y in zip(new_android_lng, new_android_lat)]
})
new_ios_gpdf = gpd.GeoDataFrame({
    "latitude": new_ios_lat,
    "longitude": new_ios_lng,
    "ts": new_ts_range,
    "fmt_time": new_fmt_time_range,
    "geometry": [shp.geometry.Point(x, y) for x, y in zip(new_ios_lng, new_ios_lat)]
})
# error: distance from each point to the ground-truth line (scaled by R/360).
new_android_gpdf["error"] = new_android_gpdf.distance(gt_linestring) * (R/360)
new_ios_gpdf["error"] = new_ios_gpdf.distance(gt_linestring) * (R/360)
# distance_along: position of each point's projection ALONG the ground-truth line, measured
# from its start (LineString.project), with the same R/360 scaling. Using .distance() here
# would just duplicate the "error" column.
new_android_gpdf["distance_along"] = new_android_gpdf.geometry.apply(lambda p: gt_linestring.project(p) * (R/360))
new_ios_gpdf["distance_along"] = new_ios_gpdf.geometry.apply(lambda p: gt_linestring.project(p) * (R/360))

In [None]:
new_android_gpdf[["fmt_time", "geometry", "error"]].head(), new_ios_gpdf[["fmt_time", "geometry", "error"]].head()

In [None]:
new_android_gpdf[["fmt_time", "geometry", "error"]].tail(), new_ios_gpdf[["fmt_time", "geometry", "error"]].tail()

In [None]:
new_android_gpdf.error[new_android_gpdf.error == new_android_gpdf.error.iloc[-1]].head()

In [None]:
new_android_gpdf[["fmt_time", "geometry", "error"]].loc[1210:1220], new_ios_gpdf[["fmt_time", "geometry", "error"]].loc[1210:1220]

In [None]:
filtered_new_android_gpdf = new_android_gpdf[new_android_gpdf.error <= 5]
filtered_new_ios_gpdf = new_ios_gpdf[new_ios_gpdf.error <= 5]
filtered_new_android_gpdf.fmt_time.head(), filtered_new_android_gpdf.fmt_time.tail(), filtered_new_ios_gpdf.fmt_time.head(), filtered_new_ios_gpdf.fmt_time.tail()

In [None]:
ifig, ax_array = plt.subplots(nrows=2, ncols=2, figsize=(20,20))
new_android_gpdf.plot(ax=ax_array[0][0])
filtered_new_android_gpdf.plot(ax=ax_array[0][1])
new_ios_gpdf.plot(ax=ax_array[1][0])
filtered_new_ios_gpdf.plot(ax=ax_array[1][1])

In [None]:
NAN = float("nan")
merged_gpdf = pd.merge(filtered_new_android_gpdf, filtered_new_ios_gpdf, on="ts", how="outer", suffixes=("_a", "_i")).sort_values(by="ts", axis="index"); merged_gpdf.head()

In [None]:
def merge_row(loc_row):
    """Collapse one row of the outer-merged android/iOS frame into a single reference point.

    If only one phone has a point at this timestamp, that point is used. If both
    do, the point with the smaller distance to ``gt_linestring`` is used.

    :param loc_row: one row of ``merged_gpdf``; reads ``ts``, ``geometry_a`` and ``geometry_i``
    :return: dict with ``ts``, ``fmt_time``, ``longitude``, ``latitude`` and ``geometry``
    :raises AssertionError: if neither phone has a geometry for the row
    """
    if pd.isnull(loc_row.geometry_i):
        # pandas provides the null check; numpy has no `isnull`, so np.isnull would raise
        # AttributeError as soon as a row with only an android point is merged.
        assert not pd.isnull(loc_row.geometry_a)
        final_geom = loc_row.geometry_a
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        final_geom = loc_row.geometry_i
    else:
        assert not pd.isnull(loc_row.geometry_i) and not pd.isnull(loc_row.geometry_a)
        # Index 0 is the android point, index 1 the iOS point; keep the one nearer the ground truth.
        choice_series = gpd.GeoSeries([loc_row.geometry_a, loc_row.geometry_i])
        closer_idx = choice_series.distance(gt_linestring).idxmin()
        final_geom = choice_series.loc[closer_idx]
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom
    }

In [None]:
# use a variable outside the function to store distance so far
# TODO: figure out whether it is better to do this or to write a loop manually
# The accumulator must be reset to 0 before every DataFrame.apply pass, and the rows must be
# visited in the intended (ts) order, because each call depends on the previous call's result.
distance_so_far = 0
def merge_row_with_distance(loc_row):
    """Merge one android/iOS row into a reference point while avoiding backwards movement.

    Reads and updates the module-level ``distance_so_far``, which holds
    ``gt_linestring.project(...)`` of the last point this function accepted.

    Selection rules, as implemented below:
    - only one phone has a geometry: that geometry is used as-is (no backwards check is
      applied on this path);
    - both phones have a geometry and both project behind ``distance_so_far``: the row is
      skipped by returning NaN longitude/latitude and an empty Point, and
      ``distance_so_far`` is left unchanged;
    - both have a geometry and exactly one projects behind: the other one is used;
    - both have a geometry and neither projects behind: the one closer to
      ``gt_linestring`` is used.

    :param loc_row: one row of the merged frame; reads ``name`` (the row label), ``ts``,
        ``geometry_a`` and ``geometry_i``
    :return: dict with ``ts``, ``fmt_time``, ``longitude``, ``latitude``, ``geometry`` and,
        for accepted rows only, ``distance_along``, ``error`` and ``source``
    :raises AssertionError: if neither phone has a geometry for the row
    """
    global distance_so_far
    source = None
    more_details = False
    # print("merging %s at distance %s" % (loc_row, distance_so_far))
    # Debugging aid: verbose output only for rows whose label is in the hard-coded 1020..1030 range.
    if 1020 <= loc_row.name <= 1030:
        more_details = True
        print("merging %s at distance %s" % (loc_row, distance_so_far))
    if pd.isnull(loc_row.geometry_i):
        assert not pd.isnull(loc_row.geometry_a)
        final_geom = loc_row.geometry_a
        source = "android"
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        final_geom = loc_row.geometry_i
        source = "ios"
    else:
        assert not pd.isnull(loc_row.geometry_i) and not pd.isnull(loc_row.geometry_a)
        # Index 0 is the android candidate, index 1 the iOS candidate.
        choice_series = gpd.GeoSeries([loc_row.geometry_a, loc_row.geometry_i])
        distance_along_line_series = choice_series.apply(lambda p: gt_linestring.project(p))
        if more_details:
            print("distance_along_line = %s" % distance_along_line_series)
        # Negative values mean the candidate projects behind the last accepted point.
        distance_from_last_series = distance_along_line_series.apply(lambda d: d - distance_so_far)
        if more_details:
            print("distance_from_last_series = %s" % distance_from_last_series)

        # assert not (distance_from_last_series < 0).all(), "distance_so_far = %s, distance_from_last = %s" % (distance_so_far, distance_from_last_series)
        if (distance_from_last_series < 0).all():
            if more_details:
                print("all distances are negative, skipping...")
            # Skipped row: this dict has no distance_along/error/source keys, and
            # distance_so_far is not updated.
            return {
                "ts": loc_row.ts,
                "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
                "longitude": np.nan,
                "latitude": np.nan,
                "geometry": shp.geometry.Point()
            }
        else:
            if (distance_from_last_series < 0).any():
                # avoid going backwards along the linestring (wonder how this works with San Jose u-turn)
                closer_idx = distance_from_last_series.idxmax()
                if more_details:
                    print("one distance is going backwards, found closer_idx = %d" % closer_idx)

            else:
                closer_idx = choice_series.distance(gt_linestring).idxmin()
                if more_details:
                    print("both distances are positive, found closer_idx = %d" % closer_idx)

            if closer_idx == 0:
                source = "android"
            else:
                source = "ios"
            final_geom = choice_series.loc[closer_idx]
        
    # Remember how far along the ground-truth line the accepted point lies.
    distance_so_far = gt_linestring.project(final_geom)
    # NOTE(review): "distance_along" and "error" here are in raw coordinate units; other cells of
    # this notebook multiply comparable values by R/360 — confirm which scaling is intended.
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom,
        "distance_along": gt_linestring.project(final_geom),
        "error": final_geom.distance(gt_linestring),
        "source": source
    }

In [None]:
distance_so_far = 0
reference_gpdf = gpd.GeoDataFrame(data=list(merged_gpdf.apply(merge_row_with_distance, axis=1))); reference_gpdf.head()

In [None]:
reference_gpdf = reference_gpdf[reference_gpdf.latitude.notnull()]; len(reference_gpdf)/len(new_ts_range)

In [None]:
len(filtered_new_android_gpdf)/len(new_android_gpdf), len(filtered_new_ios_gpdf)/len(new_ios_gpdf)

In [None]:
import folium

In [None]:
def _line_feature(loc_df, color):
    """GeoJSON LineString over loc_df's (longitude, latitude) pairs, tagged with a colour and the ts list."""
    return gj.Feature(
        geometry=gj.LineString(coordinates=list(zip(loc_df.longitude, loc_df.latitude))),
        properties={"style": {"color": color}, "ts": list(loc_df.ts)})

curr_map = folium.Map()
gt_leg_gj = sd_ucb.get_geojson_for_leg(gt_leg)

# One line feature per trajectory: filtered android, filtered iOS, raw android, merged reference.
sensed_section_gj = _line_feature(filtered_new_android_gpdf, "red")
i_sensed_section_gj = _line_feature(filtered_new_ios_gpdf, "purple")
old_sensed_section_gj = _line_feature(android_control_gpdf, "yellow")
filtered_sensed_section_gj = _line_feature(reference_gpdf, "blue")

# Line layers.
gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name="ground_truth")
sensed_leg_gj_feature = folium.GeoJson(sensed_section_gj, name="sensed_values (android)")
i_sensed_leg_gj_feature = folium.GeoJson(i_sensed_section_gj, name="sensed_values (ios)")
old_sensed_leg_gj_feature = folium.GeoJson(old_sensed_section_gj, name="old sensed_values")
filtered_sensed_leg_gj_feature = folium.GeoJson(filtered_sensed_section_gj, name="filtered sensed_values")

# Point-marker layers.
gt_leg_gj_points = ezpv.get_point_markers(gt_leg_gj[2], name="ground_truth_points", color="green")
sensed_leg_gj_points = ezpv.get_point_markers(sensed_section_gj, name="sensed_points(android) ", color="red", tz="America/Los_Angeles")
i_sensed_leg_gj_points = ezpv.get_point_markers(i_sensed_section_gj, name="sensed_points (ios)", color="purple", tz="America/Los_Angeles")
old_sensed_leg_gj_points = ezpv.get_point_markers(old_sensed_section_gj, name="old_sensed_points", color="yellow", tz="America/Los_Angeles")
filtered_sensed_leg_gj_points = ezpv.get_point_markers(filtered_sensed_section_gj, name="filtered_sensed_points", color="blue", tz="America/Los_Angeles")

# Add each line layer followed by its markers, in the same order as the layer control lists them.
for map_layer in (
    gt_leg_gj_feature, gt_leg_gj_points,
    sensed_leg_gj_feature, sensed_leg_gj_points,
    i_sensed_leg_gj_feature, i_sensed_leg_gj_points,
    old_sensed_leg_gj_feature, old_sensed_leg_gj_points,
    filtered_sensed_leg_gj_feature, filtered_sensed_leg_gj_points,
):
    curr_map.add_child(map_layer)

curr_map.fit_bounds(sensed_leg_gj_feature.get_bounds())
folium.LayerControl().add_to(curr_map)
curr_map

####  `scipy.interpolate.interp1d`: Temporal accuracy check (without extrapolation)

Let's try to stop the interpolation at the point where we have data instead of extrapolating

In [None]:
android_trip = pv_ucb.map()["android"]["ucb-sdb-android-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][5]
ios_trip = pv_ucb.map()["ios"]["ucb-sdb-ios-1"]["evaluation_ranges"][0]["evaluation_trip_ranges"][0]["evaluation_section_ranges"][5]
android_accuracy_df = android_trip["location_df"]
ios_accuracy_df = ios_trip["location_df"]
gt_leg = sd_ucb.get_ground_truth_for_leg("mtv_to_berkeley_sf_bart", "subway_underground")
gt_linestring = shp.geometry.LineString(coordinates=gt_leg["route_coords"]["geometry"]["coordinates"])

In [None]:
android_control_gpdf = gpd.GeoDataFrame(
        android_accuracy_df, geometry=android_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))
ios_control_gpdf = gpd.GeoDataFrame(
        ios_accuracy_df, geometry=ios_accuracy_df.apply(lambda lr: shp.geometry.Point(lr.longitude, lr.latitude), axis=1))

In [None]:
android_gt_distances = android_control_gpdf.distance(gt_linestring) * (R / 360)
ios_gt_distances = ios_control_gpdf.distance(gt_linestring) * (R / 360)

In [None]:
filtered_android_control_gpdf = android_control_gpdf[android_gt_distances <= 5]
filtered_ios_control_gpdf = ios_control_gpdf[ios_gt_distances <= 5]

In [None]:
len(filtered_android_control_gpdf)/len(android_control_gpdf), len(filtered_ios_control_gpdf)/len(ios_control_gpdf)

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
android_control_gpdf.plot(ax=ax_array[0])
filtered_android_control_gpdf.plot(ax=ax_array[0])
ios_control_gpdf.plot(ax=ax_array[1])
filtered_ios_control_gpdf.plot(ax=ax_array[1])

In [None]:
filtered_android_control_gpdf.ts.iloc[0], filtered_android_control_gpdf.ts.iloc[-1], filtered_ios_control_gpdf.ts.iloc[0], filtered_ios_control_gpdf.ts.iloc[-1]

In [None]:
len(android_control_gpdf), len(ios_control_gpdf)

In [None]:
import scipy.interpolate as sci

In [None]:
# Interpolators ts -> latitude and ts -> longitude for each phone, fitted on the
# filtered points only (interp1d defaults are used, i.e. no extra options).
android_lat_fn = sci.interp1d(
    filtered_android_control_gpdf["ts"], filtered_android_control_gpdf["latitude"]
)
android_lon_fn = sci.interp1d(
    filtered_android_control_gpdf["ts"], filtered_android_control_gpdf["longitude"]
)
ios_lat_fn = sci.interp1d(
    filtered_ios_control_gpdf["ts"], filtered_ios_control_gpdf["latitude"]
)
ios_lon_fn = sci.interp1d(
    filtered_ios_control_gpdf["ts"], filtered_ios_control_gpdf["longitude"]
)

In [None]:
# Resample each phone on a 1-second grid between the first and last filtered
# timestamp (rounded inwards), so the interpolators are only evaluated where we
# have data instead of extrapolating. Assumes ts is sorted ascending.
new_android_ts_range = np.arange(
    math.ceil(filtered_android_control_gpdf.ts.iloc[0]),
    math.floor(filtered_android_control_gpdf.ts.iloc[-1]),
    1,
).tolist()
new_ios_ts_range = np.arange(
    math.ceil(filtered_ios_control_gpdf.ts.iloc[0]),
    math.floor(filtered_ios_control_gpdf.ts.iloc[-1]),
    1,
).tolist()
# Format with the timezone of the spec this data was loaded from (sd_ucb); the
# previous code borrowed the timezone of the unrelated sd_la spec.
new_android_fmt_time_range = [arrow.get(ts).to(sd_ucb.eval_tz) for ts in new_android_ts_range]
new_ios_fmt_time_range = [arrow.get(ts).to(sd_ucb.eval_tz) for ts in new_ios_ts_range]

In [None]:
# Evaluate the interpolators on the 1-second grids.
new_android_lat, new_android_lng = (
    android_lat_fn(new_android_ts_range),
    android_lon_fn(new_android_ts_range),
)
new_ios_lat, new_ios_lng = (
    ios_lat_fn(new_ios_ts_range),
    ios_lon_fn(new_ios_ts_range),
)

# One resampled frame per phone; geometry is Point(x=longitude, y=latitude).
new_android_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_android_lat,
    longitude=new_android_lng,
    ts=new_android_ts_range,
    fmt_time=new_android_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_android_lng, new_android_lat)),
))
new_ios_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_ios_lat,
    longitude=new_ios_lng,
    ts=new_ios_ts_range,
    fmt_time=new_ios_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_ios_lng, new_ios_lat)),
))

# "error": distance from the route; "distance_along": position of the point's
# projection along the route. Both are scaled by R / 360.
new_android_gpdf["error"] = (R / 360) * new_android_gpdf.distance(gt_linestring)
new_android_gpdf["distance_along"] = new_android_gpdf.geometry.apply(
    lambda pt: gt_linestring.project(pt) * (R / 360)
)
new_ios_gpdf["error"] = (R / 360) * new_ios_gpdf.distance(gt_linestring)
new_ios_gpdf["distance_along"] = new_ios_gpdf.geometry.apply(
    lambda pt: gt_linestring.project(pt) * (R / 360)
)

In [None]:
# First resampled rows for each phone: (android, ios).
(
    new_android_gpdf[["fmt_time", "geometry", "error"]].head(),
    new_ios_gpdf[["fmt_time", "geometry", "error"]].head(),
)

In [None]:
# Last resampled rows for each phone: (android, ios).
(
    new_android_gpdf[["fmt_time", "geometry", "error"]].tail(),
    new_ios_gpdf[["fmt_time", "geometry", "error"]].tail(),
)

In [None]:
# First android rows whose error is exactly equal to the error of the last row.
new_android_gpdf["error"].loc[lambda err: err == err.iloc[-1]].head()

In [None]:
# Rows labelled 1210 through 1220 (inclusive) for each phone: (android, ios).
(
    new_android_gpdf.loc[1210:1220, ["fmt_time", "geometry", "error"]],
    new_ios_gpdf.loc[1210:1220, ["fmt_time", "geometry", "error"]],
)

In [None]:
# Drop resampled points whose scaled error exceeds 5, then show the first and
# last formatted times that remain for each phone.
filtered_new_android_gpdf = new_android_gpdf.loc[new_android_gpdf["error"] <= 5]
filtered_new_ios_gpdf = new_ios_gpdf.loc[new_ios_gpdf["error"] <= 5]
(
    filtered_new_android_gpdf.fmt_time.head(),
    filtered_new_android_gpdf.fmt_time.tail(),
    filtered_new_ios_gpdf.fmt_time.head(),
    filtered_new_ios_gpdf.fmt_time.tail(),
)

In [None]:
# 2x2 grid: android in the top row, ios in the bottom row; all resampled points
# in the left column, only the error-filtered points in the right column.
ifig, ax_array = plt.subplots(nrows=2, ncols=2, figsize=(20,20))
new_android_gpdf.plot(ax=ax_array[0][0])
filtered_new_android_gpdf.plot(ax=ax_array[0][1])
new_ios_gpdf.plot(ax=ax_array[1][0])
filtered_new_ios_gpdf.plot(ax=ax_array[1][1])

In [None]:
NAN = float("nan")
# Outer-join the two filtered frames on ts so every timestamp seen by either
# phone gets a row; android columns get the "_a" suffix and ios columns "_i".
merged_gpdf = pd.merge(
    filtered_new_android_gpdf,
    filtered_new_ios_gpdf,
    on="ts",
    how="outer",
    suffixes=("_a", "_i"),
).sort_values(by="ts", axis="index")
merged_gpdf.head()

In [None]:
# (rows where the ios distance along the route is smaller than in the previous
# row, total number of rows)
(
    np.count_nonzero(merged_gpdf["distance_along_i"].diff() < 0),
    len(merged_gpdf["distance_along_i"]),
)

In [None]:
def merge_row(loc_row):
    """Pick a single point for one merged android/ios row.

    If only one phone has a geometry in the row, that geometry is used. If both
    have one, the geometry closer to the module-level ``gt_linestring`` is used
    (android wins ties).

    Parameters
    ----------
    loc_row : row of ``merged_gpdf``; reads ``geometry_a``, ``geometry_i``, ``ts``.

    Returns
    -------
    dict with keys ``ts``, ``fmt_time``, ``longitude``, ``latitude``, ``geometry``.

    Raises
    ------
    AssertionError if the row has no geometry from either phone.
    """
    android_missing = pd.isnull(loc_row.geometry_a)
    ios_missing = pd.isnull(loc_row.geometry_i)
    # The android-only branch used to call np.isnull, which numpy does not
    # provide, so it raised AttributeError; pd.isnull is used throughout now.
    assert not (android_missing and ios_missing), "row has no geometry from either phone"
    if ios_missing:
        final_geom = loc_row.geometry_a
    elif android_missing:
        final_geom = loc_row.geometry_i
    else:
        # min() returns the first of equally close candidates, i.e. android.
        final_geom = min(
            (loc_row.geometry_a, loc_row.geometry_i),
            key=lambda geom: geom.distance(gt_linestring),
        )
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom
    }

In [None]:
# use a variable outside the function to store distance so far
# TODO: figure out whether it is better to do this or to write a loop manually
distance_so_far = 0
EMPTY_POINT = shp.geometry.Point()

def merge_row_with_distance(loc_row):
    """Pick the reference point for one merged row without moving backwards.

    Reads and updates the module-level ``distance_so_far`` (the R/360-scaled
    distance along ``gt_linestring`` of the last accepted point), so callers
    must reset it to 0 before each pass over ``merged_gpdf``.

    Selection rules:
    - only one phone has a point: accept it if its ``distance_along`` is strictly
      greater than ``distance_so_far``, otherwise skip the row;
    - both phones have a point: skip the row if both are behind
      ``distance_so_far``; if exactly one is behind, take the other; if neither
      is behind, take the one that advances the least.

    Parameters
    ----------
    loc_row : row of ``merged_gpdf``; reads ``name`` (row label), ``ts``,
        ``geometry_a``/``geometry_i`` and ``distance_along_a``/``distance_along_i``.

    Returns
    -------
    dict. Accepted rows carry ``ts``, ``fmt_time``, ``longitude``, ``latitude``,
    ``geometry``, ``distance_along``, ``error`` and ``source`` ("android"/"ios").
    Skipped rows carry ``EMPTY_POINT`` as geometry, NaN longitude/latitude and
    none of the last three keys.
    """
    global distance_so_far
    source = None
    # Verbose tracing for a few hand-picked row labels under investigation.
    more_details = 75 <= loc_row.name <= 80 or 816 <= loc_row.name <= 820
    if more_details:
        print("merging %s at distance %s" % (loc_row, distance_so_far))

    if pd.isnull(loc_row.geometry_i):
        assert not pd.isnull(loc_row.geometry_a)
        if loc_row.distance_along_a > distance_so_far:
            final_geom = loc_row.geometry_a
            source = "android"
        else:
            final_geom = EMPTY_POINT
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        if loc_row.distance_along_i > distance_so_far:
            final_geom = loc_row.geometry_i
            source = "ios"
        else:
            final_geom = EMPTY_POINT
    else:
        choice_series = gpd.GeoSeries([loc_row.geometry_a, loc_row.geometry_i])
        # Plain floats belong in a pandas Series: a GeoSeries is meant for
        # geometries and is not a valid container for numeric data.
        distance_along_line_series = pd.Series([loc_row.distance_along_a, loc_row.distance_along_i])
        if more_details:
            print("distance_along_line = %s" % distance_along_line_series)
        distance_from_last_series = distance_along_line_series - distance_so_far
        if more_details:
            print("distance_from_last_series = %s" % distance_from_last_series)

        # assert not (distance_from_last_series < 0).all(), "distance_so_far = %s, distance_from_last = %s" % (distance_so_far, distance_from_last_series)
        if (distance_from_last_series < 0).all():
            if more_details:
                print("all distances are negative, skipping...")
            final_geom = EMPTY_POINT
        else:
            if (distance_from_last_series < 0).any():
                # avoid going backwards along the linestring (wonder how this works with San Jose u-turn)
                closer_idx = distance_from_last_series.idxmax()
                if more_details:
                    print("one distance is going backwards, found closer_idx = %d" % closer_idx)
            else:
                closer_idx = distance_from_last_series.idxmin()
                if more_details:
                    print("both distances are positive, found closer_idx = %d" % closer_idx)

            # Position 0 is android and position 1 is ios in both series.
            source = "android" if closer_idx == 0 else "ios"
            final_geom = choice_series.loc[closer_idx]

    # Ask the geometry whether it is empty rather than comparing to the sentinel.
    skipped = final_geom.is_empty
    if not skipped:
        distance_so_far = gt_linestring.project(final_geom) * (R/360)

    if more_details:
        print("final_geom = %s, new_distance_so_far = %s" % (final_geom, distance_so_far))
    if skipped:
        return {
            "ts": loc_row.ts,
            "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
            "longitude": np.nan,
            "latitude": np.nan,
            "geometry": EMPTY_POINT
        }
    else:
        return {
            "ts": loc_row.ts,
            "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
            "longitude": final_geom.x,
            "latitude": final_geom.y,
            "geometry": final_geom,
            # NOTE(review): these two values are NOT scaled by R/360, unlike
            # distance_so_far and the distance_along_* inputs -- confirm intended.
            "distance_along": gt_linestring.project(final_geom),
            "error": final_geom.distance(gt_linestring),
            "source": source
        }

In [None]:
# merge_row_with_distance mutates distance_so_far, so reset it before each pass.
distance_so_far = 0
initial_reference_gpdf = gpd.GeoDataFrame(
    data=list(merged_gpdf.apply(merge_row_with_distance, axis=1))
)
# Preview the frame built here; reference_gpdf does not exist yet at this point
# in a top-to-bottom run, so displaying it would fail or show stale data.
initial_reference_gpdf.head()

In [None]:
# Keep only the rows for which a reference point was accepted (skipped rows have
# NaN latitude).
reference_gpdf = initial_reference_gpdf[initial_reference_gpdf.latitude.notnull()]
# Coverage relative to (a) the merged rows and (b) every 1-second timestamp
# resampled for either phone. `new_ts_range` is not defined in this section, so
# the union of the two per-phone ranges is used as the denominator instead.
print(
    len(reference_gpdf) / len(initial_reference_gpdf),
    len(reference_gpdf) / len(set(new_android_ts_range) | set(new_ios_ts_range)),
)

In [None]:
# Fraction of resampled points that passed the error filter: (android, ios).
(
    len(filtered_new_android_gpdf) / len(new_android_gpdf),
    len(filtered_new_ios_gpdf) / len(new_ios_gpdf),
)

In [None]:
import folium

In [None]:
# First timestamps of the error-filtered android points.
filtered_new_android_gpdf["ts"].head()

In [None]:
# First latitudes of the error-filtered android points.
filtered_new_android_gpdf["latitude"].head()

In [None]:
# Interactive map with four layers: ground truth, resampled+filtered android
# points (red), raw android points (yellow) and the merged reference trajectory
# (blue). The ios layers are currently commented out.
# NOTE(review): `gj` is not imported in the visible part of this notebook --
# presumably `import geojson as gj` in an earlier cell; confirm.
curr_map = folium.Map()
gt_leg_gj = sd_ucb.get_geojson_for_leg(gt_leg)
# Each trajectory becomes a GeoJSON LineString feature of (longitude, latitude) pairs.
sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(filtered_new_android_gpdf.longitude, filtered_new_android_gpdf.latitude))),properties={"style": {"color": "red"}})
# i_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(filtered_new_ios_gpdf.longitude, filtered_new_ios_gpdf.latitude))),properties={"style": {"color": "purple"}, "ts": list(filtered_new_ios_gpdf.ts)})
old_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(android_control_gpdf.longitude, android_control_gpdf.latitude))),properties={"style": {"color": "yellow"}, "ts": list(android_control_gpdf.ts)})
filtered_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(reference_gpdf.longitude, reference_gpdf.latitude))),properties={"style": {"color": "blue"}})
# Line layers, plus point-marker layers built by ezpv.get_point_markers.
gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name="ground_truth")
gt_leg_gj_points = ezpv.get_point_markers(gt_leg_gj[2], name="ground_truth_points", color="green")
sensed_leg_gj_feature = folium.GeoJson(sensed_section_gj, name="sensed_values (android)")
# i_sensed_leg_gj_feature = folium.GeoJson(i_sensed_section_gj, name="sensed_values (ios)")
old_sensed_leg_gj_feature = folium.GeoJson(old_sensed_section_gj, name="old sensed_values")
filtered_sensed_leg_gj_feature = folium.GeoJson(filtered_sensed_section_gj, name="filtered sensed_values")
# sensed_leg_gj_points = ezpv.get_point_markers(sensed_section_gj, name="sensed_points(android) ", color="red", tz="America/Los_Angeles")
# i_sensed_leg_gj_points = ezpv.get_point_markers(i_sensed_section_gj, name="sensed_points (ios)", color="purple", tz="America/Los_Angeles")
old_sensed_leg_gj_points = ezpv.get_point_markers(old_sensed_section_gj, name="old_sensed_points", color="yellow", tz="America/Los_Angeles")
filtered_sensed_leg_gj_points = ezpv.get_point_markers(filtered_sensed_section_gj, name="filtered_sensed_points", color="blue", tz="America/Los_Angeles")
# Add the layers to the map.
curr_map.add_child(gt_leg_gj_feature)
curr_map.add_child(gt_leg_gj_points)
curr_map.add_child(sensed_leg_gj_feature)
# curr_map.add_child(sensed_leg_gj_points)
# curr_map.add_child(i_sensed_leg_gj_feature)
# curr_map.add_child(i_sensed_leg_gj_points)
curr_map.add_child(old_sensed_leg_gj_feature)
curr_map.add_child(old_sensed_leg_gj_points)
curr_map.add_child(filtered_sensed_leg_gj_feature)
curr_map.add_child(filtered_sensed_leg_gj_points)
# Zoom to the extent of the android resampled layer and add a layer toggle.
curr_map.fit_bounds(sensed_leg_gj_feature.get_bounds())
folium.LayerControl().add_to(curr_map)
curr_map

In [None]:
# Reference rows labelled 75 through 85 (label-based, inclusive slice).
pd.concat(
    [reference_gpdf.loc[75:85]],
    axis=0,
)

In [None]:
# Merged rows whose android longitude is exactly this value (float equality).
merged_gpdf.loc[merged_gpdf["longitude_a"] == -122.40730863534677]

In [None]:
# Reference rows whose longitude is exactly this value (float equality).
reference_gpdf.loc[reference_gpdf["longitude"] == -122.46802100069702]

####  `scipy.interpolate.interp1d`: Temporal accuracy check with U-turn

Let's try to stop the interpolation at the point where we have data instead of extrapolating

In [None]:
# Pull the same evaluation section (range 0, trip 1, section 1) from each phone,
# together with the ground truth route for the matching leg.
android_trip = (
    pv_sj.map()["android"]["ucb-sdb-android-1"]
    ["evaluation_ranges"][0]
    ["evaluation_trip_ranges"][1]
    ["evaluation_section_ranges"][1]
)
ios_trip = (
    pv_sj.map()["ios"]["ucb-sdb-ios-1"]
    ["evaluation_ranges"][0]
    ["evaluation_trip_ranges"][1]
    ["evaluation_section_ranges"][1]
)
android_accuracy_df, ios_accuracy_df = android_trip["location_df"], ios_trip["location_df"]
gt_leg = sd_sj.get_ground_truth_for_leg(
    "bus trip with e-scooter access", "city_escooter"
)
gt_linestring = shp.geometry.LineString(
    coordinates=gt_leg["route_coords"]["geometry"]["coordinates"]
)

In [None]:
# Give every raw location row a shapely Point with x=longitude, y=latitude.
android_control_gpdf = gpd.GeoDataFrame(
    android_accuracy_df,
    geometry=android_accuracy_df.apply(
        lambda row: shp.geometry.Point(row.longitude, row.latitude), axis=1
    ),
)
ios_control_gpdf = gpd.GeoDataFrame(
    ios_accuracy_df,
    geometry=ios_accuracy_df.apply(
        lambda row: shp.geometry.Point(row.longitude, row.latitude), axis=1
    ),
)

In [None]:
# Distance of each raw point from the ground truth route, scaled by R / 360.
# NOTE(review): the points are built from longitude/latitude, so the raw distance
# is in degrees; one degree of arc on a sphere of radius R is R * pi / 180, which
# suggests R / 360 under-reports by a factor of 2 * pi -- confirm before treating
# the thresholds applied to these values as meters.
android_gt_distances = (R / 360) * android_control_gpdf.distance(gt_linestring)
ios_gt_distances = (R / 360) * ios_control_gpdf.distance(gt_linestring)

In [None]:
# Keep only the raw points whose scaled distance from the route is at most 5.
filtered_android_control_gpdf = android_control_gpdf.loc[android_gt_distances <= 5]
filtered_ios_control_gpdf = ios_control_gpdf.loc[ios_gt_distances <= 5]

In [None]:
# Fraction of raw points that survived the distance filter: (android, ios).
(
    len(filtered_android_control_gpdf) / len(android_control_gpdf),
    len(filtered_ios_control_gpdf) / len(ios_control_gpdf),
)

In [None]:
# One panel per phone (android in ax_array[0], ios in ax_array[1]). In each panel
# the filtered points are plotted after the raw points on the same axes.
ifig, ax_array = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
android_control_gpdf.plot(ax=ax_array[0])
filtered_android_control_gpdf.plot(ax=ax_array[0])
ios_control_gpdf.plot(ax=ax_array[1])
filtered_ios_control_gpdf.plot(ax=ax_array[1])

In [None]:
# First and last timestamp of the filtered points: android, then ios.
(
    filtered_android_control_gpdf["ts"].iloc[0],
    filtered_android_control_gpdf["ts"].iloc[-1],
    filtered_ios_control_gpdf["ts"].iloc[0],
    filtered_ios_control_gpdf["ts"].iloc[-1],
)

In [None]:
# Number of raw location rows per phone: (android, ios).
android_control_gpdf.shape[0], ios_control_gpdf.shape[0]

In [None]:
import scipy.interpolate as sci

In [None]:
# Interpolators ts -> latitude and ts -> longitude for each phone, fitted on the
# filtered points only (interp1d defaults are used, i.e. no extra options).
android_lat_fn = sci.interp1d(
    filtered_android_control_gpdf["ts"], filtered_android_control_gpdf["latitude"]
)
android_lon_fn = sci.interp1d(
    filtered_android_control_gpdf["ts"], filtered_android_control_gpdf["longitude"]
)
ios_lat_fn = sci.interp1d(
    filtered_ios_control_gpdf["ts"], filtered_ios_control_gpdf["latitude"]
)
ios_lon_fn = sci.interp1d(
    filtered_ios_control_gpdf["ts"], filtered_ios_control_gpdf["longitude"]
)

In [None]:
# Resample each phone on a 1-second grid between the first and last filtered
# timestamp (rounded inwards), so the interpolators are only evaluated where we
# have data instead of extrapolating. Assumes ts is sorted ascending.
new_android_ts_range = np.arange(
    math.ceil(filtered_android_control_gpdf.ts.iloc[0]),
    math.floor(filtered_android_control_gpdf.ts.iloc[-1]),
    1,
).tolist()
new_ios_ts_range = np.arange(
    math.ceil(filtered_ios_control_gpdf.ts.iloc[0]),
    math.floor(filtered_ios_control_gpdf.ts.iloc[-1]),
    1,
).tolist()
# Format with the timezone of the spec this data was loaded from (sd_sj); the
# previous code borrowed the timezone of the unrelated sd_la spec.
new_android_fmt_time_range = [arrow.get(ts).to(sd_sj.eval_tz) for ts in new_android_ts_range]
new_ios_fmt_time_range = [arrow.get(ts).to(sd_sj.eval_tz) for ts in new_ios_ts_range]

In [None]:
# Evaluate the interpolators on the 1-second grids.
new_android_lat, new_android_lng = (
    android_lat_fn(new_android_ts_range),
    android_lon_fn(new_android_ts_range),
)
new_ios_lat, new_ios_lng = (
    ios_lat_fn(new_ios_ts_range),
    ios_lon_fn(new_ios_ts_range),
)

# One resampled frame per phone; geometry is Point(x=longitude, y=latitude).
new_android_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_android_lat,
    longitude=new_android_lng,
    ts=new_android_ts_range,
    fmt_time=new_android_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_android_lng, new_android_lat)),
))
new_ios_gpdf = gpd.GeoDataFrame(dict(
    latitude=new_ios_lat,
    longitude=new_ios_lng,
    ts=new_ios_ts_range,
    fmt_time=new_ios_fmt_time_range,
    geometry=list(map(shp.geometry.Point, new_ios_lng, new_ios_lat)),
))

# "error": distance from the route; "distance_along": position of the point's
# projection along the route. Both are scaled by R / 360.
new_android_gpdf["error"] = (R / 360) * new_android_gpdf.distance(gt_linestring)
new_android_gpdf["distance_along"] = new_android_gpdf.geometry.apply(
    lambda pt: gt_linestring.project(pt) * (R / 360)
)
new_ios_gpdf["error"] = (R / 360) * new_ios_gpdf.distance(gt_linestring)
new_ios_gpdf["distance_along"] = new_ios_gpdf.geometry.apply(
    lambda pt: gt_linestring.project(pt) * (R / 360)
)

In [None]:
# First resampled rows for each phone: (android, ios).
(
    new_android_gpdf[["fmt_time", "geometry", "error"]].head(),
    new_ios_gpdf[["fmt_time", "geometry", "error"]].head(),
)

In [None]:
# Last resampled rows for each phone: (android, ios).
(
    new_android_gpdf[["fmt_time", "geometry", "error"]].tail(),
    new_ios_gpdf[["fmt_time", "geometry", "error"]].tail(),
)

In [None]:
# Drop resampled points whose scaled error exceeds 5, then show the first and
# last formatted times that remain for each phone.
filtered_new_android_gpdf = new_android_gpdf.loc[new_android_gpdf["error"] <= 5]
filtered_new_ios_gpdf = new_ios_gpdf.loc[new_ios_gpdf["error"] <= 5]
(
    filtered_new_android_gpdf.fmt_time.head(),
    filtered_new_android_gpdf.fmt_time.tail(),
    filtered_new_ios_gpdf.fmt_time.head(),
    filtered_new_ios_gpdf.fmt_time.tail(),
)

In [None]:
# 2x2 grid: android in the top row, ios in the bottom row; all resampled points
# in the left column, only the error-filtered points in the right column.
ifig, ax_array = plt.subplots(nrows=2, ncols=2, figsize=(20,20))
new_android_gpdf.plot(ax=ax_array[0][0])
filtered_new_android_gpdf.plot(ax=ax_array[0][1])
new_ios_gpdf.plot(ax=ax_array[1][0])
filtered_new_ios_gpdf.plot(ax=ax_array[1][1])

In [None]:
NAN = float("nan")
# Outer-join the two filtered frames on ts so every timestamp seen by either
# phone gets a row; android columns get the "_a" suffix and ios columns "_i".
merged_gpdf = pd.merge(
    filtered_new_android_gpdf,
    filtered_new_ios_gpdf,
    on="ts",
    how="outer",
    suffixes=("_a", "_i"),
).sort_values(by="ts", axis="index")
merged_gpdf.head()

In [None]:
def merge_row(loc_row):
    """Pick a single point for one merged android/ios row.

    If only one phone has a geometry in the row, that geometry is used. If both
    have one, the geometry closer to the module-level ``gt_linestring`` is used
    (android wins ties).

    Parameters
    ----------
    loc_row : row of ``merged_gpdf``; reads ``geometry_a``, ``geometry_i``, ``ts``.

    Returns
    -------
    dict with keys ``ts``, ``fmt_time``, ``longitude``, ``latitude``, ``geometry``.

    Raises
    ------
    AssertionError if the row has no geometry from either phone.
    """
    android_missing = pd.isnull(loc_row.geometry_a)
    ios_missing = pd.isnull(loc_row.geometry_i)
    # The android-only branch used to call np.isnull, which numpy does not
    # provide, so it raised AttributeError; pd.isnull is used throughout now.
    assert not (android_missing and ios_missing), "row has no geometry from either phone"
    if ios_missing:
        final_geom = loc_row.geometry_a
    elif android_missing:
        final_geom = loc_row.geometry_i
    else:
        # min() returns the first of equally close candidates, i.e. android.
        final_geom = min(
            (loc_row.geometry_a, loc_row.geometry_i),
            key=lambda geom: geom.distance(gt_linestring),
        )
    return {
        "ts": loc_row.ts,
        "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
        "longitude": final_geom.x,
        "latitude": final_geom.y,
        "geometry": final_geom
    }

In [None]:
# use a variable outside the function to store distance so far
# TODO: figure out whether it is better to do this or to write a loop manually
distance_so_far = 0
EMPTY_POINT = shp.geometry.Point()

def merge_row_with_distance(loc_row):
    """Pick the reference point for one merged row without moving backwards.

    Reads and updates the module-level ``distance_so_far`` (the R/360-scaled
    distance along ``gt_linestring`` of the last accepted point), so callers
    must reset it to 0 before each pass over ``merged_gpdf``.

    Selection rules:
    - only one phone has a point: accept it if its ``distance_along`` is strictly
      greater than ``distance_so_far``, otherwise skip the row;
    - both phones have a point: skip the row if both are behind
      ``distance_so_far``; if exactly one is behind, take the other; if neither
      is behind, take the one that advances the least.

    Parameters
    ----------
    loc_row : row of ``merged_gpdf``; reads ``name`` (row label), ``ts``,
        ``geometry_a``/``geometry_i`` and ``distance_along_a``/``distance_along_i``.

    Returns
    -------
    dict. Accepted rows carry ``ts``, ``fmt_time``, ``longitude``, ``latitude``,
    ``geometry``, ``distance_along``, ``error`` and ``source`` ("android"/"ios").
    Skipped rows carry ``EMPTY_POINT`` as geometry, NaN longitude/latitude and
    none of the last three keys.
    """
    global distance_so_far
    source = None
    # Verbose tracing for the hand-picked row labels under investigation.
    more_details = 430 <= loc_row.name <= 440
    if more_details:
        print("merging %s at distance %s" % (loc_row, distance_so_far))

    if pd.isnull(loc_row.geometry_i):
        assert not pd.isnull(loc_row.geometry_a)
        if loc_row.distance_along_a > distance_so_far:
            final_geom = loc_row.geometry_a
            source = "android"
        else:
            final_geom = EMPTY_POINT
    elif pd.isnull(loc_row.geometry_a):
        assert not pd.isnull(loc_row.geometry_i)
        if loc_row.distance_along_i > distance_so_far:
            final_geom = loc_row.geometry_i
            source = "ios"
        else:
            final_geom = EMPTY_POINT
    else:
        choice_series = gpd.GeoSeries([loc_row.geometry_a, loc_row.geometry_i])
        # Plain floats belong in a pandas Series: a GeoSeries is meant for
        # geometries and is not a valid container for numeric data.
        distance_along_line_series = pd.Series([loc_row.distance_along_a, loc_row.distance_along_i])
        if more_details:
            print("distance_along_line = %s" % distance_along_line_series)
        distance_from_last_series = distance_along_line_series - distance_so_far
        if more_details:
            print("distance_from_last_series = %s" % distance_from_last_series)

        # assert not (distance_from_last_series < 0).all(), "distance_so_far = %s, distance_from_last = %s" % (distance_so_far, distance_from_last_series)
        if (distance_from_last_series < 0).all():
            if more_details:
                print("all distances are negative, skipping...")
            final_geom = EMPTY_POINT
        else:
            if (distance_from_last_series < 0).any():
                # avoid going backwards along the linestring (wonder how this works with San Jose u-turn)
                closer_idx = distance_from_last_series.idxmax()
                if more_details:
                    print("one distance is going backwards, found closer_idx = %d" % closer_idx)
            else:
                closer_idx = distance_from_last_series.idxmin()
                if more_details:
                    print("both distances are positive, found closer_idx = %d" % closer_idx)

            # Position 0 is android and position 1 is ios in both series.
            source = "android" if closer_idx == 0 else "ios"
            final_geom = choice_series.loc[closer_idx]

    # Ask the geometry whether it is empty rather than comparing to the sentinel.
    skipped = final_geom.is_empty
    if not skipped:
        distance_so_far = gt_linestring.project(final_geom) * (R/360)

    if more_details:
        print("final_geom = %s, new_distance_so_far = %s" % (final_geom, distance_so_far))
    if skipped:
        return {
            "ts": loc_row.ts,
            "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
            "longitude": np.nan,
            "latitude": np.nan,
            "geometry": EMPTY_POINT
        }
    else:
        return {
            "ts": loc_row.ts,
            "fmt_time": arrow.get(loc_row.ts).to("America/Los_Angeles"),
            "longitude": final_geom.x,
            "latitude": final_geom.y,
            "geometry": final_geom,
            # NOTE(review): these two values are NOT scaled by R/360, unlike
            # distance_so_far and the distance_along_* inputs -- confirm intended.
            "distance_along": gt_linestring.project(final_geom),
            "error": final_geom.distance(gt_linestring),
            "source": source
        }

In [None]:
# merge_row_with_distance mutates distance_so_far, so reset it before each pass.
distance_so_far = 0
initial_reference_gpdf = gpd.GeoDataFrame(
    data=list(merged_gpdf.apply(merge_row_with_distance, axis=1))
)
# Preview the frame built here; at this point reference_gpdf would still be the
# stale frame from an earlier section (or undefined on a fresh kernel).
initial_reference_gpdf.head()

In [None]:
# Keep only the rows for which a reference point was accepted (skipped rows have
# NaN latitude).
reference_gpdf = initial_reference_gpdf[initial_reference_gpdf.latitude.notnull()]
# Coverage relative to (a) the merged rows and (b) every 1-second timestamp
# resampled for either phone. `new_ts_range` is not defined in this section, so
# the union of the two per-phone ranges is used as the denominator instead.
print(
    len(reference_gpdf) / len(initial_reference_gpdf),
    len(reference_gpdf) / len(set(new_android_ts_range) | set(new_ios_ts_range)),
)

In [None]:
import folium

In [None]:
# First latitudes of the error-filtered android points.
filtered_new_android_gpdf["latitude"].head()

In [None]:
# Interactive map with five layers: ground truth, resampled+filtered android
# points (red), resampled+filtered ios points (purple), raw android points
# (yellow) and the merged reference trajectory (blue).
# NOTE(review): `gj` is not imported in the visible part of this notebook --
# presumably `import geojson as gj` in an earlier cell; confirm.
curr_map = folium.Map()
# gt_leg was loaded from sd_sj in this section, so build its GeoJSON through the
# same spec (this used to go through sd_ucb, left over from the UCB section).
gt_leg_gj = sd_sj.get_geojson_for_leg(gt_leg)
# Each trajectory becomes a GeoJSON LineString feature of (longitude, latitude)
# pairs, with the integer timestamps attached as the "ts" property.
sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(filtered_new_android_gpdf.longitude, filtered_new_android_gpdf.latitude))),properties={"style": {"color": "red"}, "ts": [int(t) for t in filtered_new_android_gpdf.ts]})
i_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(filtered_new_ios_gpdf.longitude, filtered_new_ios_gpdf.latitude))),properties={"style": {"color": "purple"}, "ts": [int(t) for t in filtered_new_ios_gpdf.ts]})
old_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(android_control_gpdf.longitude, android_control_gpdf.latitude))),properties={"style": {"color": "yellow"}, "ts": [int(t) for t in android_control_gpdf.ts]})
filtered_sensed_section_gj = gj.Feature(geometry=gj.LineString(coordinates=list(zip(reference_gpdf.longitude, reference_gpdf.latitude))),properties={"style": {"color": "blue"}, "ts": [int(t) for t in reference_gpdf.ts]})
# Line layers.
gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name="ground_truth")
sensed_leg_gj_feature = folium.GeoJson(sensed_section_gj, name="sensed_values (android)")
i_sensed_leg_gj_feature = folium.GeoJson(i_sensed_section_gj, name="sensed_values (ios)")
old_sensed_leg_gj_feature = folium.GeoJson(old_sensed_section_gj, name="old sensed_values")
filtered_sensed_leg_gj_feature = folium.GeoJson(filtered_sensed_section_gj, name="filtered sensed_values")
# Point-marker layers built by ezpv.get_point_markers.
gt_leg_gj_points = ezpv.get_point_markers(gt_leg_gj[2], name="ground_truth_points", color="green")
sensed_leg_gj_points = ezpv.get_point_markers(sensed_section_gj, name="sensed_points(android) ", color="red", tz="America/Los_Angeles")
i_sensed_leg_gj_points = ezpv.get_point_markers(i_sensed_section_gj, name="sensed_points (ios)", color="purple", tz="America/Los_Angeles")
old_sensed_leg_gj_points = ezpv.get_point_markers(old_sensed_section_gj, name="old_sensed_points", color="yellow", tz="America/Los_Angeles")
filtered_sensed_leg_gj_points = ezpv.get_point_markers(filtered_sensed_section_gj, name="filtered_sensed_points", color="blue", tz="America/Los_Angeles")
# Add the layers in the same order as before.
curr_map.add_child(gt_leg_gj_feature)
curr_map.add_child(gt_leg_gj_points)
curr_map.add_child(sensed_leg_gj_feature)
curr_map.add_child(sensed_leg_gj_points)
curr_map.add_child(i_sensed_leg_gj_feature)
curr_map.add_child(i_sensed_leg_gj_points)
curr_map.add_child(old_sensed_leg_gj_feature)
curr_map.add_child(old_sensed_leg_gj_points)
curr_map.add_child(filtered_sensed_leg_gj_feature)
curr_map.add_child(filtered_sensed_leg_gj_points)
# Zoom to the extent of the android resampled layer and add a layer toggle.
curr_map.fit_bounds(sensed_leg_gj_feature.get_bounds())
folium.LayerControl().add_to(curr_map)
curr_map

In [None]:
# Reference rows at positions 430 to 434 (positional, end-exclusive slice).
reference_gpdf.iloc[slice(430, 435)]

In [None]:
# Merged rows whose ios longitude is exactly this value (float equality).
merged_gpdf.loc[merged_gpdf["longitude_i"] == -121.89511652921553]

In [None]:
# Merged rows labelled 430 through 440 (label-based, inclusive slice).
pd.concat(
    [merged_gpdf.loc[430:440]],
    axis="index",
)

In [None]:
# Reference rows at positions 430 to 439 (positional, end-exclusive slice).
pd.concat(
    [reference_gpdf.iloc[430:440]],
    axis=0,
)

In [None]:
# Reference rows whose longitude is exactly this value (float equality).
# NOTE(review): the other lookup in this section uses a longitude near -121.89,
# while this literal is near -122.47 -- possibly left over from another trip;
# confirm the value is still meaningful here.
reference_gpdf.loc[reference_gpdf["longitude"] == -122.46802100069702]