## Set up the dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev

In [None]:
# Metrics helpers
import emeval.metrics.baseline_segmentation as embs

In [None]:
# For plots
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
%matplotlib inline

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import pandas as pd
import numpy as np
pd.options.display.float_format = '{:.6f}'.format
import arrow

In [None]:
# Thirty minutes, presumably expressed in seconds to match the epoch timestamps used below
THIRTY_MINUTES = 60 * 30
# Threshold used for time comparisons; currently the same as THIRTY_MINUTES
TIME_THRESHOLD = THIRTY_MINUTES

## The spec

The spec defines what experiments were done, and over which time ranges. Once the experiment is complete, most of the structure is read back from the data, but we use the spec to validate that it all worked correctly. The spec also contains the ground truth for the legs. Here, we read the specs for all four timelines used in this notebook: the trip to LA, the trip to San Jose, and the trip to UC Berkeley along with its rerouted variant.

In [None]:
# Datastore and author that every spec below is looked up with
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"

# One SpecDetails per timeline, constructed in the listed order
sd_la, sd_sj, sd_ucb, sd_ucb_reroute = [
    eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, spec_id)
    for spec_id in ("unimodal_trip_car_bike_mtv_la",
                    "car_scooter_brex_san_jose",
                    "train_bus_ebike_mtv_ucb",
                    "train_bus_ebike_sm_reroute_mtv_ucb")
]

## The views

There are two main views for the data - the phone view and the evaluation view. 

### Phone view

In the phone view, the phone is primary, and then there is a tree that you can traverse to get the data that you want. Traversing that tree typically involves nested for loops; here's an example of loading the phone view and traversing it. You can replace the print statements with real code. When you are ready to check this in, please move the function to one of the Python modules so that we can invoke it more generally.

In [None]:
importlib.reload(eipv)

In [None]:
pv_la = eipv.PhoneView(sd_la)

In [None]:
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
pv_ucb = eipv.PhoneView(sd_ucb)

In [None]:
pv_ucb_reroute = eipv.PhoneView(sd_ucb_reroute)

## Number of detected trips versus ground truth trips

Checks to see how many spurious transitions there were

In [None]:
# `eapv` is used by the cells below but is not imported in the setup section, so a fresh
# kernel would fail here with a NameError. Import it before reloading.
# NOTE(review): module path assumed to be emeval.analysed.phone_view - confirm against the repo
import emeval.analysed.phone_view as eapv
importlib.reload(eapv)

In [None]:
# Build one analysed view per phone view, all with the same server and analysis keys
av_la, av_sj, av_ucb, av_ucb_reroute = [
    eapv.create_analysed_view(phone_view, "http://localhost:8080",
                              "analysis/recreated_location",
                              "analysis/cleaned_trip",
                              "analysis/cleaned_section")
    for phone_view in (pv_la, pv_sj, pv_ucb, pv_ucb_reroute)
]

### Start and end times mismatch

In [None]:
# Compare the sensed trips recorded for the whole phone with the ones attached to
# a single evaluation range, then match that range's sensed trips to the ground truth
curr_run = av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][4]
print(curr_run.keys())
phone_sensed_trips = av_sj.map()["android"]["ucb-sdb-android-3"]["sensed_trip_ranges"]
for sensed_trips in (phone_sensed_trips, curr_run["sensed_trip_ranges"]):
    print([sr["data"]["start_fmt_time"] for sr in sensed_trips])
run_sensed_data = [sr["data"] for sr in curr_run["sensed_trip_ranges"]]
embs.find_matching_segments(curr_run["evaluation_trip_ranges"], "trip_id", run_sensed_data)

In [None]:
[1,2,3][1:2]

In [None]:
def get_tradeoff_entries(pv):
    """
    Flatten a view into one record per ground truth trip of every evaluation range.

    Walks phone OS -> phone -> evaluation range -> evaluation trip, skipping any phone
    whose role contains "control". For each evaluation range the sensed trips are matched
    against the ground truth trips with `embs.find_matching_segments`; every ground truth
    trip then yields a record that combines range-level fields (battery drain, number of
    sensed trips in the range, ...) with the output of `embs.get_count_start_end_ts_diff`.

    Side effect: the matched sensed range is stored on each evaluation trip under the
    "matching_trip_range" key.

    :param pv: a view exposing `map()` and `spec_details.curr_spec`
    :return: list of dicts, one per (evaluation range, ground truth trip)
    """
    entries = []
    phone_indent = " " * 4
    range_indent = " " * 8
    for phone_os, phone_map in pv.map().items():
        print("=*" * 15)
        print(phone_os, phone_map.keys())
        for phone_label, phone_detail_map in phone_map.items():
            print(phone_indent, "-*" * 15)
            print(phone_indent, phone_label, phone_detail_map.keys())
            phone_role = phone_detail_map["role"]
            if "control" in phone_role:
                print("Ignoring %s phone %s since they are always on" % (phone_role, phone_label))
                continue
            # this spec does not have any calibration ranges, so only evaluation ranges are walked
            for eval_range in phone_detail_map["evaluation_ranges"]:
                print(range_indent, "=" * 30)
                print(range_indent, eval_range.keys())
                print(range_indent, eval_range["trip_id"], eval_range["eval_common_trip_id"],
                      eval_range["eval_role"], len(eval_range["evaluation_trip_ranges"]))

                # Battery drain over the range: first reading minus last reading
                battery_pct = eval_range["battery_df"]["battery_level_pct"]
                start_pct, end_pct = battery_pct.iloc[0], battery_pct.iloc[-1]
                delta_battery = start_pct - end_pct
                print("Battery starts at %d, ends at %d, drain = %d" % (start_pct, end_pct, delta_battery))

                sensed_ranges = eval_range["sensed_trip_ranges"]
                sensed_trips = len(sensed_ranges)
                matching_trip_map = embs.find_matching_segments(
                    eval_range["evaluation_trip_ranges"], "trip_id",
                    [sensed["data"] for sensed in sensed_ranges])
                print(matching_trip_map)

                for trip in eval_range["evaluation_trip_ranges"]:
                    matched_range = matching_trip_map[trip["trip_id"]]
                    # Remember the match on the trip itself so later cells can inspect it
                    trip["matching_trip_range"] = matched_range
                    results = embs.get_count_start_end_ts_diff(trip, matched_range)
                    print("Got results %s" % results)
                    entries.append({
                        "phone_os": phone_os,
                        "phone_label": phone_label,
                        "timeline": pv.spec_details.curr_spec["id"],
                        "range_id": eval_range["trip_id"],
                        "run": eval_range["trip_run"],
                        "duration": eval_range["duration"],
                        "role": eval_range["eval_role_base"],
                        "battery_drain": delta_battery,
                        "trip_count": sensed_trips,
                        "trip_id": trip["trip_id"],
                        **results,
                    })
    return entries

In [None]:
# Battery life is not examined at the evaluation trip level; the drain is taken per
# evaluation range since we want to capture the overall drain for the timeline
tradeoff_entries_list = [
    entry
    for view in (av_la, av_sj, av_ucb, av_ucb_reroute)
    for entry in get_tradeoff_entries(view)
]
tradeoff_df = pd.DataFrame(tradeoff_entries_list)

In [None]:
# Let's merge the reroutes
# Assign the result back instead of calling replace(..., inplace=True) on `tradeoff_df.timeline`:
# the in-place form acts on an intermediate Series, which pandas warns about and which leaves
# the dataframe unchanged once copy-on-write is enabled.
tradeoff_df["timeline"] = tradeoff_df["timeline"].replace("train_bus_ebike_sm_reroute_mtv_ucb", "train_bus_ebike_mtv_ucb")

In [None]:
# Role name -> ordinal, used to give the roles a fixed order on the plots
r2q_map = dict(power_control=0, HAMFDC=1, MAHFDC=2, HAHFDC=3, accuracy_control=4)
# Ordinal -> tick label; the two control roles get shortened labels
q2r_map = dict(enumerate(["power", "HAMFDC", "MAHFDC", "HAHFDC", "accuracy"]))

In [None]:
# Map each role to a number so that the plots come out in a fixed order
tradeoff_df["quality"] = tradeoff_df["role"].map(lambda role: r2q_map[role])
# Offset of the matched trip count from one (presumably one sensed trip per ground truth trip is the ideal)
tradeoff_df["count_diff"] = tradeoff_df["count"] - 1

In [None]:
import itertools

## Trip count analysis

### Scatter plot

In [None]:
# Mean sensed trip count v/s mean battery drain for every (role, timeline) group on android,
# with error bars spanning the observed min..max range in each dimension
ifig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12,4))
for key, df in tradeoff_df.query("phone_os == 'android'").groupby(["role", "timeline"]):
    tcd = df.trip_count
    bd = df.battery_drain
    tcd_mean, bd_mean = tcd.mean(), bd.mean()
    print("%s: trip_count min/mean/max = %s/%s/%s, battery_drain min/mean/max = %s/%s/%s" %
          (str(key), tcd.min(), tcd_mean, tcd.max(), bd.min(), bd_mean, bd.max()))
    # errorbar() treats xerr/yerr as non-negative distances from (x, y), not as absolute
    # end points, so pass the distance from the mean down to the min and up to the max.
    # max(..., 0) guards against a tiny negative value caused by floating point rounding.
    xerr = [[max(tcd_mean - tcd.min(), 0)], [max(tcd.max() - tcd_mean, 0)]]
    yerr = [[max(bd_mean - bd.min(), 0)], [max(bd.max() - bd_mean, 0)]]
    # fmt="o" draws a marker at the mean; with the default format a single point is invisible
    ax.errorbar(x=tcd_mean, y=bd_mean, xerr=xerr, yerr=yerr, fmt="o", label=key)
ax.set_xlabel("Number of sensed trips")
ax.set_ylabel("Battery drain (%)")
plt.legend()

### Timeline + trip specific variation

How many sensed trips matched to each ground truth trip?

In [None]:
# Trip count differences per configured quality: one column per timeline,
# android on the top row and ios on the bottom row
ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(9,6), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
for i, tl in enumerate(timeline_list):
    android_ax, ios_ax = ax_array[0][i], ax_array[1][i]
    tradeoff_df.query("timeline == @tl & phone_os == 'android'").boxplot(ax = android_ax, column=["count_diff"], by=["quality"])
    # Only the top row carries the timeline name
    android_ax.set_title(tl)
    tradeoff_df.query("timeline == @tl & phone_os == 'ios'").boxplot(ax = ios_ax, column=["count_diff"], by=["quality"])
    ios_ax.set_title("")

# Swap the numeric quality ticks for readable role labels on every panel (top row first)
for ax in ax_array.flatten():
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[0][0].set_ylabel("Difference in trip counts (android)")
ax_array[1][0].set_ylabel("Difference in trip counts (ios)")
ifig.suptitle("Trip count differences v/s configured quality over multiple timelines")
# ifig.tight_layout()

### Timeline specific variation

In [None]:
def plot_count_with_errors(ax_array, phone_os):
    """
    Draw, per timeline, a bar for the ground truth trip count followed by one bar per
    quality level 1..3 showing the mean sensed trip count with min/max error bars.

    Reads the notebook-level `timeline_trip_gt`, `tradeoff_df` and `q2r_map`.

    :param ax_array: axes indexed by timeline position, one per entry in `timeline_trip_gt`
    :param phone_os: value of the `phone_os` column to select
    """
    for i, (tl, trip_gt) in enumerate(timeline_trip_gt.items()):
        timeline_ax = ax_array[i]
        # Position 0 holds the ground truth value
        timeline_ax.bar(0, trip_gt)
        for q in range(1,4):
            # The query resolves @tl, @phone_os and @q from these local names
            curr_df = tradeoff_df.query("timeline == @tl & phone_os == @phone_os & quality == @q")
            counts = curr_df.trip_count
            lowest, average, highest = counts.min(), counts.mean(), counts.max()
            role_label = q2r_map[q]
            print("%s %s %s values = %s %s %s" % (phone_os, tl, role_label, lowest, average, highest))
            # Error bars are distances from the mean to the extremes
            lower_error = average - lowest
            upper_error = highest - average
            timeline_ax.bar(x=q, height=average,
                            yerr=[[lower_error], [upper_error]])
            print("%s %s %s errors = %s %s %s" % (phone_os, tl, role_label, lower_error, average, upper_error))
        timeline_ax.set_title(tl)
In [None]:
ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(10,5), sharex=False, sharey=True)
# Value drawn as the "truth" bar for each timeline (presumably the number of ground truth trips)
timeline_trip_gt = {"train_bus_ebike_mtv_ucb": 3,
                    "car_scooter_brex_san_jose": 2,
                    "unimodal_trip_car_bike_mtv_la": 2}

# Top row is android, bottom row is ios
for row, row_os in enumerate(["android", "ios"]):
    plot_count_with_errors(ax_array[row], row_os)

# Same tick positions and labels on every panel
bar_labels = ["truth"] + [q2r_map[r] for r in range(1,4)]
for ax in ax_array.flatten():
    ax.set_xticks(range(0,4))
    ax.set_xticklabels(bar_labels)
    ax.set_yticks(range(0,tradeoff_df.trip_count.max(),3))

for row, row_os in enumerate(["android", "ios"]):
    ax_array[row,0].set_ylabel("nTrips (%s)" % row_os)

ifig.tight_layout(pad=0.85)

## Start end results

In [None]:
# Print the mean, min and max sensed trip count per role for android phones on timeline `tl`.
# NOTE(review): `tl` is not defined in this cell; it is whatever the most recently executed
# `for ... tl in ...` loop left behind (the last entry of `timeline_list` from the earlier
# plotting cell on a top-to-bottom run). Consider naming the intended timeline explicitly.
for r, df in tradeoff_df.query("timeline == @tl & phone_os == 'android'").groupby("role"):
    print(r, df.trip_count.mean() , df.trip_count.min(), df.trip_count.max())

The HAHFDC phone ran out of battery on all three runs of the `train_bus_ebike_mtv_ucb` timeline, so the trips never ended. Let's blank out the end time difference (set it to NaN) for the android `berkeley_to_mtv_SF_express_bus_0` entries so that they don't skew the values from the other runs.

In [None]:
# Select the android HAHFDC entries for the express bus trip on the UCB timeline
out_of_battery_phones = tradeoff_df.query("timeline=='train_bus_ebike_mtv_ucb' & role=='HAHFDC' & trip_id=='berkeley_to_mtv_SF_express_bus_0' & phone_os == 'android'")
# Blank out the end time difference for all of those rows in one assignment
tradeoff_df.loc[out_of_battery_phones.index, "end_diff_mins"] = float('nan')

In [None]:
tradeoff_df.query("timeline=='train_bus_ebike_mtv_ucb' & role=='HAHFDC' & trip_id=='berkeley_to_mtv_SF_express_bus_0' & phone_os == 'android'")

### Overall results

In [None]:
# Start/end time differences per configured quality, one panel per (OS, metric) pair
ifig, ax_array = plt.subplots(nrows=1,ncols=4,figsize=(12,3), sharex=False, sharey=True)
panel_specs = [("phone_os == 'android'", "start_diff_mins", "start time (android)"),
               ("phone_os == 'android'", "end_diff_mins", "end time (android)"),
               ("phone_os == 'ios'", "start_diff_mins", "start_time (ios)"),
               ("phone_os == 'ios'", "end_diff_mins", "end_time (ios)")]
for panel_ax, (os_filter, diff_column, panel_title) in zip(ax_array, panel_specs):
    tradeoff_df.query(os_filter).boxplot(ax = panel_ax, column=[diff_column], by=["quality"])
    panel_ax.set_title(panel_title)

# Swap the numeric quality ticks for readable role labels and drop the "quality" axis label
for ax in ax_array:
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[1].text(0.55,25,"Excluding trips where battery ran out")

ax_array[0].set_ylabel("Diff (mins)")
# Clear the figure-level title
ifig.suptitle("")
# ifig.tight_layout(pad=1.7)

### Timeline specific

In [None]:
# Start/end time differences per configured quality: one column per timeline and
# one row per (OS, metric) pair
ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(10,10), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
# (query, plotted column, y label) for each row; the queries pick up `tl` from the loop below
row_specs = [("timeline == @tl & phone_os == 'android'", "start_diff_mins", "Start time diff (android)"),
             ("timeline == @tl & phone_os == 'android'", "end_diff_mins", "End time diff (android)"),
             ("timeline == @tl & phone_os == 'ios'", "start_diff_mins", "Start time diff (ios)"),
             ("timeline == @tl & phone_os == 'ios'", "end_diff_mins", "End time diff (ios)")]
for i, tl in enumerate(timeline_list):
    for row, (row_query, diff_column, _) in enumerate(row_specs):
        panel_ax = ax_array[row][i]
        tradeoff_df.query(row_query).boxplot(ax = panel_ax, column=[diff_column], by=["quality"])
        # Only the top row carries the timeline name
        panel_ax.set_title(tl if row == 0 else "")

# Swap the numeric quality ticks for readable role labels on every panel (row by row)
for ax in ax_array.flatten():
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[1,0].text(0.55,25,"Excluding trips where battery ran out")

for row, (_, _, row_label) in enumerate(row_specs):
    ax_array[row][0].set_ylabel(row_label)
ifig.suptitle("Trip start end accuracy (mins) v/s configured quality over multiple timelines")

# ifig.tight_layout(pad=2.5)

## Outlier checks

We can have unexpected values for both time and count. Unfortunately, there is no overlap between the two (intersection is zero). So we will look at a random sample from both cases

In [None]:
# (trip_id, count) combinations that are not treated as outliers
# (presumably trips that are expected to be sensed as that many pieces)
expected_trip_counts = [("bus trip with e-scooter access_0", 2),
                        ("mtv_to_berkeley_sf_bart_0", 3)]
expected_legs = "&".join("not (trip_id == '%s' & count == %d)" % trip_and_count
                         for trip_and_count in expected_trip_counts)
count_outliers = tradeoff_df.query("count > 1 & " + expected_legs)
outlier_columns = ["phone_os", "range_id", "trip_id", "run", "role", "count", "start_diff_mins", "end_diff_mins"]
count_outliers[outlier_columns].head()

In [None]:
tradeoff_df.query("count < 1 & role == 'HAHFDC'")

In [None]:
time_outliers = tradeoff_df.query("start_diff_mins == 30 | end_diff_mins == 30")
time_outliers[["phone_os", "range_id", "trip_id", "run", "role", "start_diff_mins", "end_diff_mins"]].head()

In [None]:
print(len(time_outliers.index.union(count_outliers.index)), len(time_outliers.index.intersection(count_outliers.index)))

In [None]:
time_outliers.sample(n=3, random_state=1)[["phone_os", "range_id", "trip_id", "run", "role", "count", "start_diff_mins", "end_diff_mins"]]

In [None]:
count_outliers.sample(n=3, random_state=1)[["phone_os", "range_id", "trip_id", "run", "role", "count", "start_diff_mins", "end_diff_mins"]]

In [None]:
tradeoff_df.query("timeline == 'train_bus_ebike_mtv_ucb' & quality == 3 & phone_os == 'ios'").sort_values(by="start_diff_mins")

In [None]:
def fmt(ts):
    """Return `ts` as an arrow object converted to the America/Los_Angeles timezone."""
    return arrow.get(ts).to("America/Los_Angeles")

In [None]:
import IPython.display as ipyd

def check_outlier(eval_range, trip_idx, mismatch_key):
    """
    Print debugging details for one ground truth trip and return a map of its locations.

    Prints the experiment and trip time spans, the sensed trips matched to this trip, the
    sensed trips for the entire experiment, and displays the experiment's transitions.

    Requires the "matching_trip_range" key on the evaluation trip, which is set by
    `get_tradeoff_entries`.

    :param eval_range: evaluation range containing the trip
    :param trip_idx: index of the trip in `eval_range["evaluation_trip_ranges"]`
    :param mismatch_key: name of the mismatched metric; only used in the printed message,
        since the same map is returned whatever its value
    :return: the result of `ezpv.display_map_detail_from_df` for the trip's locations
    """
    eval_trip_range = eval_range["evaluation_trip_ranges"][trip_idx]
    experiment_span = (fmt(eval_range["start_ts"]), fmt(eval_range["end_ts"]))
    trip_span = (fmt(eval_trip_range["start_ts"]), fmt(eval_trip_range["end_ts"]))
    print("Trip %s, ground truth experiment for metric %s, experiment %s -> %s, trip %s -> %s" %
          ((eval_range["trip_id"], mismatch_key) + experiment_span + trip_span))

    matched_spans = [(fmt(sr["start_ts"]), fmt(sr["end_ts"]))
                     for sr in eval_trip_range["matching_trip_range"]["match"]]
    print(matched_spans)

    print("**** For entire experiment ***")
    all_sensed_spans = [(fmt(sr["data"]["start_ts"]), fmt(sr["data"]["end_ts"]))
                        for sr in eval_range["sensed_trip_ranges"]]
    print(all_sensed_spans)

    ipyd.display(eval_range["transition_df"][["transition", "fmt_time"]])
    return ezpv.display_map_detail_from_df(eval_trip_range["location_df"])

#####  MAHFDC is just terrible

It looks like with MAHFDC, we essentially get no trip ends on android. Let's investigate these a bit further.
- run 0: trip never ended: trip actually ended just before next trip started `15:01:26`. And then next trip had geofence exit, but we didn't detect it because it never ended, so we didn't create a sensed range for it.
- run 1: trip ended but after 30 mins: similar behavior; trip ended just before next trip started `15:49:39`.

In [None]:
tradeoff_df.query("phone_os == 'android' & role == 'MAHFDC' & timeline == 'car_scooter_brex_san_jose'")[["range_id", "trip_id", "run", "role", "count", "start_diff_mins", "end_diff_mins"]]

In [None]:
# Arrow format tokens are case-sensitive: "ss" is seconds, while "SS" is two digits of
# sub-second, so "HH:mm:SS" would print hundredths of a second in the seconds position.
# NOTE(review): assumes sd_sj.fmt hands the format string to arrow's formatter - confirm
FMT_STRING = "HH:mm:ss"
# Ground truth start -> end of every trip in this evaluation range, then its transitions
for t in av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][3]["evaluation_trip_ranges"]:
    print(sd_sj.fmt(t["start_ts"], FMT_STRING), "->", sd_sj.fmt(t["end_ts"], FMT_STRING))
av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][3]["transition_df"]

In [None]:
check_outlier(av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][4], 0, "end_ts")

In [None]:
check_outlier(av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][4], 1, "end_ts")

In [None]:
# Arrow format tokens are case-sensitive: "ss" is seconds, while "SS" is two digits of
# sub-second, so "HH:mm:SS" would print hundredths of a second in the seconds position.
# NOTE(review): assumes sd_sj.fmt hands the format string to arrow's formatter - confirm
FMT_STRING = "HH:mm:ss"
# Ground truth start -> end of every trip in this evaluation range, then its transitions
for t in pv_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][4]["evaluation_trip_ranges"]:
    print(sd_sj.fmt(t["start_ts"], FMT_STRING), "->", sd_sj.fmt(t["end_ts"], FMT_STRING))
pv_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][4]["transition_df"]

##### HAHFDC detection errors on the second run 

In [None]:
# 155 	38.000000 	1 	44696.648613 	5.613607 	ucb-sdb-ios-2 	ios 	HAHFDC v/s HAMFDC:HAHFDC_1 	HAHFDC 	1 	12.156766 	train_bus_ebike_mtv_ucb 	3 	berkeley_to_mtv_SF_express_bus_0 	3 	0
# 153 	38.000000 	1 	44696.648613 	0.287331 	ucb-sdb-ios-2 	ios 	HAHFDC v/s HAMFDC:HAHFDC_1 	HAHFDC 	1 	14.105514 	train_bus_ebike_mtv_ucb 	3 	mtv_to_berkeley_sf_bart_0 	3 	0
    
check_outlier(av_ucb_reroute.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][1], 0, "start_ts")

In [None]:
check_outlier(av_ucb_reroute.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][1], 2, "start_ts")

##### Visit detection kicked in almost at the end of the trip

In [None]:
# 44 	ios 	suburb_city_driving_weekend_0 	1 	HAMFDC 	0 	30.000000 	30.000000
check_outlier(av_la.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][4], 0, "start_ts")

##### Trip end never detected

Trip ended at 14:11, experiment ended at 14:45. No stopped_moving for the last trip

In [None]:
# 65 	android 	bus trip with e-scooter access_0 	2 	HAMFDC 	1 	3.632239 	30.000000
check_outlier(av_sj.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][2], 1, "end_ts")

##### Trip end detection errors on iOS
Original experiment, explanation for the outliers on the HAHFDC and MAHFDC first runs to San Jose
- HAHFDC: Trip end detected 1.5 hours after real end, but before next trip start
- MAHFDC: Trip end detected 5 hours after real end, at the end of the next trip
- MAHFDC: Clearly this was not even detected as a separate trip, so this is correct. There was a spurious trip from `17:42:22` - `17:44:22` which ended up matching this. But clearly because of the missing trip end detection, both the previous trip and this one were incorrect. You can click on the points at the Mountain View library to confirm when the trip ended.

In [None]:
# Lay out three outlier maps side by side (1 row x 3 columns) in a single branca figure
fig = bre.Figure()
# Column 1: ucb-sdb-ios-2, first evaluation range, first trip
fig.add_subplot(1,3,1).add_child(check_outlier(av_sj.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][0], 0, "end_ts"))
# Column 2: ucb-sdb-ios-3, first evaluation range, first trip
fig.add_subplot(1,3,2).add_child(check_outlier(av_sj.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][0], 0, "end_ts"))
# Column 3: ucb-sdb-ios-3, first evaluation range, second trip
fig.add_subplot(1,3,3).add_child(check_outlier(av_sj.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][0], 1, "start_ts"))
# check_outlier(pv_sj.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][0], 0, "end_ts")

##### No geofence exit ever detected

On the middle trip of the second round of data collection to the San Jose library, we got no geofence exits. The entire list of transitions is 

```
transition                          fmt_time
3     T_VISIT_ENDED  2019-08-06T11:29:20.573817-07:00
6   T_VISIT_STARTED  2019-08-06T11:29:20.911773-07:00
8     T_VISIT_ENDED  2019-08-06T11:35:38.250980-07:00
9   T_VISIT_STARTED  2019-08-06T12:00:05.445936-07:00
12     T_TRIP_ENDED  2019-08-06T12:00:07.093790-07:00
15    T_VISIT_ENDED  2019-08-06T15:59:13.998068-07:00
18  T_VISIT_STARTED  2019-08-06T17:12:38.808743-07:00
21     T_TRIP_ENDED  2019-08-06T17:12:40.504285-07:00
```

We did get visit notifications, so we did track location points (albeit after a long time), and we did get the trip end notifications, but we have no sensed trips. Had to handle this in the code as well

In [None]:
check_outlier(av_sj.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][4], 0, "start_ts")

##### No geofence exit ever detected

On the middle trip of the second round of data collection to the San Jose library, we got no geofence exits.
We did get visit notifications, so we did track location points (albeit after a long time), and we did get the trip end notifications, but we have no sensed trips. Had to handle this in the code as well

In [None]:
# 81 	ios 	bus trip with e-scooter access_0 	1 	HAHFDC 	0 	30.000000 	30.000000
check_outlier(av_sj.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][4], 1, "end_ts")

### 7 mapped trips for one

This is essentially from the time that I wandered around looking for the bikeshare bike. This raises the question of whether I should filter out the points within the polygon in this case too. Overall, I think not. The only part within the polygon that we don't guarantee is the ground truth trajectory. We still do have the ground truth of the trip/section start end, and there really is no reason why we should have had so many "trips" when I was walking around. I certainly didn't wait for too long while walking and this was not semantically a "trip" by any stretch of the imagination.

In [None]:
# 113 	android 	berkeley_to_mtv_SF_express_bus_0 	2 	HAMFDC 	7 	2.528077 	3.356611
check_outlier(av_ucb.map()["android"]["ucb-sdb-android-3"]["evaluation_ranges"][2], 2, "end_ts")

### Trip split into two in medium accuracy *only*

Actual trip ends at `14:21`. In medium accuracy, detected trips were `14:12:15 -> 14:17:33` and  `14:22:14 -> 14:24:15`. This was after we reached the destination, but there is a large gap because we basically got no points for a large part of the trip. This seems correct - it looks like iOS is just prematurely detecting the trip end in the MA case.

In [None]:
# 127 	ios 	walk_urban_university_0 	1 	MAHFDC 	2 	4.002549 	2.352913
fig = bre.Figure()

def compare_med_high_accuracy():
    """
    Compare the second trip of evaluation range 1 on ucb-sdb-ios-2 and ucb-sdb-ios-3.

    Prints the experiment, trip and comparison-phone values of "end_ts", the transitions of
    the ucb-sdb-ios-3 trip, and the ucb-sdb-ios-3 experiment transitions that fall within
    30 minutes either side of the trip end.

    NOTE(review): the `ha_*` names suggest ucb-sdb-ios-2 is the high accuracy phone - confirm.

    :return: a branca figure with the ucb-sdb-ios-2 map on the left and the
        ucb-sdb-ios-3 map on the right
    """
    trip_idx = 1
    mismatch_key = "end_ts"
    ha_range, eval_range = [
        av_ucb.map()["ios"][phone_label]["evaluation_ranges"][1]
        for phone_label in ("ucb-sdb-ios-2", "ucb-sdb-ios-3")
    ]
    ha_trip_range = ha_range["evaluation_trip_ranges"][trip_idx]
    eval_trip_range = eval_range["evaluation_trip_ranges"][trip_idx]

    print("Trip %s, ground truth experiment for metric %s, %s, trip %s, high accuracy %s" %
          (eval_range["trip_id"], mismatch_key,
           fmt(eval_range[mismatch_key]), fmt(eval_trip_range[mismatch_key]), fmt(ha_trip_range[mismatch_key])))
    print(eval_trip_range["transition_df"][["transition", "fmt_time"]])

    print("**** Expanded ***")
    # Look at the experiment's transitions in a window around the trip end
    half_window = 30*60
    trip_end = eval_trip_range["end_ts"]
    window_query = "%s < ts < %s" % ((trip_end - half_window), (trip_end + half_window))
    print(eval_range["transition_df"].query(window_query)[["transition", "fmt_time"]])

    fig = bre.Figure()
    for column, trip_range in enumerate((ha_trip_range, eval_trip_range), start=1):
        fig.add_subplot(1,2,column).add_child(ezpv.display_map_detail_from_df(trip_range["location_df"]))
    return fig
compare_med_high_accuracy()

In [None]:
[{'start_ts': fmt(1564089135.368705), 'end_ts': fmt(1564089453.8783798)},
{'start_ts': fmt(1564089734.305933), 'end_ts': fmt(1564089855.8683748)}]

### We just didn't detect any trip ends in the middle

We only detected a trip end at the Mountain View station. This is arguably more correct than the multiple trips that we get with a dwell time.

In [None]:
# 120 	ios 	mtv_to_berkeley_sf_bart_0 	2 	HAHFDC 	2 	3.175024 	1.046759
check_outlier(av_ucb.map()["ios"]["ucb-sdb-ios-2"]["evaluation_ranges"][2], 0, "end_ts")