## Set up the dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev

In [None]:
# For plots
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
%matplotlib inline

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import pandas as pd
pd.options.display.float_format = '{:.6f}'.format
import arrow

In [None]:
# Tolerance, in seconds, when matching a sensed trip boundary to a ground truth
# boundary; also used as the penalty value when no boundary could be matched
# (the diffs are divided by 60 and plotted as minutes)
THIRTY_MINUTES = 30 * 60

## The spec

The spec defines what experiments were done, and over which time ranges. Once the experiment is complete, most of the structure is read back from the data, but we use the spec to validate that it all worked correctly. The spec also contains the ground truth for the legs. Here, we read the specs for three timelines: `unimodal_trip_car_bike_mtv_la`, `car_scooter_brex_san_jose` and `train_bus_ebike_mtv_ucb`.

In [None]:
# Server and author passed to SpecDetails to look up the specs
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"
# One SpecDetails per timeline; the third argument is the spec id, which is
# the same string used as the "timeline" value in the plots
sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "unimodal_trip_car_bike_mtv_la")
sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "car_scooter_brex_san_jose")
sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "train_bus_ebike_mtv_ucb")

## The views

There are two main views for the data - the phone view and the evaluation view. 

### Phone view

In the phone view, the phone is primary, and then there is a tree that you can traverse to get the data that you want. Traversing that tree typically involves nested for loops; here's an example of loading the phone view and traversing it. You can replace the print statements with real code. When you are ready to check this in, please move the function to one of the Python modules so that we can invoke it more generally.

In [None]:
# Re-import the phone_view module so that edits made to it while debugging are picked up
importlib.reload(eipv)

In [None]:
# Phone view built from the `unimodal_trip_car_bike_mtv_la` spec
pv_la = eipv.PhoneView(sd_la)

In [None]:
# Phone view built from the `car_scooter_brex_san_jose` spec
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
# Phone view built from the `train_bus_ebike_mtv_ucb` spec
pv_ucb = eipv.PhoneView(sd_ucb)

## Number of detected trips versus ground truth trips

Check how many spurious transitions there were by comparing the number of sensed trips against the number of ground truth trips.

In [None]:
import arrow

In [None]:
def find_ranges(transition_df, start_transition, end_transition):
    """
    Pair up start and end transitions into a list of ranges.

    The rows of ``transition_df`` are walked in order. A range opens at the
    first ``start_transition`` seen while no range is open, and closes at the
    first ``end_transition`` seen after that. Anything else (a repeated start,
    an end without a preceding start, an unrelated transition) is ignored, and
    a start that is never closed produces no range. With S = start, E = end:

        S, S, E, E, E, E -> 1 range
        S, E, S, E, S, S -> 2 ranges
        E, S, E, S       -> 1 range

    :param transition_df: dataframe whose rows have "transition" and "ts" columns
    :param start_transition: value of "transition" that opens a range
    :param end_transition: value of "transition" that closes a range
    :return: list of ``{"start_ts": ..., "end_ts": ...}`` dicts, in row order
    """
    ranges = []
    open_start = None
    for record in transition_df.to_dict(orient='records'):
        kind = record["transition"]
        if open_start is None:
            # No range is open: only a start transition is interesting
            if kind == start_transition:
                open_start = record["ts"]
            continue
        # A range is open: only an end transition can close it
        if kind == end_transition:
            ranges.append({"start_ts": open_start, "end_ts": record["ts"]})
            open_start = None
    return ranges

In [None]:
def fill_sensed_trip_ranges(pv):
    """
    Annotate, in place, every evaluation range of every non-control phone in
    ``pv`` with the trip ranges derived from its sensed transitions.

    For each evaluation range two keys are set:

    - "sensed_trip_ranges": ranges between geofence exit and trip end
      (android: exited_geofence -> stopped_moving,
      ios: T_EXITED_GEOFENCE -> T_TRIP_ENDED)
    - "visit_sensed_trip_ranges": ranges between T_VISIT_ENDED and
      T_VISIT_STARTED on ios; always an empty list on android

    Phones whose role contains "control" are skipped. Progress and the
    resulting counts (sensed, visit-sensed, ground truth) are printed.

    :param pv: phone view; ``pv.map()`` must map phone_os -> phone_label -> details
    :return: None
    """
    phone_indent = 4 * ' '
    range_indent = 8 * ' '
    for phone_os, phone_map in pv.map().items():
        print(15 * "=*")
        print(phone_os, phone_map.keys())
        for phone_label, phone_detail_map in phone_map.items():
            print(phone_indent, 15 * "-*")
            print(phone_indent, phone_label, phone_detail_map["role"], phone_detail_map.keys())
            if "control" in phone_detail_map["role"]:
                print("Ignoring %s phone %s since they are always on" % (phone_detail_map["role"], phone_label))
                continue
            # this spec does not have any calibration ranges, but evaluation ranges are actually cooler
            for eval_range in phone_detail_map["evaluation_ranges"]:
                print(range_indent, 30 * "=")
                print(range_indent, eval_range.keys())
                print(range_indent, eval_range["trip_id"], eval_range["eval_common_trip_id"],
                      eval_range["eval_role"], len(eval_range["evaluation_trip_ranges"]))

                # Pick the OS-specific transition names once: the filter for the
                # dataframe, the (start, end) pair for trips and, on ios only,
                # the (start, end) pair for visit-based ranges
                if phone_os == "android":
                    query_str = "transition == 'local.transition.exited_geofence' | transition == 'local.transition.stopped_moving'"
                    trip_pair = ("local.transition.exited_geofence", "local.transition.stopped_moving")
                    visit_pair = None
                else:
                    assert phone_os == "ios"
                    query_str = "transition == 'T_EXITED_GEOFENCE' | transition == 'T_VISIT_ENDED' | transition == 'T_VISIT_STARTED' | transition == 'T_TRIP_ENDED'"
                    trip_pair = ("T_EXITED_GEOFENCE", "T_TRIP_ENDED")
                    visit_pair = ("T_VISIT_ENDED", "T_VISIT_STARTED")

                sensed_transitions = eval_range["transition_df"].query(query_str)
                print(sensed_transitions[["transition", "fmt_time"]])

                eval_range["sensed_trip_ranges"] = find_ranges(sensed_transitions, *trip_pair)
                if visit_pair is None:
                    eval_range["visit_sensed_trip_ranges"] = []
                else:
                    eval_range["visit_sensed_trip_ranges"] = find_ranges(sensed_transitions, *visit_pair)

                print(range_indent,
                      len(eval_range["sensed_trip_ranges"]),
                      len(eval_range["visit_sensed_trip_ranges"]),
                      len(eval_range["evaluation_trip_ranges"]))

In [None]:
# Annotate the evaluation ranges of all three phone views in place;
# get_tradeoff_entries reads the "sensed_trip_ranges" and
# "visit_sensed_trip_ranges" keys that are added here
fill_sensed_trip_ranges(pv_la)
fill_sensed_trip_ranges(pv_sj)
fill_sensed_trip_ranges(pv_ucb)

### Start and end times mismatch

In [None]:
def find_closest_trip_idx(gt, sensed_trips, key, max_diff=None):
    """
    Return the index of the sensed trip whose ``key`` timestamp is closest to
    the ground truth trip's ``key`` timestamp.

    :param gt: ground truth trip; must contain ``key``
    :param sensed_trips: sequence of sensed trips, each containing ``key``
    :param key: timestamp field to compare, e.g. "start_ts" or "end_ts"
    :param max_diff: largest absolute difference that still counts as a match.
        Defaults to the notebook-level THIRTY_MINUTES when not specified.
    :return: index into ``sensed_trips`` of the closest trip (the first one on
        ties), or None if there are no sensed trips or the closest one differs
        by more than ``max_diff``
    """
    # A phone may not have sensed any trips at all; min() of an empty list
    # would raise ValueError, so report "no match" instead
    if len(sensed_trips) == 0:
        return None
    if max_diff is None:
        # Resolved lazily so that the default tracks the notebook constant
        max_diff = THIRTY_MINUTES
    ts_diffs = [abs(gt[key] - st[key]) for st in sensed_trips]
    min_diff = min(ts_diffs)
    if min_diff > max_diff:
        # too far out, maybe this gt_trip doesn't have any matching trip
        return None
    return ts_diffs.index(min_diff)

In [None]:
def find_matching_trips(gt_trips, sensed_trips):
    """
    Map every ground truth trip id to the sensed trip(s) that match it.

    If both lists have the same length the trips are paired up positionally.
    Otherwise, for every ground truth trip, the sensed trips closest to its
    start and to its end are looked up with ``find_closest_trip_idx``.

    :param gt_trips: ground truth trips, each with "trip_id", "start_ts", "end_ts"
    :param sensed_trips: sensed trips, each with "start_ts", "end_ts"
    :return: dict of trip_id -> ``{"type": ..., "match": [...]}`` where type is
        - "both": start and end were matched; match is the slice of sensed
          trips from the start match to the end match, inclusive (this slice
          is empty if the end match lies before the start match)
        - "start_ts" / "end_ts": only that boundary was matched; match holds
          the single sensed trip
        - "none": nothing was matched; match is empty
    """
    n_gt, n_sensed = len(gt_trips), len(sensed_trips)

    if n_gt == n_sensed:
        print("Found matching lengths %d = %d" % (n_gt, n_sensed))
        return {gt["trip_id"]: {"type": "both", "match": [st]}
                for gt, st in zip(gt_trips, sensed_trips)}

    print("Found mismatched lengths %d != %d, need to use more complex matching" % (n_gt, n_sensed))
    matches = {}
    for gt in gt_trips:
        first_idx = find_closest_trip_idx(gt, sensed_trips, "start_ts")
        last_idx = find_closest_trip_idx(gt, sensed_trips, "end_ts")
        has_start = first_idx is not None
        has_end = last_idx is not None

        if has_start and has_end:
            # both boundaries are within a reasonable timeframe
            entry = {"type": "both", "match": sensed_trips[first_idx:last_idx+1]}
        elif has_start:
            # starts pretty close by but ends super early; pick it anyway
            entry = {"type": "start_ts", "match": [sensed_trips[first_idx]]}
        elif has_end:
            # ends pretty close by but starts super early/late; pick it anyway
            entry = {"type": "end_ts", "match": [sensed_trips[last_idx]]}
        else:
            # nothing is close to either the start or the end
            entry = {"type": "none", "match": []}
        matches[gt["trip_id"]] = entry

    return matches

In [None]:
# Spot-check the matching on the first evaluation range of one android phone
# from the LA phone view
curr_run = pv_la.map()["android"]["ucb-sdb-android-2"]["evaluation_ranges"][0]
print(curr_run.keys())
find_matching_trips(curr_run["evaluation_trip_ranges"], curr_run["sensed_trip_ranges"])

In [None]:
# Reminder that the end index of a slice is exclusive (this evaluates to [2]),
# which is why find_matching_trips slices with end_trip_idx+1
[1,2,3][1:2]

In [None]:
def get_tradeoff_entries(pv):
    """
    Build one tradeoff record per ground truth trip for every non-control
    phone in ``pv``.

    The evaluation ranges must already carry "sensed_trip_ranges" and
    "visit_sensed_trip_ranges", since both keys are read here.

    Each record combines values computed once per evaluation range (battery
    drain, difference in trip count, difference in visit report count) with
    per-trip values: the absolute start/end timestamp differences between the
    ground truth trip and its matched sensed trip(s), divided by 60.
    THIRTY_MINUTES is used as the difference for any boundary that could not
    be matched.

    :param pv: phone view; ``pv.map()`` must map phone_os -> phone_label -> details
    :return: list of dicts, one per ground truth trip
    """
    phone_indent = 4 * ' '
    range_indent = 8 * ' '
    entries = []
    for phone_os, phone_map in pv.map().items():
        print(15 * "=*")
        print(phone_os, phone_map.keys())
        for phone_label, phone_detail_map in phone_map.items():
            print(phone_indent, 15 * "-*")
            print(phone_indent, phone_label, phone_detail_map.keys())
            if "control" in phone_detail_map["role"]:
                print("Ignoring %s phone %s since they are always on" % (phone_detail_map["role"], phone_label))
                continue
            # this spec does not have any calibration ranges, but evaluation ranges are actually cooler
            for eval_range in phone_detail_map["evaluation_ranges"]:
                print(range_indent, 30 * "=")
                print(range_indent, eval_range.keys())
                print(range_indent, eval_range["trip_id"], eval_range["eval_common_trip_id"],
                      eval_range["eval_role"], len(eval_range["evaluation_trip_ranges"]))

                # Range-level metrics, shared by every trip in this range
                battery_pct = eval_range["battery_df"]["battery_level_pct"]
                first_pct, last_pct = battery_pct.iloc[0], battery_pct.iloc[-1]
                delta_battery = first_pct - last_pct
                print("Battery starts at %d, ends at %d, drain = %d" % (first_pct, last_pct, delta_battery))

                n_gt_trips = len(eval_range["evaluation_trip_ranges"])
                delta_trips = abs(n_gt_trips - len(eval_range["sensed_trip_ranges"]))
                delta_visit_reports = abs(n_gt_trips - len(eval_range["visit_sensed_trip_ranges"]))

                matching_trip_map = find_matching_trips(eval_range["evaluation_trip_ranges"],
                                                        eval_range["sensed_trip_ranges"])
                print(matching_trip_map)

                for trip in eval_range["evaluation_trip_ranges"]:
                    matched = matching_trip_map[trip["trip_id"]]
                    candidates = matched["match"]

                    # Start from the "not matched" penalty and overwrite the
                    # boundaries for which a sensed trip was actually found
                    start_ts_diff = THIRTY_MINUTES
                    end_ts_diff = THIRTY_MINUTES
                    if len(candidates) > 0:
                        match_type = matched["type"]
                        if match_type in ("both", "start_ts"):
                            start_ts_diff = abs(trip["start_ts"] - candidates[0]["start_ts"])
                        if match_type in ("both", "end_ts"):
                            end_ts_diff = abs(trip["end_ts"] - candidates[-1]["end_ts"])

                    entries.append({
                        "phone_os": phone_os,
                        "phone_label": phone_label,
                        "timeline": pv.spec_details.curr_spec["id"],
                        "run": eval_range["trip_run"],
                        "duration": eval_range["duration"],
                        "role": eval_range["eval_role_base"],
                        "battery_drain": delta_battery,
                        "trip_count_diff": delta_trips,
                        "visit_report_diff": delta_visit_reports,
                        "trip_id": trip["trip_id"],
                        "start_ts_diff": start_ts_diff / 60,
                        "end_ts_diff": end_ts_diff / 60,
                    })
    return entries

In [None]:
# We are not going to look at battery life at the evaluation trip level; we will end with evaluation range
# since we want to capture the overall drain for the timeline
# Collect the entries from all three timelines into a single dataframe,
# with one row per ground truth trip
tradeoff_entries_list = []
tradeoff_entries_list.extend(get_tradeoff_entries(pv_la))
tradeoff_entries_list.extend(get_tradeoff_entries(pv_sj))
tradeoff_entries_list.extend(get_tradeoff_entries(pv_ucb))
tradeoff_df = pd.DataFrame(tradeoff_entries_list)

In [None]:
# Inspect only the rows collected from iOS phones
tradeoff_df[tradeoff_df.phone_os == "ios"]

In [None]:
# Draw one rectangle per role: its x extent is the min..max of the trip count
# difference and its y extent is the min..max of the battery drain for that role
ifig, ax = plt.subplots(nrows=1, ncols=1)
errorboxes = []
for key, df in tradeoff_df.groupby("role"):
    tcd = df.trip_count_diff
    bd = df.battery_drain
    print("Plotting rect with params %s, %d, %d" % (str((tcd.min(), bd.min())),
                                                    tcd.max() - tcd.min(),
                                                    bd.max() - bd.min()))
    # Rectangle takes the lower-left corner, then the width and the height
    rect = Rectangle((tcd.min(), bd.min()), tcd.max() - tcd.min(), bd.max()-bd.min())
    errorboxes.append(rect)
    

# Create patch collection with specified colour/alpha
pc = PatchCollection(errorboxes, facecolor="green", alpha=0.75,
                        edgecolor="red")

# Add collection to axes
ax.add_collection(pc)
# The axis limits are set explicitly to fixed values here
ax.set_xlim(-2, 10)
ax.set_ylim(-5, 30)

In [None]:
# role -> numeric quality; HAMFDC and MAHFDC share quality 1
r2q_map = {"power_control": 0, "HAMFDC": 1, "MAHFDC": 1, "HAHFDC": 2, "accuracy_control": 3}
# quality -> role label (list index == quality), one list per OS since the
# role at quality 1 differs: HAMFDC on android, MAHFDC on ios
q2r_android_list = ["power_control", "HAMFDC", "HAHFDC", "accuracy_control"]
q2r_ios_list = ["power_control", "MAHFDC", "HAHFDC", "accuracy_control"]

In [None]:
# Map each role to its numeric quality so that the plots come out in quality order.
# Adds the "quality" column to tradeoff_df in place; a role that is missing
# from r2q_map raises a KeyError.
tradeoff_df["quality"] = tradeoff_df.role.apply(lambda r: r2q_map[r])

In [None]:
# Group by the new "quality" column; the result is not stored anywhere
tradeoff_df.groupby("quality")

In [None]:
# 3x3 grid of boxplots: one column per timeline; the rows are
#   0: trip count difference (android)
#   1: trip count difference (ios)
#   2: visit report difference (ios)
# each grouped by the numeric "quality" column.
#
# whis=(0, 100) makes the whiskers span the full data range (0th to 100th
# percentile). It replaces the older whis="range" spelling, which was
# deprecated in Matplotlib 3.1 and is rejected by later releases.
ifig, ax_array = plt.subplots(nrows=3,ncols=3,figsize=(16,16), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
for i, tl in enumerate(timeline_list):
    tradeoff_df.query("timeline == @tl & phone_os == 'android'").boxplot(ax = ax_array[0][i], column=["trip_count_diff"], by=["quality"], showbox=False, whis=(0, 100))
    # Only the top row carries the timeline name; the automatic titles of the other rows are blanked
    ax_array[0][i].set_title(tl)
    tradeoff_df.query("timeline == @tl & phone_os == 'ios'").boxplot(ax = ax_array[1][i], column=["trip_count_diff"], by=["quality"], showbox=False, whis=(0, 100))
    ax_array[1][i].set_title("")
    tradeoff_df.query("timeline == @tl & phone_os == 'ios'").boxplot(ax = ax_array[2][i], column=["visit_report_diff"], by=["quality"], showbox=False, whis=(0, 100))
    ax_array[2][i].set_title("")

    # print(android_ax_returned.shape, ios_ax_returned.shape)

# Replace the numeric quality tick labels with role names, skipping the entry at index 0 (power_control)
for ax in ax_array[0]:
    ax.set_xticklabels(q2r_android_list[1:])
    ax.set_xlabel("")

for ax in ax_array[1]:
    ax.set_xticklabels(q2r_ios_list[1:])
    ax.set_xlabel("")

for ax in ax_array[2]:
    ax.set_xticklabels(q2r_ios_list[1:])
    ax.set_xlabel("")

ax_array[0][0].set_ylabel("Difference in trip counts (android)")
ax_array[1][0].set_ylabel("Difference in trip counts (ios)")
ax_array[2][0].set_ylabel("Difference in visit reports (ios)")
ifig.suptitle("Trip start/end detection v/s configured quality over multiple timelines")
# ifig.tight_layout()

In [None]:
# 4x3 grid of boxplots: one column per timeline; the rows are
#   0: start time difference (android)
#   1: end time difference (android)
#   2: start time difference (ios)
#   3: end time difference (ios)
# each grouped by the numeric "quality" column. All panels share the y axis.
ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(16,16), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
for i, tl in enumerate(timeline_list):
    tradeoff_df.query("timeline == @tl & phone_os == 'android'").boxplot(ax = ax_array[0][i], column=["start_ts_diff"], by=["quality"], showbox=False)
    # Only the top row carries the timeline name; the automatic titles of the other rows are blanked
    ax_array[0][i].set_title(tl)
    tradeoff_df.query("timeline == @tl & phone_os == 'android'").boxplot(ax = ax_array[1][i], column=["end_ts_diff"], by=["quality"], showbox=False)
    ax_array[1][i].set_title("")
    tradeoff_df.query("timeline == @tl & phone_os == 'ios'").boxplot(ax = ax_array[2][i], column=["start_ts_diff"], by=["quality"], showbox=False)
    ax_array[2][i].set_title("")
    tradeoff_df.query("timeline == @tl & phone_os == 'ios'").boxplot(ax = ax_array[3][i], column=["end_ts_diff"], by=["quality"], showbox=False)
    ax_array[3][i].set_title("")

    # print(android_ax_returned.shape, ios_ax_returned.shape)

# Replace the numeric quality tick labels with role names, skipping the entry at index 0 (power_control)
for ax in ax_array[0]:
    ax.set_xticklabels(q2r_android_list[1:])
    ax.set_xlabel("")
    
for ax in ax_array[1]:
    ax.set_xticklabels(q2r_android_list[1:])
    ax.set_xlabel("")


for ax in ax_array[2]:
    ax.set_xticklabels(q2r_ios_list[1:])
    ax.set_xlabel("")

for ax in ax_array[3]:
    ax.set_xticklabels(q2r_ios_list[1:])
    ax.set_xlabel("")

ax_array[0][0].set_ylabel("Start time diff mins (android)")
ax_array[1][0].set_ylabel("End time diff mins (android)")
ax_array[2][0].set_ylabel("Start time diff mins (ios)")
ax_array[3][0].set_ylabel("End time diff mins (ios)")
ifig.suptitle("Trip start end accuracy v/s configured quality over multiple timelines")
# ifig.tight_layout()