## Set up the dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev

In [None]:
# For plots
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
%matplotlib inline

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import pandas as pd
pd.options.display.float_format = '{:.6f}'.format
import arrow
import numpy as np

In [None]:
# Durations written as minutes * 60 (i.e. seconds); they are added to and
# subtracted from section `ts` bounds below to pad the search window, and
# THIRTY_MINUTES also serves as the fallback when no transition is found
THIRTY_MINUTES = 30 * 60
TEN_MINUTES = 10 * 60

## The spec

The spec defines what experiments were done, and over which time ranges. Once the experiment is complete, most of the structure is read back from the data, but we use the spec to validate that it all worked correctly. The spec also contains the ground truth for the legs. Here, we read the specs for three timelines: `unimodal_trip_car_bike_mtv_la`, `car_scooter_brex_san_jose` and `train_bus_ebike_mtv_ucb`.

In [None]:
# Where the specs live, and whose specs we want
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"


def _spec_details_for(spec_id):
    """Build the SpecDetails for `spec_id`, using the datastore and author above."""
    return eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, spec_id)


# One spec per timeline, created in the same order as before
sd_la = _spec_details_for("unimodal_trip_car_bike_mtv_la")
sd_sj = _spec_details_for("car_scooter_brex_san_jose")
sd_ucb = _spec_details_for("train_bus_ebike_mtv_ucb")

## The views

There are two main views for the data - the phone view and the evaluation view. 

### Phone view

In the phone view, the phone is primary, and then there is a tree that you can traverse to get the data that you want. Traversing that tree typically involves nested for loops; here's an example of loading the phone view and traversing it. You can replace the print statements with real code. When you are ready to check this in, please move the function to one of the Python modules so that we can invoke it more generally.

In [None]:
# Re-import the phone view module so that edits made while working on it
# are picked up by the cells below
importlib.reload(eipv)

In [None]:
# Phone view built from the unimodal_trip_car_bike_mtv_la spec
pv_la = eipv.PhoneView(sd_la)

In [None]:
# Phone view built from the car_scooter_brex_san_jose spec
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
# Phone view built from the train_bus_ebike_mtv_ucb spec
pv_ucb = eipv.PhoneView(sd_ucb)

## Number of transitions in a section

We should ideally have only one transition in every TRAVEL section.

In [None]:
def get_transition_mask_android(df):
    """
    Flag the rows of an android motion activity dataframe at which the
    value in the `zzbhB` column changes.

    :param df: dataframe with a numeric `zzbhB` column (presumably the
        obfuscated activity type code - TODO confirm)
    :return: boolean Series aligned with `df`; True where `zzbhB` differs
        from the previous row. The first row is never flagged.
    """
    # diff() is NaN for the first row; treat that as "no change".
    # Compare with != 0 instead of > 0 so that a switch to a *lower* code is
    # detected as well, consistent with get_transition_mask_ios, which flags
    # any change between consecutive rows.
    return df.zzbhB.diff().fillna(0) != 0

In [None]:
def get_transition_mask_ios(df):
    """
    Flag the rows of an iOS motion activity dataframe at which any of the
    per-mode columns changes relative to the previous row.

    :param df: dataframe with `walking`, `cycling`, `running` and
        `automotive` columns (presumably booleans - TODO confirm)
    :return: numpy array with one entry per row of `df`. The first entry is
        always False since the first row has no predecessor.
    """
    if len(df) == 0:
        # Keep the dtype boolean so that the result is still a valid mask
        return np.array([], dtype=bool)

    valid_modes = ["walking", "cycling", "running", "automotive"]
    # One row of differences per consecutive pair of input rows; a transition
    # is any pair in which at least one of the mode columns changed
    changed = np.diff(df[valid_modes], axis=0).any(axis=1)
    return np.concatenate(([False], changed))

In [None]:
def get_count_start_end_diff(sr, ma_df, jba_df, transition_mask_fn):
    """
    Summarize how the detected transitions line up with a section's bounds.

    :param sr: section range; only `sr["start_ts"]` and `sr["end_ts"]` are read
    :param ma_df: motion activity entries for the section itself; needs a
        `ts` column plus whatever `transition_mask_fn` reads
    :param jba_df: motion activity entries for a wider window around the
        section, same columns as `ma_df`
    :param transition_mask_fn: callable that takes a dataframe and returns a
        boolean mask flagging its transition rows
    :return: dict with
        - `count`: number of transition rows in `ma_df`
        - `start_ts_diff` / `end_ts_diff`: absolute difference between the
          section start / end and the first / last transition in `ma_df`
        - `expanded_start_ts_diff` / `expanded_end_ts_diff`: absolute
          difference between the section start / end and the closest
          transition in `jba_df`
        All differences are divided by 60 before being returned, and fall
        back to THIRTY_MINUTES / 60 when there are no transitions.
    """
    # Evaluate the mask only once; the number of transitions is the number of
    # selected rows (not the number of positive cells in those rows)
    ma_transition_points = ma_df[transition_mask_fn(ma_df)]
    count = len(ma_transition_points)
    if len(ma_transition_points) > 0:
        start_ts_diff = abs(sr["start_ts"] - ma_transition_points.iloc[0].ts)
        end_ts_diff = abs(sr["end_ts"] - ma_transition_points.iloc[-1].ts)
    else:
        start_ts_diff = THIRTY_MINUTES
        end_ts_diff = THIRTY_MINUTES

    jba_transition_points = jba_df[transition_mask_fn(jba_df)]
    if len(jba_transition_points) > 0:
        # In the wider window, use the closest transition rather than the
        # first/last one
        jba_start_ts_diff = (jba_transition_points.ts - sr["start_ts"]).abs().min()
        jba_end_ts_diff = (jba_transition_points.ts - sr["end_ts"]).abs().min()
    else:
        jba_start_ts_diff = THIRTY_MINUTES
        jba_end_ts_diff = THIRTY_MINUTES
    return {
        "count": count,
        "start_ts_diff": start_ts_diff / 60,
        "end_ts_diff": end_ts_diff / 60,
        "expanded_start_ts_diff": jba_start_ts_diff / 60,
        "expanded_end_ts_diff": jba_end_ts_diff / 60
    }

In [None]:
def get_tradeoff_entries(pv):
    """
    Walk the phone view tree and build one entry per evaluation section.

    Phones whose role contains "control" are skipped. For every remaining
    phone, evaluation range, trip and section, the valid motion activity
    entries are compared against the section bounds using
    get_count_start_end_diff.

    :param pv: phone view; `pv.map()` and `pv.spec_details.curr_spec["id"]`
        are read
    :return: list of dicts with the keys `phone_os`, `phone_label`,
        `timeline`, `run`, `duration`, `role`, `battery_drain`, `trip_id`,
        `section_id` plus everything returned by get_count_start_end_diff
    """
    tradeoff_entry_list = []
    for phone_os, phone_map in pv.map().items():
        print(15 * "=*")
        print(phone_os, phone_map.keys())
        # Which entries count as valid, and how transitions are detected,
        # only depends on the OS, so pick them once per OS
        if phone_os == "android":
            valid_entries_query = "zzbhB not in [3,4,5]"
            transition_mask_fn = get_transition_mask_android
        else:
            valid_entries_query = "automotive == True | cycling == True | running == True | walking == True"
            transition_mask_fn = get_transition_mask_ios
        for phone_label, phone_detail_map in phone_map.items():
            print(4 * ' ', 15 * "-*")
            print(4 * ' ', phone_label, phone_detail_map.keys())
            if "control" in phone_detail_map["role"]:
                print("Ignoring %s phone %s since they are always on" % (phone_detail_map["role"], phone_label))
                continue
            # this spec does not have any calibration ranges, but evaluation ranges are actually cooler
            for r in phone_detail_map["evaluation_ranges"]:
                print(8 * ' ', 30 * "=")
                print(8 * ' ',r.keys())
                print(8 * ' ',r["trip_id"], r["eval_common_trip_id"], r["eval_role"], len(r["evaluation_trip_ranges"]))
                bcs = r["battery_df"]["battery_level_pct"]
                delta_battery = bcs.iloc[0] - bcs.iloc[-1]
                print("Battery starts at %d, ends at %d, drain = %d" % (bcs.iloc[0], bcs.iloc[-1], delta_battery))
                for tr in r["evaluation_trip_ranges"]:
                    for section in tr["evaluation_section_ranges"]:
                        ma_df = section["motion_activity_df"]
                        # Expand the range in order to allow for detection beyond ground truth bounds.
                        # The bounds come from the section being processed, not from
                        # a notebook-level variable left over from another cell.
                        jba_df = r["motion_activity_df"].query("ts > %s & ts < %s" % (section["start_ts"] - THIRTY_MINUTES, section["end_ts"] + THIRTY_MINUTES))
                        csed_df_entry = get_count_start_end_diff(section,
                                            ma_df.query(valid_entries_query),
                                            jba_df.query(valid_entries_query),
                                            transition_mask_fn)
                        tradeoff_entry = {"phone_os": phone_os, "phone_label": phone_label,
                                      "timeline": pv.spec_details.curr_spec["id"],
                                     "run": r["trip_run"], "duration": r["duration"],
                                     "role": r["eval_role_base"], "battery_drain": delta_battery,
                                      "trip_id": tr["trip_id"], "section_id": section["trip_id"]}
                        tradeoff_entry.update(csed_df_entry)
                        tradeoff_entry_list.append(tradeoff_entry)

    return tradeoff_entry_list

In [None]:
# Spot check on a single section: the second section of the first trip in the
# first evaluation range of the ucb-sdb-ios-3 phone
r = pv_la.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][0]
print(r.keys())
sr = r["evaluation_trip_ranges"][0]["evaluation_section_ranges"][1]
print(sr["trip_id"], r.keys())

ma_df = sr["motion_activity_df"]
# Pad the section bounds by ten minutes on either side
padded_start_ts = sr["start_ts"] - TEN_MINUTES
padded_end_ts = sr["end_ts"] + TEN_MINUTES
jba_df = r["motion_activity_df"].query("ts > %s & ts < %s" % (padded_start_ts, padded_end_ts))

# First with all the entries...
print(get_count_start_end_diff(sr, ma_df, jba_df, get_transition_mask_ios))

# ...and then only with the entries in which at least one mode is set
valid_query = "automotive == True | cycling == True | running == True | walking == True"
for curr_df in (ma_df, jba_df):
    print(curr_df.columns)
    print(len(curr_df.query(valid_query)))
print(get_count_start_end_diff(sr, ma_df.query(valid_query), jba_df.query(valid_query), get_transition_mask_ios))

In [None]:
# Section entries in which at least one of the modes in valid_query is set
ma_df.query(valid_query)

In [None]:
# For each consecutive pair of valid entries, did any of the mode columns change?
mode_columns = ["walking", "cycling", "running", "automotive"]
ret_list = [
    pair_diff.any()
    for pair_diff in np.diff(ma_df.query(valid_query)[mode_columns], axis=0)
]
ret_list

In [None]:
# NOTE(review): `transition_points` was never defined anywhere in this
# notebook, so this cell raised a NameError on a fresh kernel. It is rebuilt
# here from the valid entries in the padded window (jba_df), mirroring the
# "expanded" computation in get_count_start_end_diff - please confirm that
# this is the frame that was originally inspected.
valid_jba_df = jba_df.query(valid_query)
# Force a boolean dtype so that the mask selects rows even when it is empty
transition_points = valid_jba_df[np.asarray(get_transition_mask_ios(valid_jba_df), dtype=bool)]
# Smallest signed offset of a transition from the hardcoded timestamp
(transition_points.ts - 1564274403.318182).min()

In [None]:
# Battery life is not considered at the evaluation trip level; the evaluation
# range is the finest granularity used for it, since we want to capture the
# overall drain for the timeline.
# Entries are collected in the order LA, San Jose, UCB.
tradeoff_entries_list = [
    entry
    for phone_view in (pv_la, pv_sj, pv_ucb)
    for entry in get_tradeoff_entries(phone_view)
]
tradeoff_df = pd.DataFrame(tradeoff_entries_list)

In [None]:
# Inspect the iOS entries for the train_bus_ebike_mtv_ucb timeline
tradeoff_df.query("phone_os == 'ios' & timeline == 'train_bus_ebike_mtv_ucb'")

In [None]:
# Role -> quality rank. HAMFDC and MAHFDC share rank 1: the former only shows
# up in the android list below and the latter only in the ios list.
r2q_map = dict(
    power_control=0,
    HAMFDC=1,
    MAHFDC=1,
    HAHFDC=2,
    accuracy_control=3,
)
# Quality rank -> role, per OS (the list index is the rank)
q2r_android_list = [
    "power_control",
    "HAMFDC",
    "HAHFDC",
    "accuracy_control",
]
q2r_ios_list = [
    "power_control",
    "MAHFDC",
    "HAHFDC",
    "accuracy_control",
]

In [None]:
# Numeric rank for each role, so that the plots come out in quality order.
# A role that is missing from r2q_map still fails with a KeyError.
tradeoff_df["quality"] = tradeoff_df.role.apply(r2q_map.__getitem__)

In [None]:
# Grid of boxplots: one column per timeline, one row per (OS, metric) pair
ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(16,16), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]

# For the ios rows, hide the box and draw the whiskers from the 0th to the
# 100th percentile, i.e. min to max. This is the documented replacement for
# whis="range", which current matplotlib versions no longer accept.
ios_box_kwargs = {"showbox": False, "whis": (0, 100)}

# One tuple per row: OS to select, column to plot, extra boxplot arguments,
# role names for the x ticks, and the y label.
# The *_ts_diff values were divided by 60 in get_count_start_end_diff, so the
# y axis is in minutes, not seconds.
row_spec_list = [
    ("android", "start_ts_diff", {}, q2r_android_list,
     "Difference in mins between section start and first valid transition (android)"),
    ("android", "end_ts_diff", {}, q2r_android_list,
     "Difference in mins between section end and last valid transition (android)"),
    ("ios", "start_ts_diff", ios_box_kwargs, q2r_ios_list,
     "Difference in mins between section start and first valid transition (ios)"),
    ("ios", "end_ts_diff", ios_box_kwargs, q2r_ios_list,
     "Difference in mins between section end and last valid transition (ios)"),
]

for i, tl in enumerate(timeline_list):
    for row, (row_os, diff_column, box_kwargs, role_list, row_ylabel) in enumerate(row_spec_list):
        curr_ax = ax_array[row][i]
        tradeoff_df.query("timeline == @tl & phone_os == @row_os").boxplot(
            ax=curr_ax, column=[diff_column], by=["quality"], **box_kwargs)
        # Only the top row carries the timeline name
        curr_ax.set_title(tl if row == 0 else "")
        # Both ios rows use the ios role names (the last row used to be
        # labelled with the android ones)
        curr_ax.set_xticklabels(role_list[1:])
        curr_ax.set_xlabel("")
        if i == 0:
            curr_ax.set_ylabel(row_ylabel)

ifig.suptitle("Section start/end accuracy v/s quality over multiple timelines")
# ifig.tight_layout()

In [None]:
# Same grid layout as the previous figure, but for the "expanded" differences,
# i.e. the distance to the closest valid transition in the padded window
ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(16,16), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]

android_query = "timeline == @tl & phone_os == 'android'"
ios_query = "timeline == @tl & phone_os == 'ios'"
# One (query, column) pair per row of the grid, top to bottom
panel_row_list = [
    (android_query, "expanded_start_ts_diff"),
    (android_query, "expanded_end_ts_diff"),
    (ios_query, "expanded_start_ts_diff"),
    (ios_query, "expanded_end_ts_diff"),
]

for i, tl in enumerate(timeline_list):
    for row, (row_query, diff_column) in enumerate(panel_row_list):
        curr_ax = ax_array[row][i]
        tradeoff_df.query(row_query).boxplot(ax = curr_ax, column=[diff_column], by=["quality"])
        # Only the top row carries the timeline name
        curr_ax.set_title(tl if row == 0 else "")

# Role names for the x ticks: android for the first two rows, ios for the rest
row_role_lists = [q2r_android_list, q2r_android_list, q2r_ios_list, q2r_ios_list]
for ax_row, role_list in zip(ax_array, row_role_lists):
    for ax in ax_row:
        ax.set_xticklabels(role_list[1:])
        ax.set_xlabel("")

# Only the leftmost axis of each row gets a y label
row_ylabel_list = [
    "section start - closest (android)",
    "section end - closest (android)",
    "section start - closest (ios)",
    "section end - closest (ios)",
]
for ax_row, row_ylabel in zip(ax_array, row_ylabel_list):
    ax_row[0].set_ylabel(row_ylabel)

ifig.suptitle("Expanded section start/end accuracy for valid transitions v/s quality over multiple timelines")
# ifig.tight_layout()