## Set up the dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev
import emeval.viz.geojson as ezgj

In [None]:
# Analytics results
import emeval.metrics.baseline_segmentation as embs

In [None]:
# For plots
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
%matplotlib inline

import IPython.display as ipyd

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import pandas as pd
pd.options.display.float_format = '{:.6f}'.format
import arrow
import numpy as np

In [None]:
# Time windows expressed in seconds (minutes * 60)
SECONDS_PER_MINUTE = 60
THIRTY_MINUTES = 30 * SECONDS_PER_MINUTE
TEN_MINUTES = 10 * SECONDS_PER_MINUTE

## The spec

The spec defines what experiments were done, and over which time ranges. Once the experiment is complete, most of the structure is read back from the data, but we use the spec to validate that it all worked correctly. The spec also contains the ground truth for the legs. Here, we read the specs for all four timelines: the LA trip, the San Jose trip, the trip to UC Berkeley, and its rerouted variant.

In [None]:
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"


def _load_spec(spec_id):
    """Build the SpecDetails for `spec_id` using the shared datastore URL and author."""
    return eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, spec_id)


# One spec per timeline; the reroute spec is merged into the UC Berkeley
# timeline once the tradeoff dataframe has been built.
sd_la = _load_spec("unimodal_trip_car_bike_mtv_la")
sd_sj = _load_spec("car_scooter_brex_san_jose")
sd_ucb = _load_spec("train_bus_ebike_mtv_ucb")
sd_ucb_reroute = _load_spec("train_bus_ebike_sm_reroute_mtv_ucb")

## The views

There are two main views for the data - the phone view and the evaluation view. 

### Phone view

In the phone view, the phone is primary, and then there is a tree that you can traverse to get the data that you want. Traversing that tree typically involves nested for loops; here's an example of loading the phone view and traversing it. You can replace the print statements with real code. When you are ready to check this in, please move the function to one of the Python modules so that we can invoke it more generally.

In [None]:
# Pick up local edits to the phone_view input module without restarting the kernel
importlib.reload(eipv)

In [None]:
# Phone view built from the LA timeline spec
pv_la = eipv.PhoneView(sd_la)

In [None]:
# Phone view built from the San Jose timeline spec
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
# Phone view built from the UC Berkeley timeline spec
pv_ucb = eipv.PhoneView(sd_ucb)

In [None]:
# Phone view built from the rerouted UC Berkeley timeline spec
pv_ucb_reroute = eipv.PhoneView(sd_ucb_reroute)

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the dependency cells at the top
import emeval.analysed.phone_view as eapv
# Reload so that local edits to the module are picked up
importlib.reload(eapv)

In [None]:
# Server holding the analysed results, and the three keys requested for every view
ANALYSIS_SERVER_URL = "http://localhost:8080"
ANALYSED_KEYS = ("analysis/recreated_location", "analysis/cleaned_trip", "analysis/cleaned_section")

av_la = eapv.create_analysed_view(pv_la, ANALYSIS_SERVER_URL, *ANALYSED_KEYS)
av_sj = eapv.create_analysed_view(pv_sj, ANALYSIS_SERVER_URL, *ANALYSED_KEYS)
av_ucb = eapv.create_analysed_view(pv_ucb, ANALYSIS_SERVER_URL, *ANALYSED_KEYS)
av_ucb_reroute = eapv.create_analysed_view(pv_ucb_reroute, ANALYSIS_SERVER_URL, *ANALYSED_KEYS)

In [None]:
# Display the analysed view for the LA timeline (same arguments as the call that builds av_la)
eapv.create_analysed_view(pv_la, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/cleaned_section")

In [None]:
# Numeric codes found in the android "zzbhB" field and the mode names they stand for
ANDROID_MODE_MAP = {
    0: "AUTOMOTIVE",
    1: "CYCLING",
    2: "WALKING",
    3: "STATIONARY",
}


def ANDROID_MAP_FN(t):
    """Return the mode name for the code stored under t["zzbhB"]."""
    return ANDROID_MODE_MAP[t["zzbhB"]]

def IOS_MAP_FN(t):
    """Map an iOS activity entry to an upper-cased mode name.

    `t` is wrapped in a `pd.Series`. When exactly one field compares equal to
    True, its label is returned upper-cased; otherwise "INVALID" is returned.
    """
    flags = pd.Series(t)
    # Elementwise comparison against True is intentional: non-boolean fields drop out
    active_labels = list(flags.index[(flags == True).values])
    # Zero or several active flags are ambiguous; something smarter could go here
    return active_labels[0].upper() if len(active_labels) == 1 else "INVALID"

# Per-OS function that turns a raw activity entry into a mode name
MAP_FNS = {"android": ANDROID_MAP_FN, "ios": IOS_MAP_FN}
# Per-OS transition-mask function taken from the baseline segmentation module
TRANSITION_FNS = {"android": embs.get_transition_mask_android, "ios": embs.get_transition_mask_ios}

In [None]:
def get_tradeoff_entries(pv):
    """Collect one tradeoff record per evaluated section in the given view.

    Walks pv.map() (OS -> phone -> evaluation range -> trip -> section) and
    prints progress along the way. Phones whose role contains "control" and
    sections whose ground-truth leg type is "WAITING" are skipped. Each record
    holds identifying fields, the battery drain over the evaluation range and
    the output of embs.get_count_start_end_ts_diff for the section.

    Returns the records as a list of dicts.
    """
    pad4, pad8, pad12 = 4 * ' ', 8 * ' ', 12 * ' '
    collected = []
    for phone_os, phone_map in pv.map().items():
        print(15 * "=*")
        print(phone_os, phone_map.keys())
        for phone_label, phone_detail_map in phone_map.items():
            print(pad4, 15 * "-*")
            print(pad4, phone_label, phone_detail_map.keys())
            if "control" in phone_detail_map["role"]:
                print("Ignoring %s phone %s since they are always on" % (phone_detail_map["role"], phone_label))
                continue
            # No calibration ranges are used here; only the evaluation ranges are walked
            for eval_range in phone_detail_map["evaluation_ranges"]:
                print(pad8, 30 * "=")
                print(pad8, eval_range.keys())
                print(pad8, eval_range["trip_id"], eval_range["eval_common_trip_id"],
                      eval_range["eval_role"], len(eval_range["evaluation_trip_ranges"]))
                # Drain is the first battery reading minus the last one for the whole range
                battery_levels = eval_range["battery_df"]["battery_level_pct"]
                first_level, last_level = battery_levels.iloc[0], battery_levels.iloc[-1]
                delta_battery = first_level - last_level
                print("Battery starts at %d, ends at %d, drain = %d" % (first_level, last_level, delta_battery))
                for trip in eval_range["evaluation_trip_ranges"]:
                    matching_section_map = embs.find_matching_segments(
                        trip["evaluation_section_ranges"],
                        "trip_id",
                        [sensed["data"] for sensed in trip["sensed_section_ranges"]])
                    print("For trip %s, found matching ranges %s" % (trip["trip_id"], matching_section_map))
                    for section in trip["evaluation_section_ranges"]:
                        section_gt_leg = pv.spec_details.get_ground_truth_for_leg(
                            trip["trip_id_base"], section["trip_id_base"])
                        if section_gt_leg["type"] == "WAITING":
                            print("Skipping WAITING section %s %s with potential partway transitions" %
                                  (trip["trip_id"], section["trip_id"]))
                            continue
                        print(pad12, section["trip_id"], section["trip_id_base"], trip["trip_id"])
                        diff_results = embs.get_count_start_end_ts_diff(
                            section, matching_section_map[section["trip_id"]])
                        # Key order is kept as-is because it determines the dataframe column order
                        record = {
                            "phone_os": phone_os,
                            "phone_label": phone_label,
                            "timeline": pv.spec_details.curr_spec["id"],
                            "range_id": eval_range["trip_id"],
                            "run": eval_range["trip_run"],
                            "duration": eval_range["duration"],
                            "role": eval_range["eval_role_base"],
                            "battery_drain": delta_battery,
                            "section_count": len(trip["sensed_section_ranges"]),
                            "trip_id": trip["trip_id"],
                            "section_id": section["trip_id"],
                            "section_type": section_gt_leg["type"],
                        }
                        record.update(diff_results)
                        collected.append(record)

    return collected

In [None]:
# Battery life is not examined at the evaluation trip level; the entries stop at the
# evaluation range so that the overall drain for the timeline is captured
analysed_views = [av_la, av_sj, av_ucb, av_ucb_reroute]
tradeoff_entries_list = [
    entry
    for analysed_view in analysed_views
    for entry in get_tradeoff_entries(analysed_view)
]
tradeoff_df = pd.DataFrame(tradeoff_entries_list)

## Add in other entries to the dataframe to allow us to plot better

In [None]:
# Let's merge the reroutes into the regular UC Berkeley timeline so they are plotted together.
# The result is assigned back to the column instead of calling `replace(..., inplace=True)`
# on `tradeoff_df.timeline`: the in-place form acts on an intermediate Series, which pandas
# warns about and which leaves the frame unchanged when copy-on-write is enabled.
tradeoff_df["timeline"] = tradeoff_df["timeline"].replace(
    "train_bus_ebike_sm_reroute_mtv_ucb", "train_bus_ebike_mtv_ucb")

In [None]:
# (role name in the data, label shown on plots), listed in increasing quality order;
# the position in the list is the numeric quality
_ROLE_AND_LABEL = [
    ("power_control", "power"),
    ("HAMFDC", "HAMFDC"),
    ("MAHFDC", "MAHFDC"),
    ("HAHFDC", "HAHFDC"),
    ("accuracy_control", "accuracy"),
]
r2q_map = {role: quality for quality, (role, _) in enumerate(_ROLE_AND_LABEL)}
q2r_map = {quality: label for quality, (_, label) in enumerate(_ROLE_AND_LABEL)}

In [None]:
# Numeric rank for each role so that the plots come out in order
tradeoff_df["quality"] = tradeoff_df["role"].apply(r2q_map.__getitem__)
# Offset of the "count" value from 1 (bracket access, since `.count` is a DataFrame method)
tradeoff_df["count_diff"] = tradeoff_df["count"] - 1

## Timeline + section count variations

We should ideally have only one transition in every TRAVEL section

In [None]:
# Section ids recorded for run 1 of the HAMFDC role on the LA timeline
tradeoff_df.query("timeline=='unimodal_trip_car_bike_mtv_la' & run == 1 & role == 'HAMFDC'").section_id

In [None]:
# One column per timeline; android on the top row and ios on the bottom row
ifig, ax_array = plt.subplots(nrows=2, ncols=3, figsize=(9, 6), sharex=False, sharey=False)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
os_row_queries = [
    "timeline == @tl & phone_os == 'android'",
    "timeline == @tl & phone_os == 'ios'",
]
for i, tl in enumerate(timeline_list):
    for row, row_query in enumerate(os_row_queries):
        curr_ax = ax_array[row][i]
        tradeoff_df.query(row_query).boxplot(ax=curr_ax, column=["count_diff"], by=["quality"])
        # Only the top row carries the timeline name; the title is blanked on the row below
        curr_ax.set_title(tl if row == 0 else "")

# Swap the numeric quality tick labels for the role labels and drop the x-axis label
for ax in ax_array.flat:
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[0][0].set_ylabel("Difference in section counts (android)")
ax_array[1][0].set_ylabel("Difference in section counts (ios)")
ifig.suptitle("Section count differences v/s configured quality over multiple timelines")

In [None]:
# Section transitions for one android phone/range/trip of the LA phone view, computed from its
# motion-activity entries after applying the module's ANDROID_VALID_QUERY_NO_STILL filter.
# NOTE(review): TRANSITION_FNS is also defined in this notebook; confirm that the embs module
# exposes its own TRANSITION_FNS, otherwise this attribute lookup fails.
embs.find_section_transitions(pv_la.map()["android"]["ucb-sdb-android-2"]["evaluation_ranges"][3]["evaluation_trip_ranges"][0]["motion_activity_df"].query(embs.ANDROID_VALID_QUERY_NO_STILL), embs.TRANSITION_FNS["android"])

In [None]:
def plot_count_with_errors(ax_array, phone_os):
    """Draw the ground-truth and sensed section counts for one OS, one axis per timeline.

    Reads the notebook-level `timeline_trip_gt`, `tradeoff_df` and `q2r_map`.
    For each timeline, the ground-truth count is drawn as a bar at x=0. For
    qualities 1 to 3, the mean `section_count` is drawn as a bar whose error
    whiskers reach down to the minimum and up to the maximum. The values and
    the whisker sizes are printed as they are computed.
    """
    for i, (tl, trip_gt) in enumerate(timeline_trip_gt.items()):
        curr_ax = ax_array[i]
        curr_ax.bar(0, trip_gt)
        for q in range(1, 4):
            counts = tradeoff_df.query("timeline == @tl & phone_os == @phone_os & quality == @q").section_count
            lowest, average, highest = counts.min(), counts.mean(), counts.max()
            print("%s %s %s values = %s %s %s" % (phone_os, tl, q2r_map[q], lowest, average, highest))
            lower_error = average - lowest
            upper_error = highest - average
            curr_ax.bar(x=q, height=average, yerr=[[lower_error], [upper_error]])
            print("%s %s %s errors = %s %s %s" % (phone_os, tl, q2r_map[q], lower_error, average, upper_error))
        curr_ax.set_title(tl)

In [None]:
def section_count(pv):
    """Total number of legs over all evaluation trips in the view's current spec."""
    return sum([len(t["legs"]) for t in pv.spec_details.curr_spec["evaluation_trips"]])


ifig, ax_array = plt.subplots(nrows=2, ncols=3, figsize=(9, 4.5), sharex=False, sharey=False)
# Ground-truth section count for each timeline, in plotting order
timeline_trip_gt = {
    timeline_id: section_count(phone_view)
    for timeline_id, phone_view in [
        ("train_bus_ebike_mtv_ucb", pv_ucb),
        ("car_scooter_brex_san_jose", pv_sj),
        ("unimodal_trip_car_bike_mtv_la", pv_la),
    ]
}

# android on the top row, ios on the bottom row
for row_axes, row_os in zip(ax_array, ["android", "ios"]):
    plot_count_with_errors(row_axes, row_os)

quality_ticks = range(0, 4)
for ax in ax_array.flat:
    ax.set_xticks(quality_ticks)
    ax.set_xticklabels([q2r_map[r] for r in quality_ticks])
    ax.set_yticks(range(0, tradeoff_df.section_count.max(), 3))

# NOTE(review): the bars show section counts, yet the labels say "nTrips" - confirm the wording
ax_array[0, 0].set_ylabel("nTrips (android)")
ax_array[1, 0].set_ylabel("nTrips (ios)")

ifig.tight_layout(pad=0.85)

In [None]:
# Blank out the end-time difference for the android HAHFDC rows of this trip; the start/end
# plots further down annotate the end-time panel with "Excluding trips where battery ran out"
out_of_battery_phones = tradeoff_df.query(
    "timeline=='train_bus_ebike_mtv_ucb' & role=='HAHFDC' & trip_id=='berkeley_to_mtv_SF_express_bus_0' & phone_os == 'android'")
tradeoff_df.loc[out_of_battery_phones.index, "end_diff_mins"] = float("nan")

## Timeline + section count variations (TRAVEL sections only)

We should ideally have only one transition in every TRAVEL section

In [None]:
# Section ids recorded for run 1 of the HAMFDC role on the LA timeline
tradeoff_df.query("timeline=='unimodal_trip_car_bike_mtv_la' & run == 1 & role == 'HAMFDC'").section_id

In [None]:
# One column per timeline; android on the top row and ios on the bottom row.
# NOTE(review): the queries carry no section_type filter, so this plots the same rows as the
# earlier count_diff figure - confirm whether a TRAVEL-only filter was intended here.
ifig, ax_array = plt.subplots(nrows=2, ncols=3, figsize=(9, 6), sharex=False, sharey=False)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
os_row_queries = [
    "timeline == @tl & phone_os == 'android'",
    "timeline == @tl & phone_os == 'ios'",
]
for i, tl in enumerate(timeline_list):
    for row, row_query in enumerate(os_row_queries):
        curr_ax = ax_array[row][i]
        tradeoff_df.query(row_query).boxplot(ax=curr_ax, column=["count_diff"], by=["quality"])
        # Only the top row carries the timeline name; the title is blanked on the row below
        curr_ax.set_title(tl if row == 0 else "")

# Swap the numeric quality tick labels for the role labels and drop the x-axis label
for ax in ax_array.flat:
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[0][0].set_ylabel("Difference in section counts (android)")
ax_array[1][0].set_ylabel("Difference in section counts (ios)")
ifig.suptitle("Section count differences v/s configured quality over multiple timelines")

In [None]:
# TRAVEL sections with a negative count_diff but a positive end_diff_mins
tradeoff_df.query("count_diff < 0 & section_type == 'TRAVEL' & end_diff_mins > 0")

In [None]:
# Number of rows per section id among TRAVEL sections with a positive count_diff
ipyd.display(tradeoff_df.query("count_diff > 0 & section_type == 'TRAVEL'").section_id.value_counts())

### Start-end results

#### Overall

In [None]:
ifig, ax_array = plt.subplots(nrows=1, ncols=4, figsize=(12, 3), sharex=False, sharey=True)
# (row filter, plotted column, panel title) for each of the four panels, left to right.
# NOTE(review): every filter ends in a bare `& start_diff_mins`, i.e. the float column is used
# as a boolean term, and the end-time panels filter on the start column as well; the
# per-timeline figure uses `start_diff_mins < 30` / `end_diff_mins < 30` - confirm the intent.
panels = [
    ("phone_os == 'android' & start_diff_mins", "start_diff_mins", "start time (android)"),
    ("phone_os == 'android' & start_diff_mins", "end_diff_mins", "end time (android)"),
    ("phone_os == 'ios' & start_diff_mins", "start_diff_mins", "start_time (ios)"),
    ("phone_os == 'ios' & start_diff_mins", "end_diff_mins", "end_time (ios)"),
]
for panel_ax, (row_filter, diff_col, panel_title) in zip(ax_array, panels):
    tradeoff_df.query(row_filter).boxplot(ax=panel_ax, column=[diff_col], by=["quality"])
    panel_ax.set_title(panel_title)

# Swap the numeric quality tick labels for the role labels and drop the x-axis label
for ax in ax_array:
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[1].text(0.55, 25, "Excluding trips where battery ran out")

ax_array[0].set_ylabel("Diff (mins)")
# Blank out the figure-level title
ifig.suptitle("")

#### Timeline specific

In [None]:
# One column per timeline; rows are android start, android end, ios start, ios end
ifig, ax_array = plt.subplots(nrows=4, ncols=3, figsize=(10, 10), sharex=False, sharey=True)
timeline_list = ["train_bus_ebike_mtv_ucb", "car_scooter_brex_san_jose", "unimodal_trip_car_bike_mtv_la"]
# (row filter, plotted column) for each row; only differences below 30 are plotted
row_specs = [
    ("timeline == @tl & phone_os == 'android' & start_diff_mins < 30", "start_diff_mins"),
    ("timeline == @tl & phone_os == 'android' & end_diff_mins < 30", "end_diff_mins"),
    ("timeline == @tl & phone_os == 'ios' & start_diff_mins < 30", "start_diff_mins"),
    ("timeline == @tl & phone_os == 'ios' & end_diff_mins < 30", "end_diff_mins"),
]
for i, tl in enumerate(timeline_list):
    for row, (row_filter, diff_col) in enumerate(row_specs):
        curr_ax = ax_array[row][i]
        tradeoff_df.query(row_filter).boxplot(ax=curr_ax, column=[diff_col], by=["quality"])
        # Only the top row carries the timeline name; the title is blanked on the rows below
        curr_ax.set_title(tl if row == 0 else "")

# Swap the numeric quality tick labels for the role labels and drop the x-axis label
for ax in ax_array.flat:
    ax.set_xticklabels([q2r_map[int(t.get_text())] for t in ax.get_xticklabels()])
    ax.set_xlabel("")

ax_array[1, 0].text(0.55, 25, "Excluding trips where battery ran out")

row_labels = ["Start time diff (android)", "End time diff (android)",
              "Start time diff (ios)", "End time diff (ios)"]
for row, row_label in enumerate(row_labels):
    ax_array[row][0].set_ylabel(row_label)
ifig.suptitle("Section start end accuracy (mins) v/s configured quality over multiple timelines")

In [None]:
# Top-level keys of the current spec behind the UC Berkeley phone view
pv_ucb.spec_details.curr_spec.keys()

### Anomaly checks

We can clearly see that there are several outliers with the start/end timestamps for the sections. Let us explore these in greater detail and see if we can find any patterns.

In [None]:
def fmt(ts):
    """Parse `ts` with arrow and convert it to the America/Los_Angeles timezone."""
    return arrow.get(ts).to("America/Los_Angeles")


def check_outlier(eval_range, trip_idx, section_id):
    """Print and display the context needed to investigate one outlier section.

    Looks up the trip at `trip_idx` in `eval_range` and the ground-truth section
    whose "trip_id" equals `section_id`, then shows: the ground-truth start and
    end, every sensed section range, the match that embs.find_matching_segments
    reports for the section, the head and tail of the trip's motion activity,
    and the range-level activity in the 30 minutes after the trip ends.

    Side effect: adds a "fmt_time" column to the range-level and trip-level
    "motion_activity_df" frames.
    """
    eval_trip = eval_range["evaluation_trip_ranges"][trip_idx]
    # Add a readable time column to both activity frames (range level first, then trip level)
    for holder in (eval_range, eval_trip):
        holder["motion_activity_df"]["fmt_time"] = holder["motion_activity_df"].ts.apply(fmt)

    ground_truth_sections = eval_trip["evaluation_section_ranges"]
    eval_section = [s for s in ground_truth_sections if s["trip_id"] == section_id][0]
    print(fmt(eval_section["start_ts"]), "->", fmt(eval_section["end_ts"]))
    print([(fmt(ssr["data"]["start_ts"]), fmt(ssr["data"]["end_ts"])) for ssr in eval_trip["sensed_section_ranges"]])

    all_matches = embs.find_matching_segments(
        eval_trip["evaluation_section_ranges"], "trip_id",
        [sr["data"] for sr in eval_trip["sensed_section_ranges"]])
    match = all_matches[section_id]
    print(match)
    print([(fmt(cm["start_ts"]), fmt(cm["end_ts"])) for cm in match["match"]])

    trip_activity_df = eval_trip["motion_activity_df"]
    print("trip activity head")
    ipyd.display(trip_activity_df.head(n=3))
    print("trip activity tail")
    ipyd.display(trip_activity_df.tail(n=3))

    # The name `trip_end_ts` is referenced from inside the query string below
    trip_end_ts = eval_trip["end_ts"]
    print("post-trip end activity head")
    ipyd.display(eval_range["motion_activity_df"].query("@trip_end_ts <= ts <= @trip_end_ts + 30 * 60").head())

#### sections which don't max out but which have large start/end

- all the `walk_start` entries occur because we only started tracking after the walk start section was complete; the walk section at the beginning was too short
- the `tt_*` ones are likely to be similar since they are short
- the others (`walk_downtown_urban_canyon_0`, `commuter_rail_aboveground_0`, `ebike_bikeshare_urban_long_0`) are investigated in greater detail below

In [None]:
# Rows whose end_diff_mins lies strictly between 25 and 30
tradeoff_df.query("25 < end_diff_mins < 30")

##### walk_start

too short, does not have sufficient transitions

In [None]:
# San Jose analysed view: android-3, evaluation range 0, trip 0
check_outlier(av_sj.map()['android']['ucb-sdb-android-3']["evaluation_ranges"][0], 0, "walk_start_0")

##### commuter_rail_aboveground_0

too many transitions = flip flop. The GT is `08:31` -> `09:12` but the closest matching transition is from `08:34` to `08:46`. And in fact, we get one `walking` entry at around `2019-07-26T08:46:43.940675-07:00` which breaks up an otherwise consistent section.

In [None]:
# UC Berkeley phone view: ios-2, evaluation range 2, trip 0
check_outlier(pv_ucb.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][2], 0, "commuter_rail_aboveground_0")

In [None]:
# Epoch seconds for the two ends of the interval inspected in the next cell.
# `Arrow.timestamp` is a property in arrow < 1.0 but a method from 1.0 onwards, so the
# value is taken from the underlying datetime to get integers on either version.
int(arrow.get("2019-07-26T08:31:56.065814-07:00").datetime.timestamp()), int(arrow.get("2019-07-26T09:12:01.181978-07:00").datetime.timestamp())

In [None]:
# Trip activity between the two timestamps computed in the previous cell, with the module's
# iOS no-still filter applied; `.loc[50:60]` then selects by index label
pv_ucb.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][2]["evaluation_trip_ranges"][0]["motion_activity_df"].query("1564155116 <= ts <= 1564157521").query(embs.IOS_VALID_QUERY_NO_STILL).loc[50:60]

##### walk_downtown_urban_canyon_0

no transition at end, bleeds over to the light rail. GT is `17:51` to `18:00`, closest range is `17:51` to `18:28`

In [None]:
# UC Berkeley phone view: android-3, evaluation range 2, trip 2
check_outlier(pv_ucb.map()['android']['ucb-sdb-android-3']["evaluation_ranges"][2], 2, "walk_downtown_urban_canyon_0")

In [None]:
# Epoch seconds for 18:00 local time, the start of the interval inspected in the next cell.
# `Arrow.timestamp` is a property in arrow < 1.0 but a method from 1.0 onwards, so the
# value is taken from the underlying datetime to get an integer on either version.
int(arrow.get("2019-07-26T18:00:00-07:00").datetime.timestamp())

In [None]:
# Trip activity from 18:00 (the timestamp computed in the previous cell) to about 18:28 local time
pv_ucb.map()['android']['ucb-sdb-android-3']["evaluation_ranges"][2]["evaluation_trip_ranges"][2]["motion_activity_df"].query("1564189200 <= ts <= 1564190904")

##### ebike_bikeshare_urban_long

no transition at end, bleeds over to the express_bus. GT is `16:36` to `16:56`, first range is `16:37` to `17:23`

In [None]:
# UC Berkeley phone view: ios-3, evaluation range 1, trip 2
check_outlier(pv_ucb.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"][1], 2, "ebike_bikeshare_urban_long_0")

#### Sections which do max out

This has essentially one entry, which occurs because we get no points at all.

In [None]:
# Per-section count of rows where both differences equal 30, leaving out walk_start_0 and walk_end_0
tradeoff_df.query("start_diff_mins == 30 & end_diff_mins == 30 & section_id != 'walk_start_0' & section_id != 'walk_end_0'").groupby("section_id").section_id.count()

In [None]:
# UC Berkeley phone view: ios-3, evaluation range 0, trip 1
check_outlier(pv_ucb.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"][0], 1, "walk_urban_university_0")

In [None]:
# Number of motion-activity entries in each of the first three trips of evaluation range 0
ios3_first_range = pv_ucb.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"][0]
for i in range(3):
    trip_activity_df = ios3_first_range["evaluation_trip_ranges"][i]["motion_activity_df"]
    print(trip_activity_df.ts.count())

In [None]:
# check_outlier(pv_ucb.map()['android']['ucb-sdb-android-3']["evaluation_ranges"][2], 1, "walk_urban_university_0")

In [None]:
# r = pv_ucb.map()['android']['ucb-sdb-android-3']["evaluation_ranges"][2]
# tr = r["evaluation_trip_ranges"][1]
# trip_end_ts = tr["end_ts"]
# trip_ma_df = tr["motion_activity_df"]
# extended_ma_df = r["motion_activity_df"].query("@trip_end_ts <= ts <= @trip_end_ts + 30 * 60")
# ma_df = pd.concat([trip_ma_df, extended_ma_df], axis="index")
# curr_trip_section_transitions = embs.find_section_transitions(ma_df.query(VALID_QUERIES_NO_STILL["android"]),
#                                                             TRANSITION_FNS["android"])
# still_section_transitions = extended_ma_df.query(STILL_ENTRIES["android"])
# if len(still_section_transitions) > 0:
#     curr_trip_section_transitions = curr_trip_section_transitions.append(still_section_transitions.iloc[0])
# ipyd.display(curr_trip_section_transitions)

##### suburb_bicycling

In [None]:
# suburb_bicycling_0 rows where both differences equal 30
tradeoff_df.query("start_diff_mins == 30 & end_diff_mins == 30 & section_id == 'suburb_bicycling_0'")

In [None]:
# LA analysed view: ios-2, evaluation range 4, trip 1
check_outlier(av_la.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][4], 1, "suburb_bicycling_0")

In [None]:
# Compare the ground-truth and sensed trip boundaries for the LA ios-2 phone, evaluation range 4
r = av_la.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][4]
tr = r["evaluation_trip_ranges"][1]
print([(fmt(sr["start_ts"]), fmt(sr["end_ts"])) for sr in r["evaluation_trip_ranges"]])
print([(sr["data"]["start_fmt_time"], sr["data"]["end_fmt_time"]) for sr in r["sensed_trip_ranges"]])
ipyd.display(r["transition_df"][["currState","transition", "fmt_time"]])

# Map the ground-truth leg against the locations from the same range in the phone view.
# The leg comes from the LA spec, so the LA spec object (not the UC Berkeley one) is also
# used to build the leg geojson and to format the popup times.
pvr = pv_la.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][4]
gt_leg = sd_la.get_ground_truth_for_leg("suburb_bicycling", "suburb_bicycling")
print(gt_leg["id"])
curr_map = ezgj.get_map_for_geojson(sd_la.get_geojson_for_leg(gt_leg), name="ground_truth")
# Popup text for a location row: its index and formatted time
name_err_time = lambda lr: "%d: %s" % (lr["index"], sd_la.fmt(lr["ts"], "MM-DD HH:mm:ss"))
# True for points inside two short, hard-coded timestamp windows; passed as `stickyfn` below
gt_16k = lambda lr: (1567272304 <= lr["ts"] <= 1567272305) or (1567277181 <= lr["ts"] <= 1567277184)
folium.GeoJson(ezgj.get_geojson_for_loc_df(pvr["location_df"], color="red"), name="sensed_values").add_to(curr_map)
ezgj.get_fg_for_loc_df(pvr["location_df"], name="sensed_points", color="red", popupfn=name_err_time, stickyfn=gt_16k).add_to(curr_map)
folium.LayerControl().add_to(curr_map)
curr_map
# print([(sr["data"]["start_fmt_time"], sr["data"]["end_fmt_time"]) for sr in av_la.map()['ios']['ucb-sdb-ios-3']["sensed_section_ranges"]])
# print([(sr["data"]["start_fmt_time"], sr["data"]["end_fmt_time"]) for sr in tr["sensed_section_ranges"]])
# ma_df = tr["motion_activity_df"]
# we may get some transitions after the trip ends 
# let's expand the activity range to account for that
# trip_end_ts = tr["end_ts"]
# ma_df = pd.concat([ma_df, 
#             r["motion_activity_df"].query("@trip_end_ts <= ts <= @trip_end_ts + 30 * 60")],
#             axis="index")
# embs.find_section_transitions(ma_df.query(IOS_VALID_QUERY_WITH_STILL), get_transition_mask_ios).count()

In [None]:
# Number of motion-activity entries for trip index 1 in each of the first three evaluation ranges
la_ios3_ranges = pv_la.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"]
for i in range(3):
    trip_activity_df = la_ios3_ranges[i]["evaluation_trip_ranges"][1]["motion_activity_df"]
    print(trip_activity_df.ts.count())

##### walk_downtown_urban_canyon

In [None]:
# walk_downtown_urban_canyon_0 rows where both differences equal 30
tradeoff_df.query("start_diff_mins == 30 & end_diff_mins == 30 & section_id == 'walk_downtown_urban_canyon_0'")

In [None]:
# UC Berkeley phone view: ios-2, evaluation range 0, trip 2
check_outlier(pv_ucb.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][0], 2, "walk_downtown_urban_canyon_0")

In [None]:
# Reload the baseline segmentation module to pick up local edits.
# The original reloaded `ems`, a name this notebook never imports; `embs` is the
# segmentation module that every other cell uses.
importlib.reload(embs)

In [None]:
# r = pv_ucb.map()['ios']['ucb-sdb-ios-2']["evaluation_ranges"][0]
# tr = r["evaluation_trip_ranges"][2]
# sr = tr["evaluation_section_ranges"][5]; print(sr["trip_id"])
# embs.find_matching_segments(tr["evaluation_section_ranges"], "trip_id", tr["sensed_section_ranges"])

In [None]:
# r = pv_la.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"][0]
# tr = r["evaluation_trip_ranges"][1]
# ma_df = tr["motion_activity_df"]
# we may get some transitions after the trip ends 
# let's expand the activity range to account for that
# trip_end_ts = tr["end_ts"]
# ma_df = pd.concat([ma_df, 
#             r["motion_activity_df"].query("@trip_end_ts <= ts <= @trip_end_ts + 30 * 60")],
#             axis="index")
# embs.find_section_transitions(ma_df.query(IOS_VALID_QUERY_WITH_STILL), get_transition_mask_ios).count()

In [None]:
# Number of motion-activity entries for trip index 1 in each of the first three evaluation ranges
la_ios3_ranges = pv_la.map()['ios']['ucb-sdb-ios-3']["evaluation_ranges"]
for i in range(3):
    trip_activity_df = la_ios3_ranges[i]["evaluation_trip_ranges"][1]["motion_activity_df"]
    print(trip_activity_df.ts.count())