## Set up the dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev

In [None]:
# Analytics results
import emeval.metrics.baseline_segmentation as embs

In [None]:
# For plots
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
%matplotlib inline

import IPython.display as ipyd

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import pandas as pd
pd.options.display.float_format = '{:.6f}'.format
import arrow
import numpy as np

In [None]:
# Durations expressed as a count of seconds (60 per minute).
TEN_MINUTES = 60 * 10
THIRTY_MINUTES = 60 * 30

## The spec

The spec defines what experiments were done, and over which time ranges. Once the experiment is complete, most of the structure is read back from the data, but we use the spec to validate that it all worked correctly. The spec also contains the ground truth for the legs. Here, we read the specs for three trips: Los Angeles (`sd_la`), San Jose (`sd_sj`) and UC Berkeley (`sd_ucb`).

In [None]:
# Arguments shared by every FileSpecDetails below: the datastore location
# and the email of the spec author.
DATASTORE_LOC = "bin/data"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"
# One spec-details object per spec id; the _la / _sj / _ucb suffixes mirror the ids.
sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, "unimodal_trip_car_bike_mtv_la")
sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, "car_scooter_brex_san_jose")
sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, "train_bus_ebike_mtv_ucb")

## The views

There are two main views for the data - the phone view and the evaluation view. 

### Phone view

In the phone view, the phone is primary, and then there is a tree that you can traverse to get the data that you want. Traversing that tree typically involves nested for loops; here's an example of loading the phone view and traversing it. You can replace the print statements with real code. When you are ready to check this in, please move the function to one of the Python modules so that we can invoke it more generally.

In [None]:
# Re-import the phone-view module so edits made while debugging take effect
importlib.reload(eipv)

In [None]:
# Build one phone view per spec loaded above
pv_la = eipv.PhoneView(sd_la)

In [None]:
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
pv_ucb = eipv.PhoneView(sd_ucb)

In [None]:
import emeval.analysed.phone_view as eapv

In [None]:
# Re-import the analysed-view module so edits made while debugging take effect
importlib.reload(eapv)

In [None]:
# Spec details pointing at the "master" datastore directory (no spec id is passed here)
MASTER_DATA_LOC = "bin/data/master_9b70c97"
master_spec = eisd.FileSpecDetails(MASTER_DATA_LOC, AUTHOR_EMAIL)

In [None]:
# Analysed view of the LA phone view, backed by the master datastore
av_la_master = eapv.create_analysed_view(pv_la, master_spec, "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

In [None]:
# Same phone view and keys, but backed by the "gis" datastore; compared against master below
GIS_DATA_LOC = "bin/data/gis_9b679e3/"
gis_spec = eisd.FileSpecDetails(GIS_DATA_LOC, AUTHOR_EMAIL)
av_la_gis = eapv.create_analysed_view(pv_la, gis_spec, "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

In [None]:
# Dump one evaluation range so the sensed and expected ranges can be compared by eye:
# first the sensed trips, then the sensed sections, then every expected trip
# followed by the sections sensed within it.
#
# The zone name uses the canonical IANA capitalisation ("Los_Angeles"); the
# previous "Los_angeles" spelling may fail to resolve on platforms where
# time zone lookup is case-sensitive.
DISPLAY_TZ = "America/Los_Angeles"


def _local_span(r):
    """Return the (start, end) timestamps of range dict ``r`` converted to DISPLAY_TZ."""
    return arrow.get(r["start_ts"]).to(DISPLAY_TZ), arrow.get(r["end_ts"]).to(DISPLAY_TZ)


range_0 = av_la_master.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][0]
for t in range_0["sensed_trip_ranges"]:
    print(*_local_span(t))
print("=======")
for s in range_0["sensed_section_ranges"]:
    print(*_local_span(s))
print("=======")

for t in range_0["evaluation_trip_ranges"]:
    print(*_local_span(t))
    for s in t["sensed_section_ranges"]:
        print("-------", *_local_span(s))

In [None]:
# av_la = eapv.create_analysed_view(pv_la, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")
# av_sj = eapv.create_analysed_view(pv_sj, eisd.FileSpecDetails("bin/data/master_9b70c97", AUTHOR_EMAIL), "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")
# av_ucb = eapv.create_analysed_view(pv_ucb, eisd.FileSpecDetails("bin/data/master_9b70c97", AUTHOR_EMAIL), "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")
# av_ucb_reroute = eapv.create_analysed_view(pv_ucb_reroute, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

In [None]:
def verify_range(eval_range):
    """Print a warning for each trip or section count mismatch in ``eval_range``.

    The number of expected entries (``evaluation_*`` keys) is compared with the
    number of sensed entries (``sensed_*`` keys), first for the trips of the
    range and then for the sections of every expected trip. Nothing is printed
    when the counts agree, and nothing is returned.
    """
    n_expected_trips = len(eval_range["evaluation_trip_ranges"])
    n_found_trips = len(eval_range["sensed_trip_ranges"])
    if n_expected_trips > n_found_trips:
        print(f"MISSING TRIP: found {n_found_trips}, expected {n_expected_trips}")
    elif n_expected_trips < n_found_trips:
        print(f"EXTRA TRIP: found {n_found_trips}, expected {n_expected_trips}")

    for trip_idx, trip in enumerate(eval_range["evaluation_trip_ranges"]):
        n_expected = len(trip["evaluation_section_ranges"])
        n_found = len(trip["sensed_section_ranges"])
        if n_expected > n_found:
            print(f"MISSING SECTION: In trip {trip_idx}, found {n_found}, expected {n_expected}")
        elif n_expected < n_found:
            print(f"EXTRA SECTION: In trip {trip_idx}, found {n_found}, expected {n_expected}")

In [None]:
def verify_analysed_view(av):
    """Walk every evaluation range of ``av`` and check the non-control ones.

    ``av.map()`` is traversed as OS -> phone label -> ``"evaluation_ranges"``.
    A banner is printed per OS and a header line per range; ranges whose
    ``eval_role`` contains ``"control"`` are only listed, all others are also
    passed to ``verify_range``.
    """
    pad = 8 * ' '
    for os_name, phones in av.map().items():
        print(15 * "=*")
        for label, details in phones.items():
            for eval_range in details["evaluation_ranges"]:
                print(pad, 30 * "=")
                print(pad, os_name, label, eval_range["trip_id"], eval_range["eval_common_trip_id"], eval_range["eval_role"])
                if "control" in eval_range["eval_role"]:
                    continue
                verify_range(eval_range)

In [None]:
# Report missing/extra trips and sections for the master-backed LA analysed view
verify_analysed_view(av_la_master)

In [None]:
def compare_ranges(eval_range_a, eval_range_b):
    """Print every difference between two versions of the same evaluation range.

    Three things are compared, and a ``MISMATCH`` line is printed for each
    difference found:

    * the number of sensed trips in the two ranges;
    * for each pair of expected trips, the number of sensed sections;
    * for each pair of sensed sections, the ``sensed_mode``.

    Section-level messages name the trip they belong to, since section indices
    restart at zero in every trip.

    Args:
        eval_range_a: dict with ``"sensed_trip_ranges"`` and
            ``"evaluation_trip_ranges"``; every expected trip carries a
            ``"sensed_section_ranges"`` list of dicts with ``"sensed_mode"``.
        eval_range_b: the range to compare against, with the same layout.

    Returns:
        None. Differences are reported on stdout only.
    """
    n_trips_a = len(eval_range_a["sensed_trip_ranges"])
    n_trips_b = len(eval_range_b["sensed_trip_ranges"])
    if n_trips_a > n_trips_b:
        print("MISMATCH: extra trip found in range_a")
    elif n_trips_a < n_trips_b:
        print("MISMATCH: extra trip found in range_b")

    trip_pairs = zip(eval_range_a["evaluation_trip_ranges"], eval_range_b["evaluation_trip_ranges"])
    for trip_idx, (t_a, t_b) in enumerate(trip_pairs):
        sections_a = t_a["sensed_section_ranges"]
        sections_b = t_b["sensed_section_ranges"]
        if len(sections_a) > len(sections_b):
            print(f"MISMATCH: in trip {trip_idx}, extra section found in range_a {len(sections_a)} > {len(sections_b)}")
        elif len(sections_a) < len(sections_b):
            print(f"MISMATCH: in trip {trip_idx}, extra section found in range_b {len(sections_a)} < {len(sections_b)}")
        # zip stops at the shorter list; any surplus sections were reported just above.
        for section_idx, (s_a, s_b) in enumerate(zip(sections_a, sections_b)):
            if s_a["sensed_mode"] != s_b["sensed_mode"]:
                print(f"MISMATCH: in trip {trip_idx}, sensed_mode for section {section_idx} {s_a['sensed_mode']} != {s_b['sensed_mode']}")

In [None]:
def compare_analysed_views(av_a, av_b):
    """Compare two analysed views range by range, driven by the layout of ``av_a``.

    ``av_a.map()`` is traversed as OS -> phone label -> ``"evaluation_ranges"``.
    A banner is printed per OS and a header line per range. For every range
    whose ``eval_role`` does not contain ``"control"``, the range at the same
    OS / label / position in ``av_b`` is looked up and both are handed to
    ``compare_ranges``.
    """
    pad = 8 * ' '
    for os_name, phones_a in av_a.map().items():
        print(15 * "=*")
        for label, details_a in phones_a.items():
            for idx, range_a in enumerate(details_a["evaluation_ranges"]):
                print(pad, 30 * "=")
                print(pad, os_name, label, range_a["trip_id"], range_a["eval_common_trip_id"], range_a["eval_role"])
                if "control" in range_a["eval_role"]:
                    continue
                # Ranges are paired purely by position within the same OS and phone label.
                range_b = av_b.map()[os_name][label]["evaluation_ranges"][idx]
                compare_ranges(range_a, range_b)

In [None]:
# Report where the master-backed and gis-backed analysed views of the LA data disagree
compare_analysed_views(av_la_master, av_la_gis)

In [None]:
# Build the same master/gis pair for the UCB phone view
av_ucb_master = eapv.create_analysed_view(pv_ucb, master_spec, "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

In [None]:
av_ucb_gis = eapv.create_analysed_view(pv_ucb, gis_spec, "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

In [None]:
# Repeat the master-vs-gis comparison on the UCB views
compare_analysed_views(av_ucb_master, av_ucb_gis)

## There is only one missing trip; let's plot it to be sure

In [None]:
# First evaluation range of this phone, read from the phone view (pv_la) rather than
# an analysed view; the trailing expression displays its trip id.
first_repetition = pv_la.map()["ios"]["ucb-sdb-ios-3"]["evaluation_ranges"][0]; first_repetition["trip_id"]

In [None]:
# First expected trip of that range, i.e. the one being investigated as missing
missing_trip = first_repetition["evaluation_trip_ranges"][0]; print(missing_trip["trip_id"])

In [None]:
# List which entries are available for the trip
missing_trip.keys()

In [None]:
# Display the location entry recorded for the trip
missing_trip["location_df"]

In [None]:
# Display the battery entry recorded for the trip
missing_trip["battery_df"]

In [None]:
# Display the motion-activity timestamps converted to Los Angeles local time.
# "America/Los_Angeles" is the canonical IANA spelling; the previous
# "Los_angeles" capitalisation may fail to resolve on platforms where
# time zone lookup is case-sensitive.
missing_trip["motion_activity_df"].ts.apply(lambda ts: arrow.get(ts).to("America/Los_Angeles"))

## Checking the other specs

- Both of them have extra trips, no missing trips
- They have both missing and extra sections, but nothing where the sensed sections are zero

In [None]:
verify_analysed_view(av_sj)

In [None]:
verify_analysed_view(av_ucb)