# Validate calibration of phones

This notebook retrieves the calibration results for a particular experiment and validates them

## Experiment parameters

If you only want to run experiments, you only need to edit these variables. The notebook will retrieve the appropriate spec, use it to find the calibration periods from the database and validate the calibration. You probably want to publish this notebook along with your results so that others can examine it as well.

In [None]:
# Base URL of the datastore server that all the queries in this notebook are POSTed to
DATASTORE_URL = "http://localhost:8080"
# User label under which the evaluation spec entries are looked up
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"
# `id` of the evaluation spec whose calibration is validated
CURR_SPEC_ID = "sfba_trial_1"
# Upper bound on how far the calibration duration of a phone may be from the median across phones
MAX_DURATION_VARIATION = 5 * 60 # seconds

## Setup some basic imports

In [None]:
import logging
logging.getLogger().setLevel(logging.DEBUG)

In [None]:
import copy
import matplotlib.pyplot as plt
import arrow
import pandas as pd
%matplotlib notebook

In [None]:
import requests

## Setup the ability to make calls to the server

In [None]:
def retrieve_data_from_server(user_label, key_list, start_ts, end_ts):
    """
    Fetch the entries stored for one user within a timestamp range.

    POSTs a query to the `/datastreams/find_entries/timestamp` endpoint of
    `DATASTORE_URL` and raises (through `raise_for_status`) on an HTTP error.

    :param user_label: value sent as the `user` field of the query
    :param key_list: value sent as the `key_list` field of the query
    :param start_ts: value sent as the `start_time` field of the query
    :param end_ts: value sent as the `end_time` field of the query
    :return: the `phone_data` member of the JSON response
    """
    query = dict(user=user_label, key_list=key_list,
                 start_time=start_ts, end_time=end_ts)
    find_entries_url = DATASTORE_URL + "/datastreams/find_entries/timestamp"
    response = requests.post(find_entries_url, json=query)
    response.raise_for_status()
    return response.json()["phone_data"]

def retrieve_all_data_from_server(user_label, key_list):
    """
    Fetch every entry stored for one user, from timestamp 0 until now.

    :param user_label: user whose entries are retrieved
    :param key_list: keys of the entries to retrieve
    :return: whatever `retrieve_data_from_server` returns for that range
    """
    # arrow < 1.0 exposes `timestamp` as a property, arrow >= 1.0 turned it into
    # a method. Handle both so that a number (and not a bound method, which
    # cannot be serialized to JSON) is always sent as the end of the range.
    now_ts = arrow.get().timestamp
    if callable(now_ts):
        now_ts = now_ts()
    return retrieve_data_from_server(user_label, key_list, 0, now_ts)

## Find the current spec

In [None]:
# Fetch all the specs stored for the author and pick the one with the configured id.
# If several entries share that id, the last one in the returned list is used.
all_spec_entry_list = retrieve_all_data_from_server(AUTHOR_EMAIL, ["config/evaluation_spec"])
curr_spec_entry = None
for s in all_spec_entry_list:
    if s["data"]["id"] == CURR_SPEC_ID:
        curr_spec_entry = s
# Fail with a readable message instead of a TypeError on `None["data"]`
assert curr_spec_entry is not None, \
    "No spec with id %s found for author %s" % (CURR_SPEC_ID, AUTHOR_EMAIL)
curr_spec = curr_spec_entry["data"]
curr_spec["name"]

## Find all evaluation transitions within the start and end times of this spec

In [None]:
# The spec carries the time range of the evaluation and the phones that were part of it
eval_start_ts, eval_end_ts = curr_spec["start_ts"], curr_spec["end_ts"]
eval_range_display = (arrow.get(eval_start_ts), arrow.get(eval_end_ts))
print("Evaluation ran from %s -> %s" % eval_range_display)
phone_labels = curr_spec["phones"]

Data model here is:

```
eval_transitions
    - android
        - ucb.sdb.android.1
            - list of evaluation transitions
        - ....
    - ios
```

In [None]:
# Deep copy: the per-phone results stored below replace the values of the
# per-OS phone maps, and a shallow copy would share those maps with
# `curr_spec["phones"]` and so overwrite the spec itself.
eval_transitions = copy.deepcopy(phone_labels)
for phoneOS, phone_map in eval_transitions.items():
    print("Reading data for %s phones" % phoneOS)
    for phone_label in phone_map:
        print("Loading transitions for phone %s" % phone_label)
        curr_phone_transitions = retrieve_data_from_server(phone_label, ["manual/evaluation_transition"], eval_start_ts, eval_end_ts)
        # From now on, the entry of each phone is a dict that accumulates the results for that phone
        phone_map[phone_label] = {"transitions": curr_phone_transitions}

### Find calibration transitions, validate and map them to ranges

From here onwards, we will add the results of manipulation to each phone entry - e.g.

```
eval_transitions
    - android
        - ucb.sdb.android.1
            - transitions (all transition entries, added in previous step)
            - calibration_transitions (calibration transitions, will be added in this step)
        - ....
    - ios
```

In [None]:
# Keep only the start/stop calibration markers out of all the transitions of each phone
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        print("Processing transitions for phone %s" % phone_label)
        curr_phone_transitions = [entry["data"] for entry in phone_entry["transitions"]]
        curr_calibration_transitions = []
        for transition in curr_phone_transitions:
            if transition["transition"] in ("START_CALIBRATION_PERIOD", "STOP_CALIBRATION_PERIOD"):
                curr_calibration_transitions.append(transition)
        transition_counts = (len(curr_phone_transitions), len(curr_calibration_transitions))
        print("Filtered %d total -> %d calibration transitions " % transition_counts)
        phone_entry["calibration_transitions"] = curr_calibration_transitions

In [None]:
# Spot check: show the calibration transitions of one iOS phone as (transition name, time) pairs
ios_1_transitions = eval_transitions["ios"]["ucb.sdb.ios.1"]["calibration_transitions"]
[(t["transition"], arrow.get(t["ts"])) for t in ios_1_transitions]

In [None]:
# We expect that transitions occur in pairs
def transitions_to_ranges(transition_list):
    """
    Convert a flat list of calibration transitions into a list of ranges.

    The list is expected to alternate between `START_CALIBRATION_PERIOD` and
    `STOP_CALIBRATION_PERIOD` transitions, so entries 0 and 1 delimit the first
    range, entries 2 and 3 the second one, and so on.

    :param transition_list: list of transition dicts with the `transition`,
        `trip_id`, `ts`, `spec_id`, `device_manufacturer`, `device_model` and
        `device_version` fields
    :return: list of dicts with the `trip_id`, `start_ts`, `end_ts` and
        `duration` keys, one per start/stop pair; an empty list (after printing
        a message) if the number of transitions is odd
    :raises AssertionError: if a pair is inconsistent - wrong transition type,
        fields that differ between start and stop, or stop not after start
    """
    if len(transition_list) % 2 != 0:
        print("Transitions occur in pairs, so count (%d) cannot be odd, returning empty list" % len(transition_list))
        return []
    start_transitions = transition_list[::2]
    end_transitions = transition_list[1::2]
    range_list = []
    for (s, e) in zip(start_transitions, end_transitions):
        assert s["transition"] == "START_CALIBRATION_PERIOD", "Start transition has %s transition" % s["transition"]
        # Report the offending *stop* entry, not the start entry of the pair
        assert e["transition"] == "STOP_CALIBRATION_PERIOD", "Stop transition has %s transition" % e["transition"]
        assert s["trip_id"] == e["trip_id"], "trip_id mismatch! %s != %s" % (s["trip_id"], e["trip_id"])
        assert e["ts"] > s["ts"], "end %s is before start %s" % (arrow.get(e["ts"]), arrow.get(s["ts"]))
        # Both ends of a range must come from the same spec and the same device
        for f in ["spec_id", "device_manufacturer", "device_model", "device_version"]:
            assert s[f] == e[f], "Field %s mismatch! %s != %s" % (f, s[f], e[f])
        curr_range = {"trip_id": s["trip_id"], "start_ts": s["ts"], "end_ts": e["ts"], "duration": (e["ts"] - s["ts"])}
        range_list.append(curr_range)
    return range_list

In [None]:
# Spot check: show the ranges computed from the calibration transitions of one iOS phone
transitions_to_ranges(eval_transitions["ios"]["ucb.sdb.ios.1"]["calibration_transitions"])

In [None]:
# Pair up the calibration transitions of every phone into ranges and store them on the phone entry
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        curr_calibration_ranges = transitions_to_ranges(phone_entry["calibration_transitions"])
        n_ranges = len(curr_calibration_ranges)
        print("Found %d ranges for phone %s" % (n_ranges, phone_label))
        phone_entry["calibration_ranges"] = curr_calibration_ranges

## Validate the ranges for individual phones

This involves two main checks:
- that we have at least one calibration range for each test in the spec. Note that we do not currently enforce that we have exactly one calibration range for each test, on the theory that more calibration is always good, but I am open to argument about this. (The assertion for this check is currently commented out in the code below.)
- that the settings in the calibration range are consistent with the spec

In [None]:
# Map from the id of each calibration test in the spec to the sensing config of that test
expected_config_map = {
    ct["id"]: ct["config"]["sensing_config"]
    for ct in curr_spec["calibration_tests"]
}

In [None]:
# Current accuracy constants
# Since we can't read these from the phone, we hardcoded them from the documentation
# If there are validation failures, these need to be updated
# In the future, we could upload the options from the phone (maybe the accuracy control)
# but that seems like overkill here

# Map from phone OS to {name of the accuracy constant: numeric value of that constant}.
# Used to translate the accuracy names in the spec into the numbers found in the sensor config.
accuracy_options = {
    "android": {
        "PRIORITY_HIGH_ACCURACY": 100,
        "PRIORITY_BALANCED_POWER_ACCURACY": 102,
        "PRIORITY_LOW_POWER": 104,
        "PRIORITY_NO_POWER": 105
    },
    "ios": {
        "kCLLocationAccuracyBestForNavigation": -2,
        "kCLLocationAccuracyBest": -1,
        "kCLLocationAccuracyNearestTenMeters": 10,
        "kCLLocationAccuracyHundredMeters": 100,
        "kCLLocationAccuracyKilometer": 1000,
        "kCLLocationAccuracyThreeKilometers": 3000,
    }
}

In [None]:
def opt_array_idx(phoneOS):
    """
    Index of the entry for `phoneOS` in a platform specific option array.

    :param phoneOS: name of the phone OS
    :return: 0 for "android", 1 for anything else
    """
    if phoneOS == "android":
        return 0
    return 1

def validate_filter(phoneOS, config_during_test, expected_config):
    """
    Check that the filter used during a calibration test matches the spec.

    The expected filter is either a single int, which applies to both
    platforms, or a list with one value per platform, indexed through
    `opt_array_idx`. The configured value is read from `filter_time` on
    android and from `filter_distance` on ios.

    :param phoneOS: "android" or "ios"
    :param config_during_test: sensor config that was active during the test
    :param expected_config: sensing config of the test from the spec
    :raises AssertionError: if `phoneOS` is not supported, if the expected
        filter is neither an int nor a list, or if the values do not match
    """
    config_field_by_os = {"android": "filter_time", "ios": "filter_distance"}
    # Fail with a readable message instead of an UnboundLocalError further down
    assert phoneOS in config_field_by_os, \
        "Unsupported phoneOS %s, expected one of %s" % (phoneOS, sorted(config_field_by_os))
    cvf = config_field_by_os[phoneOS]

    expected_filter = expected_config["filter"]
    if type(expected_filter) == int:
        ev = expected_filter
    else:
        assert type(expected_filter) == list, "platform specific filters should be specified in array, not %s" % expected_filter
        ev = expected_filter[opt_array_idx(phoneOS)]

    assert config_during_test[cvf] == ev, "Field filter mismatch! %s != %s" % (config_during_test, expected_config)
    
def validate_accuracy(phoneOS, config_during_test, expected_config):
    """
    Check that the accuracy used during a calibration test matches the spec.

    :param phoneOS: OS of the phone, a key of `accuracy_options`
    :param config_during_test: sensor config that was active during the test
    :param expected_config: sensing config of the test from the spec
    :raises AssertionError: if the configured accuracy is not the expected one
    """
    # expected config accuracy is an array of strings ["PRIORITY_BALANCED_POWER_ACCURACY", "kCLLocationAccuracyNearestTenMeters"]
    # so we find the string at the correct index and then map it to the value from the options
    ev = accuracy_options[phoneOS][expected_config["accuracy"][opt_array_idx(phoneOS)]]
    # The key must be the string "accuracy": the bare name `accuracy` is not defined,
    # so building the failure message would raise a NameError
    assert config_during_test["accuracy"] == ev, "Field accuracy mismatch! %s != %s" % (config_during_test["accuracy"], ev)

# Compare the sensor config that was active during each calibration range with the spec
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        curr_calibration_ranges = phone_entry["calibration_ranges"]
        all_test_ids = [r["trip_id"] for r in curr_calibration_ranges]
        unique_test_ids = sorted(set(all_test_ids))
        spec_test_ids = sorted(ct["id"] for ct in curr_spec["calibration_tests"])
        # assert unique_test_ids == spec_test_ids, "Missing calibration test while comparing %s, %s" % (unique_test_ids, spec_test_ids)
        for r in curr_calibration_ranges:
            config_during_test_entries = retrieve_data_from_server(phone_label, ["config/sensor_config"], r["start_ts"], r["end_ts"])
            n_config_entries = len(config_during_test_entries)
            # exactly one config entry is expected within the range
            assert n_config_entries == 1, "Out of band configuration? Found %d config changes" % n_config_entries
            config_during_test = config_during_test_entries[0]["data"]
            expected_config = expected_config_map[r["trip_id"]]
            # filter and accuracy are platform specific and have dedicated checks
            validate_filter(phoneOS, config_during_test, expected_config)
            validate_accuracy(phoneOS, config_during_test, expected_config)
            # every other field of the expected config must match as is
            for f in expected_config:
                if f in ("accuracy", "filter"):
                    continue
                assert config_during_test[f] == expected_config[f], "Field %s mismatch! %s != %s" % (f, config_during_test[f], expected_config[f])

## Validate ranges across phones

This effectively has one test right now - is the duration of the tests across phones consistent?
TODO: We should add a reasonable fuzz factor based on real calibration.

We are going to create a pandas dataframe with the following structure

```
                    android_<phone_1> android_<phone_2> android_<phone_3> ....
<trip_id_1>
<trip_id_2>
...
```

Then, we can transpose it to get

```
                    <trip_id_1> <trip_id_2> <trip_id_3> ....
android_<phone_1>
android_<phone_2>
...
```

Then, we can extract the durations for each `trip_id` as a series and compare them across phones.

In [None]:
# Build a {"<os>_<phone>": {trip_id: duration}} map, then turn it into a
# table with one row per phone and one column per trip_id
duration_map = {}
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        curr_calibration_ranges = phone_entry["calibration_ranges"]
        curr_phone_duration_map = {r["trip_id"]: r["duration"] for r in curr_calibration_ranges}
        duration_map[phoneOS+"_"+phone_label] = curr_phone_duration_map

duration_df = pd.DataFrame(duration_map).T
duration_df

Since these are not statistical samples, the regular standard deviation/variation don't have much meaning. The variation is really caused by human control of the evaluation start/stop, and the durations should be within a few minutes of each other. The maximum expected variation is defined in `MAX_DURATION_VARIATION`.

In [None]:
# Difference between the `high_accuracy` duration of each phone and the median across phones
high_accuracy_durations = duration_df["high_accuracy"]
duration_variation = high_accuracy_durations - high_accuracy_durations.median()
duration_variation

In [None]:
# The message needs one placeholder per formatted value: without them, a failing
# check raises "TypeError: not all arguments converted" instead of this message
max_duration_variation = duration_variation.abs().max()
assert max_duration_variation < MAX_DURATION_VARIATION,\
    "INVALID: duration_variation.abs().max() %s >= threshold %s" % (max_duration_variation, MAX_DURATION_VARIATION)

Now, we can evaluate the actual values

## Battery drain over time

The data is in the format

```
android:
    - <phone_1>:
        - <trip_id>
          - dataframe with index = ts, columns = other fields
    - <phone_2>:
        - <trip_id>
          - dataframe with index = ts, columns = other fields
...
ios:
    - <phone_1>:
        - <trip_id>
          - dataframe with index = ts, columns = other fields
    - <phone_2>:
        - <trip_id>
          - dataframe with index = ts, columns = other fields
```

In [None]:
# Download the battery readings of every calibration range and attach them to the range
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        curr_calibration_ranges = phone_entry["calibration_ranges"]
        for r in curr_calibration_ranges:
            battery_entries = retrieve_data_from_server(phone_label, ["background/battery"], r["start_ts"], r["end_ts"])
            battery_df = pd.DataFrame([e["data"] for e in battery_entries])
            # hours elapsed since the start of the range
            seconds_since_start = battery_df["ts"] - r["start_ts"]
            battery_df["hr"] = seconds_since_start / 3600.0
            if phoneOS == "ios":
                # the ios entries carry a ratio, so derive the percentage column from it
                battery_df["battery_level_pct"] = battery_df["battery_level_ratio"] * 100
            r["battery_df"] = battery_df

In [None]:
def plot_calibration_curves(ax, phone_map):
    """
    Plot battery level against elapsed hours for every calibration range.

    One curve is drawn per (phone, calibration range) pair, labeled
    `<phone_label>_<trip_id>`.

    :param ax: axes that all the curves are drawn on
    :param phone_map: map of phone label -> phone entry; the ranges in the
        `calibration_ranges` list of each entry carry `battery_df` and `trip_id`
    """
    for phone_label, phone_entry in phone_map.items():
        for calibration_range in phone_entry["calibration_ranges"]:
            battery_df = calibration_range["battery_df"]
            curve_label = phone_label + "_" + calibration_range["trip_id"]
            battery_df.plot(x="hr", y="battery_level_pct", ax=ax, label=curve_label, sharey=True)

In [None]:
# Battery curves of all the calibration ranges: android in the left panel, ios in the right one
fig = plt.Figure(figsize=(16, 6))
android_axes = fig.add_subplot(1, 2, 1)
ios_axes = fig.add_subplot(1, 2, 2)
for os_axes, os_name in ((ios_axes, "ios"), (android_axes, "android")):
    plot_calibration_curves(os_axes, eval_transitions[os_name])
fig

## Location points over time

Now, we download the location points and check to see that the density is largely consistent

In [None]:
# Download the location entries of every calibration range and attach them to the range.
# The entries are fetched in batches: each query starts at the `ts` of the last entry
# returned by the previous one, and the loop stops once a query returns 0 or 1 entries.
# NOTE(review): the last entry of a batch is presumably returned again as the first entry
# of the next batch (which would explain the `== 1` stop condition), so `location_entries`
# may contain duplicates - TODO confirm against the server's range semantics.
# NOTE(review): if the first query of a range returns exactly one entry, that entry is
# dropped and `location_entries` stays empty; the `location_df.ts` access below
# presumably fails on such an empty dataframe - TODO confirm.
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label in phone_map:
        curr_calibration_ranges = phone_map[phone_label]["calibration_ranges"]
        for r in curr_calibration_ranges:
            all_done = False
            location_entries = []
            curr_start_ts = r["start_ts"]
            while not all_done:
                # print("About to retrieve data for %s from %s -> %s" % (phone_label, curr_start_ts, r["end_ts"]))
                curr_location_entries = retrieve_data_from_server(phone_label, ["background/location"], curr_start_ts, r["end_ts"])
                # print("Retrieved %d entries " % len(curr_location_entries))
                if len(curr_location_entries) == 0 or len(curr_location_entries) == 1:
                    all_done = True
                else:
                    location_entries.extend(curr_location_entries)
                    # the next query resumes from the timestamp of the last entry received
                    curr_start_ts = curr_location_entries[-1]["data"]["ts"]
            location_df = pd.DataFrame([e["data"] for e in location_entries])
            # hours elapsed since the start of the range
            location_df["hr"] = (location_df.ts-r["start_ts"])/3600.0
            r["location_df"] = location_df

In [None]:
# Build a {"<os>_<phone>": {trip_id: number of location entries}} map, then turn it
# into a table with one row per phone and one column per trip_id
count_map = {}
for phoneOS, phone_map in eval_transitions.items():
    print("Processing data for %s phones" % phoneOS)
    for phone_label, phone_entry in phone_map.items():
        curr_calibration_ranges = phone_entry["calibration_ranges"]
        curr_phone_count_map = {r["trip_id"]: len(r["location_df"]) for r in curr_calibration_ranges}
        count_map[phoneOS+"_"+phone_label] = curr_phone_count_map

count_df = pd.DataFrame(count_map).T
count_df

In [None]:
def get_location_density_df(phone_map, sel_trip_id):
    """
    Collect the location timestamps of one calibration test for all phones.

    :param phone_map: map of phone label -> phone entry; the ranges in the
        `calibration_ranges` list of each entry carry `trip_id` and `location_df`
    :param sel_trip_id: `trip_id` of the calibration test to select
    :return: dataframe with one column per phone that has a range for
        `sel_trip_id`, holding the `ts` column of that range's `location_df`
    """
    density_map = {}
    for phone_label in phone_map:
        for r in phone_map[phone_label]["calibration_ranges"]:
            # honor the requested trip_id instead of always selecting "high_accuracy"
            if r["trip_id"] == sel_trip_id:
                # if a phone has several ranges for this test, the last one wins
                density_map[phone_label] = r["location_df"].ts

    return pd.DataFrame(density_map)

In [None]:
# Density of the location timestamps of the `high_accuracy` test, one subplot per android phone
android_phone_map = eval_transitions["android"]
android_density_df = get_location_density_df(android_phone_map, "high_accuracy")
# two subplots per row
nRows = len(android_phone_map) // 2 + 1
print(nRows)
android_density_df.plot(kind='density', subplots=True, layout=(nRows, 2), figsize=(16,16), sharex=True, sharey=True)

In [None]:
def plot_density_vs_power_curves(fig, nRows, phone_map, sel_trip_id):
    """
    Plot battery level and location density over time, one subplot per phone.

    Subplots are laid out on a grid of `nRows` rows and 2 columns, in the
    iteration order of `phone_map`. Only the ranges whose `trip_id` is
    `sel_trip_id` are drawn; the location density uses a secondary y axis.

    :param fig: figure that the subplots are added to
    :param nRows: number of rows of the subplot grid
    :param phone_map: map of phone label -> phone entry; the ranges in the
        `calibration_ranges` list of each entry carry `trip_id`, `battery_df`
        and `location_df`
    :param sel_trip_id: `trip_id` of the calibration test to plot
    """
    for subplot_idx, (phone_label, phone_entry) in enumerate(phone_map.items(), start=1):
        ax = fig.add_subplot(nRows, 2, subplot_idx)
        for calibration_range in phone_entry["calibration_ranges"]:
            if calibration_range["trip_id"] != sel_trip_id:
                continue
            battery_df = calibration_range["battery_df"]
            location_df = calibration_range["location_df"]
            battery_df.plot(x="ts", y="battery_level_pct", ax=ax, label=phone_label, sharex=True, sharey=True)
            location_df.ts.plot(ax=ax, kind="density", secondary_y=True)

In [None]:
# Battery level vs location density of the `high_accuracy` test for the android phones
android_phones = eval_transitions["android"]
fig = plt.Figure(figsize=(16, 16))
plot_density_vs_power_curves(fig, nRows, android_phones, "high_accuracy")
fig

In [None]:
# Density of the location timestamps of the `high_accuracy` test, one subplot per ios phone
ios_phone_map = eval_transitions["ios"]
ios_density_df = get_location_density_df(ios_phone_map, "high_accuracy")
# two subplots per row
nRows = len(ios_phone_map) // 2 + 1
print(nRows)
ios_density_df.plot(kind='density', subplots=True, layout=(nRows, 2), figsize=(16,16), sharex=True, sharey=True)

In [None]:
# Battery level vs location density of the `high_accuracy` test for the ios phones
ios_phones = eval_transitions["ios"]
fig = plt.Figure(figsize=(16, 16))
plot_density_vs_power_curves(fig, nRows, ios_phones, "high_accuracy")
fig