In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev
import pandas as pd

In [None]:
# For computation
import numpy as np
import math
import scipy.stats as stats
import matplotlib.pyplot as plt

In [None]:
import geopandas as gpd
import shapely as shp

In [None]:
# Datastore endpoint and author identity passed to every SpecDetails below.
DATASTORE_URL = "http://cardshark.cs.berkeley.edu"
AUTHOR_EMAIL = "shankari@eecs.berkeley.edu"
# One SpecDetails per evaluation timeline. The third argument is presumably the
# spec id: the same three strings are used as timeline names in timeline_list.
sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "unimodal_trip_car_bike_mtv_la")
sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "car_scooter_brex_san_jose")
sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, "train_bus_ebike_mtv_ucb")

In [None]:
# Development aid: re-execute the phone-view visualization module (ezpv) so the
# kernel uses its current source without a restart.
import importlib
importlib.reload(ezpv)

In [None]:
# Phone view for the LA spec (sd_la); passed to get_spatial_errors further down.
pv_la = eipv.PhoneView(sd_la)

In [None]:
# Phone view for the San Jose spec (sd_sj); passed to get_spatial_errors further down.
pv_sj = eipv.PhoneView(sd_sj)

In [None]:
# Phone view for the UCB spec (sd_ucb); passed to get_spatial_errors further down.
pv_ucb = eipv.PhoneView(sd_ucb)

In [None]:
def get_polygons(pvunp):
    """
    Collect every polygon-shaped location found in the spec's evaluation trips.

    Each leg of each trip is checked for 'loc', 'end_loc' and 'start_loc'
    entries, in that order. Entries whose geometry type is 'Polygon' are turned
    into Shapely polygons built from their first coordinate ring.

    :param pvunp: phone view; its spec_details.curr_spec['evaluation_trips'] is read
    :return: GeoSeries holding the collected polygons, in encounter order
    """
    location_keys = ('loc', 'end_loc', 'start_loc')
    all_legs = (
        leg
        for trip in pvunp.spec_details.curr_spec['evaluation_trips']
        for leg in trip['legs']
    )
    polygons = [
        shp.geometry.Polygon(leg[key]['geometry']['coordinates'][0])
        for leg in all_legs
        for key in location_keys
        if key in leg and leg[key]['geometry']['type'] == 'Polygon'
    ]
    return gpd.GeoSeries(polygons)

In [None]:
def is_point_outside_polygons(loc_row, polygons):
    """
    Tell whether a location row's point lies outside all of the given polygons.

    :param loc_row: row exposing its point as the `geometry` attribute
    :param polygons: object whose contains(point) yields one flag per polygon
        (a GeoSeries in this notebook)
    :return: True when no polygon contains the point, False otherwise
    """
    containment_flags = polygons.contains(loc_row.geometry)
    if containment_flags.any():
        return False
    return True

def get_travel_trajectory(df, polygons):
    """
    Keep only the location points that fall outside every polygon.

    A point geometry is built for each row from its longitude/latitude, each
    row is flagged in an "outside_polygons" column, and the rows whose flag is
    True are returned.

    :param df: location dataframe with `longitude` and `latitude` columns
    :param polygons: polygons to exclude (passed to is_point_outside_polygons)
    :return: GeoDataFrame restricted to the rows outside all polygons
    """
    def to_point(loc_row):
        return shp.geometry.Point(loc_row.longitude, loc_row.latitude)

    point_geometry = df.apply(to_point, axis=1)
    geo_df = gpd.GeoDataFrame(df, geometry=point_geometry)
    geo_df["outside_polygons"] = geo_df.apply(
        is_point_outside_polygons, axis=1, args=(polygons,))
    # Filter with query() so the caller receives the selected rows only
    return geo_df.query("outside_polygons==True")

def get_gt_linestring(gt_leg):
    """
    Build a Shapely LineString from a ground-truth leg's route.

    :param gt_leg: ground-truth leg dict; its route is read from
        gt_leg['route_coords']['geometry']['coordinates'] when present
    :return: LineString over the route coordinates, or an empty LineString
        when the leg has no 'route_coords' entry
    """
    if 'route_coords' not in gt_leg:
        return shp.geometry.LineString([])
    return shp.geometry.LineString(gt_leg['route_coords']['geometry']['coordinates'])

def get_measured_points(df, polygons):
    """
    Get the sensed location points that should be compared to the ground truth.

    Thin wrapper: delegates to get_travel_trajectory(df, polygons) and returns
    its result unchanged, i.e. the location rows lying outside the polygons.

    :param df: location dataframe recorded from the phone
    :param polygons: polygons whose interior points are dropped
    """
    return get_travel_trajectory(df, polygons)

def get_projection(loc_row, line):
    """
    Return the distance from a location row's point to a line.

    Despite the name, no projected point is returned; the result is whatever
    Point.distance(line) yields, in the units of the input coordinates.

    :param loc_row: row exposing `longitude` and `latitude` attributes
    :param line: Shapely geometry to measure against
    :return: distance between the row's point and `line`
    """
    # Shapely is imported in this notebook as `shp`; a bare `geometry` name is
    # never defined and raised NameError on every call.
    point = shp.geometry.Point(loc_row.longitude, loc_row.latitude)
    return point.distance(line)

def get_shortest_distances(df, line):
    """
    Returns a series representing the distances from each point in a location dataframe to a line

    Thin wrapper around df.distance(line). In this notebook the caller passes
    the geometry column of the measured points and the ground-truth LineString,
    and treats the result as distances in degrees.

    :param df: object exposing distance(line) (a GeoSeries at the call site)
    :param line: geometry to measure each point against
    """
    return df.distance(line)

def convert_to_xy(points):
    """
    Convert (longitude, latitude) pairs in degrees to x/y coordinates in meters.

    Each point is placed on a sphere of radius 6371000 m, using the polar angle
    (pi/2 - latitude) and the azimuth (longitude). Only the x and y components
    of the resulting Cartesian position are kept.

    :param points: iterable of indexable items, longitude at [0], latitude at [1]
    :return: list of [x, y] lists, one per input point, in input order
    """
    earth_radius_m = 6371000  # Radius of the earth in m

    def to_xy(lon_deg, lat_deg):
        polar = math.pi/2 - math.radians(lat_deg)
        azimuth = math.radians(lon_deg)
        planar_radius = earth_radius_m * math.sin(polar)
        return [planar_radius * math.cos(azimuth), planar_radius * math.sin(azimuth)]

    return [to_xy(point[0], point[1]) for point in points]

In [None]:
def get_spatial_errors(pvunp):
    """
    Compute the spatial errors of the sensed locations for every evaluation run.

    For each phone and each of its evaluation ranges (runs), every section with
    a ground-truth route and at least one sensed location contributes the
    distance from each sensed point (outside the spec polygons) to that route.
    Sections without a route or without locations are reported and skipped.

    Distances are measured in lon/lat degrees and converted to meters with the
    length of one degree of arc on a sphere, (pi / 180) * R. This is an
    approximation: a degree of longitude is shorter than that away from the
    equator.

    :param pvunp: phone view providing map() and spec_details
    :return: list of dicts, one per (phone, run), with keys "phone_os",
        "phone_label", "timeline", "run", "role" and "errors" (list of
        distances in meters)
    """
    earth_radius_m = 6371000  # Radius of the earth in m
    # One degree of arc spans (pi / 180) * R meters. The previous factor of
    # R / 360 under-reported every error by a factor of 2 * pi.
    meters_per_degree = math.radians(1) * earth_radius_m

    spatial_error_list = []
    spec_details = pvunp.spec_details
    # This is a GeoSeries
    polygons = get_polygons(pvunp)

    for phone_os, phone_map in pvunp.map().items():
        for phone_label, phone_detail_map in phone_map.items():
            for (r_idx, r) in enumerate(phone_detail_map["evaluation_ranges"]):
                run_errors = []
                for tr in r["evaluation_trip_ranges"]:
                    for sr in tr["evaluation_section_ranges"]:
                        # This is a Shapely LineString
                        section_gt_leg = spec_details.get_ground_truth_for_leg(tr["trip_id_base"], sr["trip_id_base"])
                        gt_line = get_gt_linestring(section_gt_leg)
                        if gt_line.is_empty:
                            print("No ground truth route for %s %s, must be polygon, skipping..." % (tr["trip_id_base"], sr["trip_id_base"]))
                            continue
                        if len(sr['location_df']) == 0:
                            print("No sensed locations found, role = %s skipping..." % (r["eval_role_base"]))
                            continue

                        # This is a GeoDataFrame
                        section_measured_points = get_measured_points(sr['location_df'], polygons)
                        degree_dist = get_shortest_distances(section_measured_points.geometry, gt_line)
                        meter_dist = degree_dist * meters_per_degree

                        run_errors.extend(meter_dist.tolist())

                spatial_error_list.append({
                    "phone_os": phone_os,
                    "phone_label": phone_label,
                    "timeline": spec_details.curr_spec["id"],
                    "run": r_idx,
                    "role": r["eval_role_base"],
                    "errors": run_errors,
                })
    return spatial_error_list

In [None]:
# Gather the per-run error entries of all three phone views, in LA, San Jose,
# UCB order, then tabulate them with one row per entry.
spatial_errors_list = []
for curr_pv in (pv_la, pv_sj, pv_ucb):
    spatial_errors_list.extend(get_spatial_errors(curr_pv))

spatial_errors_df = pd.DataFrame(spatial_errors_list)

In [None]:
# Maps each evaluation role to the integer stored in the "quality" column;
# HAMFDC and MAHFDC share the same level.
r2q_map = {
    "power_control": 0,
    "HAMFDC": 1,
    "MAHFDC": 1,
    "HAHFDC": 2,
    "accuracy_control": 3,
}

In [None]:
# Derive the plotting columns from each row's role: its quality level and a
# shorter tick label with the '_control' suffix removed.
spatial_errors_df["quality"] = spatial_errors_df["role"].apply(lambda role: r2q_map[role])
spatial_errors_df["label"] = spatial_errors_df["role"].apply(lambda role: role.replace('_control', ''))
# Timelines in the order their subplots are drawn.
timeline_list = [
    "train_bus_ebike_mtv_ucb",
    "car_scooter_brex_san_jose",
    "unimodal_trip_car_bike_mtv_la",
]

# Plot of Individual Errors (Over each individual run for Android)

In [None]:
# Box plots of the android spatial errors: one row of subplots per timeline,
# one column per run, one box per matching phone.
# A grid with zero rows/columns is rejected by current matplotlib, so the grid
# is created at its real size; squeeze=False keeps ax_array two-dimensional.
fig, ax_array = plt.subplots(nrows=len(timeline_list), ncols=3, figsize=(20, 15), squeeze=False)
os = "android"

for i, tl in enumerate(timeline_list):
    for r in range(3):
        # Plain lists, as in the iOS cell: Series.append no longer exists in pandas 2.x
        data = []
        labels = []
        ax = ax_array[i][r]
        # Quality levels come from r2q_map (0-3); the extra value 4 matches no rows
        for q in range(5):
            matching = spatial_errors_df.query('timeline == @tl & run == @r & phone_os == @os & quality == @q')
            data.extend(matching['errors'])
            labels.extend(matching['role'])
        ax.set_title(tl + " - Run: " + str(r+1))
        ax.set_xticklabels(labels)
        ax.set_ylabel('Spatial Errors in Meters (android)')
        bp = ax.boxplot(data)

# Plot of Individual Errors (Over each individual run for iOS)

In [None]:
# Box plots of the iOS spatial errors on a 3x3 grid: one row per timeline,
# one column per run, one box per matching phone.
os = "ios"
fig = plt.figure(9, figsize=(20, 15))

for i, tl in enumerate(timeline_list):
    for r in range(3):
        ax = fig.add_subplot(3,3, (i*len(timeline_list) + r) + 1)
        data, labels = [], []
        for q in range(4):
            # Select the rows once and read both columns from the same selection
            matching = spatial_errors_df.query('timeline == @tl & run == @r & phone_os == @os & quality == @q')
            data += list(matching['errors'])
            labels += list(matching['role'])
        ax.set_title(tl + " - Run: " + str(r+1))
        ax.set_xticklabels(labels)
        ax.set_ylabel('Spatial Errors in Meters (iOS)')
        bp = ax.boxplot(data)

In [None]:
# Compact android box plots: one subplot per timeline, one box per matching phone.
fig = plt.figure(3, figsize=(11, 2.75))
os = "android"

# NOTE(review): only the last run (index 2) is plotted. The earlier version
# looped over all three runs but reset `data` and `labels` on every pass, so
# runs 0 and 1 were queried and then discarded. The plotted output is kept
# as-is here; pool or facet by run if all runs were meant to be shown.
r = 2

for i, tl in enumerate(timeline_list):
    ax = fig.add_subplot(1,3,i+1)
    data = []
    labels = []

    for q in range(4):
        matching = spatial_errors_df.query('timeline == @tl & run == @r & phone_os == @os & quality == @q')
        data.extend(matching['errors'])
        labels.extend(matching['label'])
    ax.set_title(tl)
    ax.set_xticklabels(labels)
    ax.set_ylabel('Spatial Errors in Meters (android)')
    bp = ax.boxplot(data)

# Lay the figure out once, after every subplot exists
plt.tight_layout()

In [None]:
# Compact iOS box plots: one subplot per timeline, one box per matching phone.
# A fresh, unnumbered figure is used: figure number 3 is also the android
# per-timeline figure, and reusing it draws onto that figure if it is still open.
fig = plt.figure(figsize=(11, 2.75))
os = "ios"

# NOTE(review): only the last run (index 2) is plotted. The earlier version
# looped over all three runs but reset `data` and `labels` on every pass, so
# runs 0 and 1 were queried and then discarded. The plotted output is kept
# as-is here; pool or facet by run if all runs were meant to be shown.
r = 2

for i, tl in enumerate(timeline_list):
    ax = fig.add_subplot(1,3,i+1)
    data = []
    labels = []

    for q in range(4):
        matching = spatial_errors_df.query('timeline == @tl & run == @r & phone_os == @os & quality == @q')
        data.extend(matching['errors'])
        labels.extend(matching['label'])
    ax.set_title(tl)
    ax.set_xticklabels(labels)
    ax.set_ylabel('Spatial Errors in Meters (ios)')
    bp = ax.boxplot(data)

# Lay the figure out once, after every subplot exists
plt.tight_layout()