In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
import pandas as pd

In [None]:
import geopandas as gpd
import shapely as shp

In [None]:
import numpy as np

In [None]:
import emeval.metrics.dist_calculations as emd

In [None]:
# For plots
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Analytics results
import emeval.metrics.segmentation as ems

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
def import_sd_and_pv_from_server(trips  = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"], 
                                 AUTHOR_EMAIL  = "shankari@eecs.berkeley.edu", 
                                 DATASTORE_LOC = "http://localhost:8080", 
                                 pkl_file_name = None):
    """
    Build a spec-details object and a phone view for each requested trip.

    For every entry in ``trips`` an ``eisd.ServerSpecDetails`` is created from
    ``DATASTORE_LOC``, ``AUTHOR_EMAIL`` and the trip id, and an
    ``eipv.PhoneView`` is created from it.

    :param trips: iterable of trip/spec identifiers to load
    :param AUTHOR_EMAIL: author email passed to ``eisd.ServerSpecDetails``
    :param DATASTORE_LOC: datastore URL passed to ``eisd.ServerSpecDetails``
    :param pkl_file_name: if truthy, the phone views are also pickled one
        after another, in ``trips`` order, into this file
    :return: tuple ``(spec_details_list, phone_view_list)`` in ``trips`` order
    """
    spec_details_list, phone_views = [], []
    for trip_id in trips:
        # Create each phone view right after its spec details, trip by trip.
        spec_details = eisd.ServerSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, trip_id)
        phone_view = eipv.PhoneView(spec_details)
        spec_details_list.append(spec_details)
        phone_views.append(phone_view)

    # Nothing to cache: hand the freshly built objects straight back.
    if not pkl_file_name:
        return spec_details_list, phone_views

    import pickle
    # The views are written back-to-back into one file, so a reader has to
    # call pickle.load once per view to get them all back.
    with open(pkl_file_name, 'wb') as pkl_out:
        for phone_view in phone_views:
            pickle.dump(phone_view, pkl_out, pickle.HIGHEST_PROTOCOL)
    return spec_details_list, phone_views

In [None]:
def import_pv_from_pkl(pkl_file_name, 
                       trips = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"]):
    """
    Load previously pickled phone views from ``pkl_file_name``.

    The file is expected to hold one pickled object per trip, written back to
    back. One object is read for each entry in ``trips``, so ``trips`` only
    determines how many objects are loaded.

    :param pkl_file_name: path of the pickle file to read
    :param trips: iterable whose length is the number of objects to load
    :return: list of the unpickled objects, in the order they were stored
    :raises EOFError: if the file holds fewer objects than ``len(trips)``
    """
    import pickle
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at pickle files you created yourself.
    # Open the file the caller asked for (previously 'pv.pkl' was hard-coded
    # and the pkl_file_name argument was silently ignored).
    with open(pkl_file_name, 'rb') as inp:
        return [pickle.load(inp) for _ in trips]

In [None]:
# Restore the three cached phone views from the local pickle; the names follow
# the default trip order of import_pv_from_pkl (la, san_jose, ucb).
pv_la, pv_sj, pv_ucb = import_pv_from_pkl('pv.pkl')

In [None]:
def get_spatial_errors(pv):
    """
    Compute per-point spatial errors for every sensed travel section in ``pv``.

    Walks ``pv.map()`` (phone OS -> phone label -> evaluation ranges -> trip
    ranges -> section ranges). For each section that has more than one ground
    truth shape and at least one sensed location, the sensed points and the
    ground truth shapes are converted with the ``emd`` UTM helpers, filtered,
    and the distance from each remaining sensed point to the filtered ground
    truth linestring is recorded.

    :param pv: phone view providing ``map()`` and ``spec_details``
    :return: one row per retained sensed point with the columns ``error``,
        ``ts``, ``geometry``, ``phone_os``, ``phone_label``, ``role``,
        ``timeline``, ``run``, ``trip_id`` and ``section_id``; an empty
        ``pd.DataFrame`` when no section contributed any rows
    :raises AssertionError: if a section with a single ground truth shape is
        of type ``TRAVEL``
    """
    # Collect the per-section frames and concatenate once at the end rather
    # than re-copying an ever-growing frame inside the innermost loop.
    section_error_dfs = []

    for phone_os, phone_map in pv.map().items():
        for phone_label, phone_detail_map in phone_map.items():
            for (r_idx, r) in enumerate(phone_detail_map["evaluation_ranges"]):
                for (tr_idx, tr) in enumerate(r["evaluation_trip_ranges"]):
                    for (sr_idx, sr) in enumerate(tr["evaluation_section_ranges"]):
                        # Ground truth entry for this leg; it is indexed by key below (e.g. ["type"])
                        section_gt_leg = pv.spec_details.get_ground_truth_for_leg(tr["trip_id_base"], sr["trip_id_base"], sr["start_ts"], sr["end_ts"])
                        section_gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(section_gt_leg))
                        if len(section_gt_shapes) == 1:
                            # A single shape means there is no route to compare against
                            print("No ground truth route for %s %s, must be polygon, skipping..." % (tr["trip_id_base"], sr["trip_id_base"]))
                            assert section_gt_leg["type"] != "TRAVEL", "For %s, %s, %s, %s, %s found type %s" % (phone_os, phone_label, r_idx, tr_idx, sr_idx, section_gt_leg["type"])
                            continue
                        if len(sr['location_df']) == 0:
                            print("No sensed locations found, role = %s skipping..." % (r["eval_role_base"]))
                            # assert r["eval_role_base"] == "power_control", "Found no locations for %s, %s, %s, %s, %s" % (phone_os, phone_label, r_idx, tr_idx, sr_idx)
                            continue

                        print("Processing travel leg %s, %s, %s, %s, %s" %
                              (phone_os, phone_label, r["eval_role_base"], tr["trip_id_base"], sr["trip_id_base"]))
                        # This is a GeoDataFrame
                        section_geo_df = emd.to_geo_df(sr["location_df"])

                        # After this point, everything is in UTM so that 2-D inside/filtering operations work
                        utm_section_geo_df = emd.to_utm_df(section_geo_df)
                        utm_section_gt_shapes = section_gt_shapes.apply(lambda s: shp.ops.transform(emd.to_utm_coords, s))
                        filtered_us_gpdf = emd.filter_geo_df(utm_section_geo_df, utm_section_gt_shapes.loc["start_loc":"end_loc"])
                        filtered_gt_linestring = emd.filter_ground_truth_linestring(utm_section_gt_shapes)
                        # Distance of every retained point to the ground truth line
                        # (presumably metres, since the inputs are in UTM -- TODO confirm)
                        meter_dist = filtered_us_gpdf.geometry.distance(filtered_gt_linestring)
                        n_points = len(meter_dist)
                        # Rows of the original (non-UTM) frame that survived filtering, so
                        # "ts" and "geometry" line up with the index of meter_dist
                        filtered_section_geo_df = section_geo_df.loc[filtered_us_gpdf.index]
                        # The dict used to list "ts"/"geometry" twice; only the later,
                        # filtered values took effect, so just those are kept.
                        curr_spatial_error_df = gpd.GeoDataFrame({"error": meter_dist,
                                                                  "ts": filtered_section_geo_df.ts,
                                                                  "geometry": filtered_section_geo_df.geometry,
                                                                  "phone_os": np.repeat(phone_os, n_points),
                                                                  "phone_label": np.repeat(phone_label, n_points),
                                                                  "role": np.repeat(r["eval_role_base"], n_points),
                                                                  "timeline": np.repeat(pv.spec_details.CURR_SPEC_ID, n_points), 
                                                                  "run": np.repeat(r_idx, n_points),
                                                                  "trip_id": np.repeat(tr["trip_id_base"], n_points),
                                                                  "section_id": np.repeat(sr["trip_id_base"], n_points)})
                        section_error_dfs.append(curr_spatial_error_df)

    if not section_error_dfs:
        # Same result as before when nothing was processed
        return pd.DataFrame()
    return pd.concat(section_error_dfs, axis="index")

In [None]:
# Compute the spatial errors for each timeline, then combine them with one
# concat instead of re-copying a growing frame seeded from an empty DataFrame.
spatial_error_frames = []
for pv in [pv_la, pv_sj, pv_ucb]:
    spatial_error_frames.append(get_spatial_errors(pv))
spatial_errors_df = pd.concat(spatial_error_frames, axis="index")

## Get Total Spatial Error for Each Trip
### `TODO:`
* Algo analysis
    + focus on everything
* Ensemble
    + Android: HAMF
    + IOS    : HAHF

In [None]:
# Preview the first rows of the combined per-point spatial error table
spatial_errors_df.head()

In [None]:
# Total the numeric columns for each (run, phone_label, phone_os, role) group.
# numeric_only=True keeps the string and geometry columns out of the sum;
# since pandas 2.0 the default is numeric_only=False, which tries to add them
# as well.
spatial_err_by_phone_run_df = spatial_errors_df.groupby(['run', 'phone_label', 'phone_os', 'role']).sum(numeric_only=True)
spatial_err_by_phone_run_df.head()