In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
# Visualization helpers
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev

In [None]:
# for pipelined data
import emeval.analysed.phone_view as eapv

In [None]:
import pandas as pd

In [None]:
import geopandas as gpd
import shapely as shp
import shapely.geometry as shpgeo

In [None]:
import emeval.viz.geojson as ezgj

In [None]:
import numpy as np

In [None]:
import emeval.metrics.dist_calculations as emd

In [None]:
# For plots
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Analytics results
import emeval.metrics.segmentation as ems

In [None]:
# for statistics
import scipy as sp
import scipy.stats as spst 

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import sys 

In [None]:
sys.path.append('/Users/gkosmach/Documents/every_trip_counts/e-mission-server')

In [None]:
import emission.analysis.intake.cleaning.location_smoothing as eaicl

In [None]:
# json
import json

In [None]:
def import_sd_and_pv_from_server(trips  = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"], 
                                 AUTHOR_EMAIL  = "shankari@eecs.berkeley.edu", 
                                 DATASTORE_LOC = "http://localhost:8080", 
                                 pkl_file_name = None):
    """Build spec details and phone views for each trip spec from the datastore.

    Parameters
    ----------
    trips : list of str
        Spec ids; one ``ServerSpecDetails`` / ``PhoneView`` pair is built per id.
    AUTHOR_EMAIL : str
        Passed through to ``eisd.ServerSpecDetails``.
    DATASTORE_LOC : str
        Passed through to ``eisd.ServerSpecDetails``.
    pkl_file_name : str or None
        When truthy, every phone view is pickled, back to back and in trip
        order, into this file.

    Returns
    -------
    tuple
        ``(spec_details, phone_views)``: two lists aligned with ``trips``.
    """
    spec_details = []
    phone_views = []
    for trip_name in trips:
        # The phone view is built from the spec details of the same trip.
        spec = eisd.ServerSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, trip_name)
        view = eipv.PhoneView(spec)
        spec_details.append(spec)
        phone_views.append(view)

    if not pkl_file_name:
        return spec_details, phone_views

    import pickle
    with open(pkl_file_name, 'wb') as pkl_out:
        # One pickle record per view; readers must call pickle.load once per trip.
        for view in phone_views:
            pickle.dump(view, pkl_out, pickle.HIGHEST_PROTOCOL)
    return spec_details, phone_views

In [None]:
def import_pv_from_pkl(pkl_file_name, 
                       trips = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"]):
    """Load previously pickled phone views from ``pkl_file_name``.

    The file is expected to contain one pickle record per entry in ``trips``,
    written back to back (the layout produced by
    ``import_sd_and_pv_from_server(..., pkl_file_name=...)``).

    Parameters
    ----------
    pkl_file_name : str
        Path of the pickle file to read.
    trips : list of str
        Only its length is used: one object is unpickled per entry.

    Returns
    -------
    list
        The unpickled objects, in the order they were stored.

    Raises
    ------
    EOFError
        If the file holds fewer records than ``len(trips)``.
    """
    import pickle
    # SECURITY: pickle.load can execute arbitrary code; only read files that
    # were produced locally by this notebook.
    # Open the path that was passed in (previously 'pv.pkl' was hard-coded here
    # and the argument was silently ignored).
    with open(pkl_file_name, 'rb') as inp:
        return [pickle.load(inp) for _ in trips]

In [None]:
# Load the three cached phone views from pv.pkl. The unpacking order follows the
# order in which the views were pickled (presumably la, sj, ucb, matching the
# default `trips` list -- verify against the file that was written).
(pv_la, pv_sj, pv_ucb) = import_pv_from_pkl('pv.pkl')

In [None]:
# Build one analysed view per raw phone view, all with the same datastore URL
# and analysis keys.
av_la, av_sj, av_ucb = [
    eapv.create_analysed_view(
        raw_view,
        "http://localhost:8080",
        "analysis/recreated_location",
        "analysis/cleaned_trip",
        "analysis/cleaned_section",
    )
    for raw_view in (pv_la, pv_sj, pv_ucb)
]

In [None]:
def get_ss_and_gts_dists(pv_l, os, role):
    """Collect the sensed and ground-truth distance of every evaluated trip.

    Walks each view's ``map()`` and keeps only the phones whose OS key equals
    ``os`` and the evaluation ranges whose ``eval_role`` contains ``role`` but
    not ``'control'``. For every trip range in those evaluation ranges one
    record is appended to the result.

    Parameters
    ----------
    pv_l : view or list of views
        A single view is wrapped into a one-element list. Each view must offer
        ``map()`` and ``spec_details``.
    os : str
        OS key to keep (compared for equality with the keys of ``pv.map()``).
        NOTE: inside this function the name shadows any imported ``os`` module.
    role : str
        Substring that must appear in a range's ``eval_role``.

    Returns
    -------
    list of dict
        One dict per trip with the keys ``'sensed_distance'``,
        ``'ground_truth_distance'`` and ``'gt_end_ts'`` (the ``end_ts`` of the
        trip's last evaluation section).

    Side effects
    ------------
    Prints the sensed distance and the ground-truth distance of every trip.
    """
    if type(pv_l) is not list:
        pv_l = [pv_l]
    trip_dists = []
    for pv in pv_l:
        for phone_os, phone_map in pv.map().items():
            if os != phone_os: continue
            for phone_label, phone_detail_map in phone_map.items():
                for r in phone_detail_map["evaluation_ranges"]:
                    # Keep only ranges for the requested role, and never the control phone.
                    if role not in r['eval_role']: continue
                    if 'control' in r['eval_role']: continue
                    # Per-range totals; only referenced by the commented-out debug prints below.
                    run_ss_dist, run_gt_dist = 0,0
                    for i, tr in enumerate(r["evaluation_trip_ranges"]):
                        sensed_dist, gt_dist = 0,0
                        for ss in tr['sensed_section_ranges']:
                            if 'data' in ss.keys():
                                # Sanity checks: a sensed section of this trip must start after the
                                # previous trip range ended, and after the last ground-truth section
                                # end recorded in trip_dists (the record of the previous trip).
                                if i > 0: assert ss['data']['start_ts'] > r["evaluation_trip_ranges"][i-1]['end_ts']
                                if i > 0: assert ss['data']['start_ts'] > trip_dists[-1]['gt_end_ts']
                                sensed_dist += ss['data']['distance']
#                                 print(ss['data']['sensed_mode'], ss['data']['start_ts'], ss['data']['end_ts'])
                            else:
                                # No 'data' on the sensed section: fall back to the distance derived
                                # from the trip's location points. This REPLACES anything summed so
                                # far and stops looking at further sensed sections.
                                sensed_dist = eaicl.add_dist_heading( tr['location_df'] ).distance.sum()
                                break
                        print(sensed_dist)
                        run_ss_dist += sensed_dist
                        for sr in tr['evaluation_section_ranges']:
                            ##### Ground Truth Distance ######
                            # NOTE(review): the trip's start/end timestamps (not the section's) are
                            # passed to the ground-truth lookup -- confirm this is intended.
                            gt_leg = pv.spec_details.get_ground_truth_for_leg(
                                tr["trip_id_base"], 
                                sr["trip_id_base"], 
                                tr['start_ts'], 
                                tr['end_ts']
                            )
                            gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(gt_leg))
                            # Legs with at most one shape are skipped and contribute no
                            # ground-truth distance (presumably legs without a route -- confirm).
                            if len(gt_shapes) <= 1:
                                continue
                            ## GET THE TOTAL GT DISTANCE OF A SECTION
                            # Convert the 'route' linestring to a location frame and sum the
                            # per-point distances added by add_dist_heading.
                            gt_linestring = gt_shapes['route']
                            gt_geo_df = emd.linestring_to_geo_df(gt_linestring)
                            gt_loc_df = emd.to_loc_df(gt_geo_df)
                            gt_loc_with_dist_df =  eaicl.add_dist_heading( gt_loc_df )
                            gt_dist += gt_loc_with_dist_df['distance'].sum()
#                             print(gt_leg['id'], sr['start_ts'], sr['end_ts'])
                        print(gt_dist)
                        run_gt_dist += gt_dist
#                         print('sensed distance \t', sensed_dist)
#                         print('ground truth distance \t', gt_dist)
                        # gt_dist stays 0 when every section was skipped above; callers that
                        # divide by 'ground_truth_distance' need to be aware of that.
                        trip_dists.append(
                            {
                                'sensed_distance' : sensed_dist,
                                'ground_truth_distance' : gt_dist,
                                'gt_end_ts' : tr['evaluation_section_ranges'][-1]['end_ts']
                            }
                        )
#                     print('run sensed distance \t', run_ss_dist)
#                     print('run ground truth dist \t', run_gt_dist)
#                     print(f"trip: \t{tr['trip_id']} \n os: \t{phone_os}",'\n \n')
    return trip_dists

In [None]:
get_ss_and_gts_dists([av_la, av_sj, av_ucb], 'ios', 'HAMFDC');

#### First, we find the overall approximation errors

In [None]:
def get_approx_err(os, role, pv_l):
    """Compute signed absolute and relative distance errors per trip.

    For every record returned by ``get_ss_and_gts_dists(pv_l, os, role)`` the
    absolute error is ``sensed_distance - ground_truth_distance`` and the
    relative error is that difference divided by ``ground_truth_distance``.

    Parameters
    ----------
    os : str
        OS key forwarded to ``get_ss_and_gts_dists``.
    role : str
        Evaluation role forwarded to ``get_ss_and_gts_dists``.
    pv_l : view or list of views
        Views forwarded to ``get_ss_and_gts_dists``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(absolute_error_df, relative_error_df)``, one row per trip.
    """
    absolute_error = []
    relative_error = []
    for record in get_ss_and_gts_dists(pv_l, os, role):
        truth = record['ground_truth_distance']
        signed_diff = record['sensed_distance'] - truth
        # Relative error is signed too: negative means the sensed distance is short.
        relative_error.append(signed_diff / truth)
        absolute_error.append(signed_diff)
    return pd.DataFrame(absolute_error), pd.DataFrame(relative_error)

In [None]:
def get_describe_table(os, pv_l):
    """Summarise and plot the relative distance error for each evaluation role.

    For the roles ``HAHFDC``, ``HAMFDC`` and ``MAHFDC`` the relative errors from
    ``get_approx_err`` are placed side by side (one column per role). A density
    plot and a box plot of those columns are drawn as a side effect.

    Parameters
    ----------
    os : str
        OS key forwarded to ``get_approx_err``; also used in the plot titles
        and as the top-level column key of the returned table.
    pv_l : view or list of views
        Views forwarded to ``get_approx_err``.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics per role, with ``os`` as the outer column level.
    """
    role_summaries = []
    role_errors = []
    for role in ['HAHFDC', 'HAMFDC', 'MAHFDC']:
        _, rel_err = get_approx_err(os, role, pv_l)
        labelled = rel_err.rename(columns={0: f"{role}"})
        role_summaries.append(labelled.describe())
        role_errors.append(labelled)

    summary_df = pd.concat(role_summaries, axis=1)
    err_df = pd.concat(role_errors, axis=1)

    err_df.plot.density(title=f'Gaussian KDE of Relative Error for {os}')
    err_df.plot.box(title=f'Box Plots of Relative Error for {os}')
    return pd.concat({os: summary_df}, axis=1)

# Production Level Box Plots for Select Settings

In [None]:
def box_selected():
    """Draw and save side-by-side box plots of the signed relative error.

    The left panel ('raw') uses the raw phone views and the right panel
    ('clean') the analysed views, both read from the notebook globals. Each
    panel compares android:HAHFDC with ios:HAMFDC.

    The figure is written to ``images/rel_err_box_selected`` (matplotlib adds
    its default extension); the ``images`` directory is created if it does not
    exist yet.
    """
    from pathlib import Path

    fig, ax = plt.subplots(1,2, figsize=(12,4), dpi=300, sharey=False)
    fig.text(0.06, 0.5, 'Signed Relative Error', va='center', rotation='vertical')
    panels = [
        ('raw', [pv_la, pv_sj, pv_ucb]),
        ('clean', [av_la, av_sj, av_ucb]),
    ]
    for i, (title, pv_l) in enumerate(panels):
        _, r_err_and = get_approx_err('android', 'HAHFDC', pv_l)
        _, r_err_ios = get_approx_err('ios', 'HAMFDC', pv_l)
        err_df = pd.concat(
            [
                # Label fixed: this previously read 'andoird:HAHFDC' on the figure.
                r_err_and.rename(columns={0 : 'android:HAHFDC'}),
                r_err_ios.rename(columns={0 : 'ios:HAMFDC'})
            ], axis=1
        )
        err_df.plot.box(title=title, ax=ax[i])
    # savefig raises FileNotFoundError when the target directory is missing.
    Path('images').mkdir(parents=True, exist_ok=True)
    # Save through the figure object rather than pyplot's "current figure".
    fig.savefig('images/rel_err_box_selected', bbox_inches="tight")

In [None]:
box_selected()

## Results

In [None]:
2.811445**2

In [None]:
get_describe_table('android', [pv_la, pv_sj, pv_ucb])

In [None]:
0.291166**2

In [None]:
get_describe_table('ios', [pv_la, pv_sj, pv_ucb])

### cleaned output

In [None]:
0.156445**2

In [None]:
get_describe_table('android', [av_la, av_sj, av_ucb])

In [None]:
0.291166**2

In [None]:
get_describe_table('ios', [av_la, av_sj, av_ucb])

In [None]:
box_selected()

In [None]:
# Normal Q-Q plot of the ios:HAMFDC relative errors, to eyeball how close the
# error distribution is to a Gaussian.
import numpy as np 
import pylab 
import scipy.stats as stats
# NOTE(review): `measurements` is an unseeded random sample that is only used
# for the shape printout below (and displayed in the next cell); it does not
# feed into the plot.
measurements = np.random.normal(loc = 20, scale = 5, size=100)
# NOTE(review): pv_la is passed three times here, while every other cell uses
# [pv_la, pv_sj, pv_ucb] -- confirm whether this repetition is intentional.
a_err, r_err_ios = get_approx_err('ios', 'HAMFDC', [pv_la, pv_la, pv_la]); np.array(r_err_ios)
# probplot is given a flat 1-D array, so the single-column frame is flattened.
r = np.array(r_err_ios).flatten()
print(measurements.shape, r.shape)
stats.probplot(r, dist="norm", plot=pylab)
pylab.show()

In [None]:
measurements;
a_err, r_err_ios = get_approx_err('ios', 'HAMFDC', [av_la]); np.array(r_err_ios)


# investigate outlier

# JSON table 

In [None]:
# Relative distance errors for the two selected (os, role) settings.
# get_approx_err requires the list of views as its third argument; the calls
# previously omitted it and raised TypeError.
# NOTE(review): the raw phone views are used here, mirroring the `pv_l=pv_l`
# call in get_relative_distance_error_table -- switch to
# [av_la, av_sj, av_ucb] if the table should describe the cleaned output.
json_dump = json.dumps( 
    {
        "relative_distance_errors" :
        {
            "android:HAMF" : np.array(get_approx_err('android', 'HAMFDC', [pv_la, pv_sj, pv_ucb])[-1]).tolist(),
            "ios:HAHF"     : np.array(get_approx_err('ios', 'HAHFDC', [pv_la, pv_sj, pv_ucb])[-1]).tolist()
        }
    }
)
# Writing to relative_distance_errors.json
with open("tables/relative_distance_errors.json", "w") as outfile:
    outfile.write(json_dump)

# Function that takes in a spec detail (or spec details) and outputs a JSON table

In [None]:
def get_relative_distance_error_table(sd, output_file_name="relative_distance_errors.json") :
    """Write the relative distance errors for the given spec(s) to a JSON file.

    A ``PhoneView`` is built for every spec detail, the relative errors for
    android:HAMFDC and ios:HAHFDC are computed over all of those views with
    ``get_approx_err``, and the result is written as::

        {"relative_distance_errors": {"android:HAMF": [...], "ios:HAHF": [...]}}

    The errors are computed from the raw phone views (the original code passed
    ``pv_l`` to ``get_approx_err``); the analysed views it also tried to build
    were never used and are no longer created.

    Parameters
    ----------
    sd : spec details or list of spec details
        A single object is treated as a one-element list.
    output_file_name : str
        Path of the JSON file to write.

    Returns
    -------
    None
    """
    sd_l = sd if isinstance(sd, list) else [sd]
    # One phone view per spec detail (previously the whole list was passed to
    # PhoneView and the result list was appended to itself).
    pv_l = [eipv.PhoneView(s) for s in sd_l]
    json_dump = json.dumps( 
        {
            "relative_distance_errors" :
            {
                # [-1] selects the relative-error frame of the (absolute, relative) pair.
                "android:HAMF" : np.array(get_approx_err('android', 'HAMFDC', pv_l)[-1]).tolist(),
                "ios:HAHF"     : np.array(get_approx_err('ios', 'HAHFDC', pv_l)[-1]).tolist()
            }
        }
    )
    with open(output_file_name, "w") as outfile:
        outfile.write(json_dump)