# Classification Analysis
This notebook contains the classification analysis for both the sensed and the pipelined algorithms. The analysis is performed on the sensed and pipelined algorithms themselves, as well as on the ensemble algorithms. The analysis of the ensemble algorithm focuses on the HAMF Android phones and the HAHF iOS phones.

## Dependencies

In [None]:
# for reading and validating data
import emeval.input.spec_details as eisd
import emeval.input.phone_view as eipv
import emeval.input.eval_view as eiev

In [None]:
import emeval.viz.phone_view as ezpv
import emeval.viz.eval_view as ezev
import emeval.viz.geojson as ezgj

In [None]:
# for the analysed view
import emeval.analysed.phone_view as eapv

In [None]:
import emeval.metrics.segmentation as ems

In [None]:
import pandas as pd
pd.options.display.float_format = '{:.6f}'.format
import arrow
import numpy as np

In [None]:
# For plots
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# For maps
import folium
import branca.element as bre

In [None]:
# For easier debugging while working on modules
import importlib

In [None]:
import arrow

In [None]:
def import_sd_and_pv_from_server(trips  = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"], 
                                 AUTHOR_EMAIL  = "shankari@eecs.berkeley.edu", 
                                 DATASTORE_LOC = "http://localhost:8080", 
                                 pkl_file_name = None):
    """Build a spec-details object and a phone view for every requested trip.

    For each entry of `trips`, an `eisd.ServerSpecDetails` is created from
    `DATASTORE_LOC`, `AUTHOR_EMAIL` and the trip id, and an `eipv.PhoneView`
    is created from that spec.

    If `pkl_file_name` is truthy, every phone view is additionally pickled
    (one `pickle.dump` per view, in trip order) into that file, which is
    opened in binary write mode.

    Returns a pair `(spec_details_list, phone_view_list)`, both in the order
    of `trips`.
    """
    spec_details = []
    phone_views = []
    for trip_id in trips:
        spec = eisd.ServerSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, trip_id)
        view = eipv.PhoneView(spec)
        spec_details.append(spec)
        phone_views.append(view)

    if not pkl_file_name:
        return spec_details, phone_views

    import pickle
    # The views are written back to back into a single file, so a reader has
    # to call pickle.load once per view to get them all back.
    with open(pkl_file_name, 'wb') as pkl_out:
        for view in phone_views:
            pickle.dump(view, pkl_out, pickle.HIGHEST_PROTOCOL)
    return spec_details, phone_views

In [None]:
def import_pv_from_pkl(pkl_file_name, 
                       trips = ["unimodal_trip_car_bike_mtv_la", "car_scooter_brex_san_jose", "train_bus_ebike_mtv_ucb"]):
    """Load pickled phone views from `pkl_file_name`.

    The file is expected to hold one pickled object per trip, written back to
    back. One object is read for each entry of `trips`; the trip ids
    themselves are not inspected, only their count matters.

    Parameters:
        pkl_file_name: path of the pickle file to read.
        trips: sequence whose length is the number of objects to load.

    Returns:
        A list with the loaded objects, in file order.

    Raises:
        EOFError: if the file holds fewer pickled objects than `trips` has
            entries (raised by `pickle.load`).
    """
    import pickle
    # NOTE: pickle.load can execute arbitrary code; only use this on pickle
    # files that you created yourself.
    with open(pkl_file_name, 'rb') as inp:
        return [pickle.load(inp) for _ in trips]

In [None]:
# Load the three phone views from the local pickle file. They are unpacked in
# the order of import_pv_from_pkl's default `trips` list (mtv_la, san_jose,
# mtv_ucb); this assumes the file was written in that same order -- TODO confirm.
(pv_la, pv_sj, pv_ucb) = import_pv_from_pkl('pv.pkl')

### Get the sensed data for each trip

In [None]:
%%capture
# %%capture suppresses the output of this cell.
# Presumably these calls add the `sensed_section_ranges` entries that
# get_trip_ss_and_gts_timeline reads from each trip range -- TODO confirm
# against emeval.metrics.segmentation.
ems.fill_sensed_section_ranges(pv_la)
ems.fill_sensed_section_ranges(pv_sj)
ems.fill_sensed_section_ranges(pv_ucb)

## Get sensed timeline

In [None]:
def get_trip_ss_and_gts_timeline(pv, os, role):
    """Collect, per evaluation trip, the sensed and ground-truth section timelines.

    Walks `pv.map()` and keeps only phones of the requested `os` whose own
    role does not contain "control", and only evaluation ranges whose
    `eval_role_base` equals `role`.

    For every trip range found, a shallow copy of the trip range is returned
    with two extra keys:
      * 'ss_timeline'  -- list of the trip's `sensed_section_ranges`
      * 'gts_timeline' -- list of {'start_ts', 'end_ts', 'mode'} dicts, one per
        evaluation section whose ground-truth leg is not of type "WAITING";
        the mode comes from `pv.spec_details.get_ground_truth_for_leg`.

    Raises AssertionError for an unknown `os` or `role`.
    """
    assert os in ['android', 'ios'], 'UNKNOWN OS'
    assert role in ['accuracy_control', 'HAHFDC', 'HAMFDC', 'MAHFDC', 'power_control'], "UNKNOWN ROLE"

    def ground_truth_sections(trip_range):
        # One entry per evaluation section that has a usable ground-truth mode.
        timeline = []
        for section in trip_range["evaluation_section_ranges"]:
            gt_leg = pv.spec_details.get_ground_truth_for_leg(trip_range['trip_id_base'],
                                                              section['trip_id_base'],
                                                              trip_range['start_ts'],
                                                              trip_range['end_ts'])
            # WAITING legs are skipped (they may contain partway transitions).
            if gt_leg["type"] != "WAITING":
                timeline.append({'start_ts': section['start_ts'],
                                 'end_ts': section['end_ts'],
                                 'mode': gt_leg['mode']})
        return timeline

    timelines = []
    for phone_os, phones in pv.map().items():
        if phone_os != os:
            continue
        for phone in phones.values():
            # control phones are ignored since they are always on
            if "control" in phone["role"]:
                continue
            # only evaluation ranges are used here; calibration ranges are not read
            matching_ranges = (r for r in phone["evaluation_ranges"]
                               if r['eval_role_base'] == role)
            for eval_range in matching_ranges:
                for trip_range in eval_range["evaluation_trip_ranges"]:
                    sensed = list(trip_range["sensed_section_ranges"])
                    truth = ground_truth_sections(trip_range)
                    # annotate a copy so the phone view itself is left untouched
                    annotated = trip_range.copy()
                    annotated['ss_timeline'] = sensed
                    annotated['gts_timeline'] = truth
                    timelines.append(annotated)
    return timelines

## Binary Classification (in seconds)

#### raw base mode map

In [None]:
# Raw base mode map: collapses each mode label (sensed or ground truth) onto one
# of the four base modes WALKING, CYCLING, AUTOMOTIVE and INVALID.
# Every base mode is also a key that maps to itself; get_binary_class_in_sec
# depends on that, because it looks up BASE_MODE[mode] for the base modes too.
RBMM = {"WALKING": "WALKING",
             "RUNNING" : "WALKING", 
             "CYCLING" : "CYCLING",
             "BICYCLING": "CYCLING",
             "ESCOOTER": "CYCLING", 
             "AUTOMOTIVE" : "AUTOMOTIVE",
             "BUS": "AUTOMOTIVE",
             "TRAIN": "AUTOMOTIVE",
             "LIGHT_RAIL": "AUTOMOTIVE",
             "SUBWAY": "AUTOMOTIVE",
             "CAR": "AUTOMOTIVE",
             "AIR_OR_HSR": "AUTOMOTIVE",
             "INVALID" : "INVALID"}

#### cleaned base mode map

e-mission-server.emission.core.wrapper.motionactivity.py

In [None]:
# Cleaned base mode map: integer code -> base mode label. Code 6 has no entry.
# NOTE(review): presumably the codes are the motion activity type values from
# emission.core.wrapper.motionactivity (per the note above this cell) -- confirm
# against that module.
CBMM = {0 : 'AUTOMOTIVE', 
        1 : 'CYCLING', 
        2 : 'WALKING', 
        3 : 'WALKING', 
        4 : 'INVALID', 
        5 : 'WALKING', 
        7 : 'WALKING', 
        8 : 'WALKING', 
        9 : 'INVALID', 
        10 : 'AUTOMOTIVE', 
        11 : 'AUTOMOTIVE'}

#### inferred base mode map

e-mission-server.emission.core.wrapper.modeprediction.py

In [None]:
# Inferred base mode map: integer code -> mode label. Unlike RBMM, 'TRAIN' is
# kept as a label of its own here.
# NOTE(review): presumably the codes are the predicted mode type values from
# emission.core.wrapper.modeprediction (per the note above this cell) -- confirm
# against that module.
# NOTE(review): get_rf_binary passes this map to get_binary_class_in_sec, which
# indexes BASE_MODE with the string labels taken from BASE_MODE.values(); none
# of those labels is a key of this dict, so that lookup looks like it would
# raise KeyError -- confirm.
IBMM = {0 : 'INVALID', 
        1 : 'WALKING', 
        2 : 'CYCLING', 
        3 : 'AUTOMOTIVE', 
        4 : 'TRAIN', 
        5 : 'AUTOMOTIVE', 
        6 : 'AUTOMOTIVE'}

In [None]:
def get_binary_class_in_sec(os, role, pv, BASE_MODE):
    """Accumulate per-mode TP/FP/FN/TN durations for one OS and role.

    For every trip returned by `get_trip_ss_and_gts_timeline`, every base
    mode in `BASE_MODE.values()` and every sensed section, the overlap of the
    sensed section with each ground-truth section is classified one-vs-rest
    for that base mode and its duration is added to the matching bucket.

    Any part of a sensed section that is not covered by ground truth
    ("leftover") is counted as TP for the 'INVALID' mode; for other modes it
    is FP when the sensed mode maps to that mode and TN otherwise.

    Parameters:
        os: 'android' or 'ios' (validated by get_trip_ss_and_gts_timeline).
        role: evaluation role, e.g. 'HAHFDC'.
        pv: a single phone view or a list of them; a single view is wrapped
            into a one-element list.
        BASE_MODE: dict mapping mode labels to base modes. It is indexed with
            the sensed mode, the ground-truth mode AND the base modes
            themselves, so all of those must be keys.

    Returns:
        (TP, FP, FN, TN): four dicts keyed by base mode. Durations are in the
        unit of the `start_ts`/`end_ts` fields (presumably seconds -- confirm).

    Raises:
        AssertionError: if ground truth covers more than a sensed section's
            own duration (negative leftover).
        KeyError: if a mode is missing from `BASE_MODE`.
    """
    # A bare view is accepted as well as a list of views.
    if not isinstance(pv, list):
        pv = [pv]
    trips = []
    for v in pv:
        trips.extend(get_trip_ss_and_gts_timeline(v, os, role))
    TP, FP, FN, TN = {}, {}, {}, {}
    for trip in trips:
        for mode in set(BASE_MODE.values()):
            for ss in trip['ss_timeline']:
                if 'data' in ss.keys():
                    # analysed sections wrap the fields in a 'data' entry
                    # (taken from emission.core.wrapper.modeprediction)
                    ss = ss['data']
                ss_dur = ss['end_ts'] - ss['start_ts']
                gts_dur = 0
                for gts in trip['gts_timeline']:
                    if not (ss['end_ts'] >= gts['start_ts'] and ss['start_ts'] <= gts['end_ts']):
                        continue
                    dur = min(ss['end_ts'], gts['end_ts']) - max(ss['start_ts'], gts['start_ts'])
                    gts_dur += dur
                    sensed_is_mode = BASE_MODE[mode] == BASE_MODE[ss['mode']]
                    truth_is_mode = BASE_MODE[mode] == BASE_MODE[gts['mode']]
                    if sensed_is_mode and truth_is_mode:
                        bucket = TP
                    elif sensed_is_mode:
                        bucket = FP
                    elif truth_is_mode:
                        bucket = FN
                    else:
                        bucket = TN
                    bucket[mode] = bucket.get(mode, 0) + dur
                leftover = ss_dur - gts_dur
                assert leftover >= 0, f"ERROR, NEGATIVE LEFTOVER OF {leftover}, NEED TO INVESTIGATE"
                if leftover > 0:
                    # invalid base mode maps to NO_GT mode
                    # NOTE(review): this counts the leftover as TP for INVALID
                    # whatever the sensed mode is -- confirm that is intended.
                    if mode == 'INVALID':
                        TP[mode] = TP.get(mode, 0) + leftover
                    # We have no gts, but our modes are equal, so a false positive
                    elif BASE_MODE[mode] == BASE_MODE[ss['mode']]:
                        FP[mode] = FP.get(mode, 0) + leftover
                    # We have no gts, but our modes are unequal, so a true negative
                    else:
                        TN[mode] = TN.get(mode, 0) + leftover
    return TP, FP, FN, TN

#### raw output binary tables
BASE MODES = `['WALKING', 'CYCLING', 'AUTOMOTIVE', 'INVALID']`

In [None]:
def get_binary_raw_data(os):
    """Print one LaTeX TP/FP/FN/TN table per role for the raw sensed data.

    For each of the roles HAHFDC, HAMFDC and MAHFDC, the binary
    classification counts of `get_binary_class_in_sec` (using the RBMM mode
    map over pv_la, pv_ucb and pv_sj) are arranged with one row per
    classifier outcome and one column per base mode, then printed via
    `Styler.to_latex`. Each table is titled `<os>\\_<role>`.

    Parameters:
        os: 'android' or 'ios'.

    Returns:
        None; the tables are written to stdout.
    """
    BASE_MODE = RBMM
    mode_columns = ['WALKING', 'CYCLING', 'AUTOMOTIVE', 'INVALID']
    for role in ['HAHFDC', 'HAMFDC', 'MAHFDC']:
        counts = pd.DataFrame(get_binary_class_in_sec(os, role, [pv_la, pv_ucb, pv_sj], BASE_MODE)).fillna(0)
        # fill_value=0 keeps a mode that never occurred as a zero column; a NaN
        # column would make the astype(int) below fail.
        counts = counts.reindex(columns=mode_columns, fill_value=0)
        # Wrapping in a dict gives the columns a (title, mode) MultiIndex.
        table = pd.concat({os + '\\_' + role: counts.reset_index(drop=True)}, axis=1)
        # Row order follows the (TP, FP, FN, TN) return order of get_binary_class_in_sec.
        table['Classifier'] = ['TP', 'FP', 'FN', 'TN']
        print(table.set_index('Classifier').rename_axis(['Title', 'Mode'], axis=1).astype(int).style.to_latex())

#### random forest binary tables
BASE MODES = `['WALKING', 'CYCLING', 'AUTOMOTIVE', 'INVALID']`

In [None]:
def get_rf_binary(os):
    """Print one LaTeX TP/FP/FN/TN table per role for the inferred-section views.

    Uses the IBMM mode map and the rfv_la, rfv_ucb and rfv_sj views. For each
    of the roles HAHFDC, HAMFDC and MAHFDC the counts returned by
    `get_binary_class_in_sec` are laid out with one row per classifier
    outcome and one column per mode, titled `<os>\\_<role>`, and printed via
    `Styler.to_latex`. Returns None.
    """
    BASE_MODE = IBMM
    mode_columns = ['WALKING', 'CYCLING', 'AUTOMOTIVE', 'TRAIN', 'INVALID']
    for role in ('HAHFDC', 'HAMFDC', 'MAHFDC'):
        counts = get_binary_class_in_sec(os, role, [rfv_la, rfv_ucb, rfv_sj], BASE_MODE)
        per_mode = pd.DataFrame(counts).fillna(0).reindex(columns=mode_columns)
        # A single-entry dict makes concat add the title as outer column level.
        titled = pd.concat({os + '\\_' + role: per_mode.reset_index(drop=True)}, axis=1)
        titled['Classifier'] = ['TP', 'FP', 'FN', 'TN']
        labelled = titled.set_index('Classifier').rename_axis(['Title', 'Mode'], axis=1)
        print(labelled.astype(int).style.to_latex())

# $F_\beta$ score
$$
F_\beta = \frac {(1 + \beta^2) \cdot \mathrm{true\ positive} }{(1 + \beta^2) \cdot \mathrm{true\ positive} + \beta^2 \cdot \mathrm{false\ negative} + \mathrm{false\ positive}}
$$

In [None]:
def get_F_score(os, role, pv, BASE_MODE, beta=1):
    """Compute the per-mode F-beta score from the binary classification durations.

    F_beta = (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP)

    Parameters:
        os: 'android' or 'ios'.
        role: one of the known evaluation roles.
        pv: phone view or list of phone views, passed to get_binary_class_in_sec.
        BASE_MODE: mode map, passed to get_binary_class_in_sec.
        beta: weight of recall relative to precision (default 1).

    Returns:
        dict mapping mode -> F-beta score rounded to 10 decimals. Every mode
        that has a TP, FP or FN entry is reported, so a mode that was never
        classified correctly shows up with a score of 0 instead of being
        left out. A mode whose denominator is 0 also gets 0.0.

    Raises:
        AssertionError: for an unknown `os` or `role`.
    """
    assert os in ['android', 'ios'], 'UNKNOWN OS'
    assert role in ['accuracy_control', 'HAHFDC', 'HAMFDC', 'MAHFDC', 'power_control'], "UNKNOWN ROLE"
    (TP, FP, FN, _TN) = get_binary_class_in_sec(os, role, pv, BASE_MODE)
    beta_sq = beta**2
    decimals = 10
    F_score = {}
    # dict.fromkeys de-duplicates while keeping first-seen order (TP modes first).
    for mode in dict.fromkeys([*TP, *FP, *FN]):
        numerator = (1 + beta_sq) * TP.get(mode, 0)
        denominator = numerator + beta_sq * FN.get(mode, 0) + FP.get(mode, 0)
        # A zero denominator means no TP, FP or FN duration at all; report 0.0
        # rather than dividing by zero.
        score = numerator / denominator if denominator else 0.0
        F_score[mode] = round(score, decimals)
    return F_score

In [None]:
def display_f_score(os, pv, BASE_MODE):
    """Return the F1 scores of the HAHFDC, HAMFDC and MAHFDC roles.

    Parameters:
        os: 'android' or 'ios'.
        pv: phone view or list of phone views.
        BASE_MODE: mode map passed through to get_F_score.

    Returns:
        A 3-tuple (HAHFDC, HAMFDC, MAHFDC); each element is a list of
        (mode, score) pairs with the score rounded to 4 decimals.
    """
    per_role = []
    for role in ('HAHFDC', 'HAMFDC', 'MAHFDC'):
        # get_F_score walks every trip, so it is called once per role and the
        # result is reused for all modes.
        scores = get_F_score(os, role, pv, BASE_MODE, beta=1)
        per_role.append([(mode, round(score, 4)) for mode, score in scores.items()])
    return tuple(per_role)

## Confusion Matrix
We will now generate confusion matrices for each OS and role, with the actual (ground truth) modes as the columns, the predicted (sensed) modes as the rows, and the entries as durations in the base time unit of the timestamps.

In [None]:
def get_confusion_matrix(os, role, pv):
    """Build one confusion-matrix record per sensed section.

    `pv` may be a single phone view or a list of them. For every sensed
    section of every trip returned by `get_trip_ss_and_gts_timeline`, a dict
    is produced that maps each overlapping ground-truth mode to the summed
    overlap duration, plus:
      * 'NO_GT'       -- the part of the sensed section not covered by any
                         ground-truth section
      * 'sensed_mode' -- the mode of the sensed section

    Returns the list of these dicts. Raises AssertionError for an unknown
    `os`/`role`, or when the ground truth covers more than the sensed
    section's own duration.
    """
    assert os in ['android', 'ios'], 'UNKNOWN OS'
    assert role in ['accuracy_control', 'HAHFDC', 'HAMFDC', 'MAHFDC', 'power_control'], "UNKNOWN ROLE"

    def tally(sensed, gts_timeline):
        # Split the duration of one sensed section over the ground-truth modes.
        sensed_duration = sensed['end_ts'] - sensed['start_ts']
        covered = 0
        record = {}
        for truth in gts_timeline:
            overlapping = sensed['end_ts'] >= truth['start_ts'] and sensed['start_ts'] <= truth['end_ts']
            if not overlapping:
                continue
            shared = min(sensed['end_ts'], truth['end_ts']) - max(sensed['start_ts'], truth['start_ts'])
            covered += shared
            record[truth['mode']] = record.get(truth['mode'], 0) + shared
        leftover = sensed_duration - covered
        assert leftover >= 0, f"ERROR, NEGATIVE LEFTOVER OF {leftover}, NEED TO INVESTIGATE"
        record['NO_GT'] = record.get('NO_GT', 0) + leftover
        record['sensed_mode'] = sensed['mode']
        return record

    views = pv if type(pv) is list else [pv]
    trips = []
    for view in views:
        trips.extend(get_trip_ss_and_gts_timeline(view, os, role))

    records = []
    for trip in trips:
        for sensed in trip['ss_timeline']:
            # analysed sections wrap the fields in a 'data' entry
            # (taken from emission.core.wrapper.modeprediction)
            if 'data' in sensed.keys():
                sensed = sensed['data']
            records.append(tally(sensed, trip['gts_timeline']))
    return records

In [None]:
def get_raw_cm(os):
    """Print the LaTeX confusion matrices of the raw sensed data for one OS.

    For each of the roles HAHFDC, HAMFDC and MAHFDC, the records of
    `get_confusion_matrix` over pv_la, pv_sj and pv_ucb are summed per sensed
    mode and reindexed to a fixed set of ground-truth columns and a
    per-OS/per-role set of sensed-mode rows, then printed via
    `Styler.to_latex`. Nothing is printed for an `os` other than 'ios' or
    'android'. Returns None.
    """
    gt_columns = ['WALKING', 'BICYCLING', 'ESCOOTER', 'CAR', 'BUS', 'LIGHT_RAIL', 'TRAIN', 'NO_GT']
    ios_tables = [('HAHFDC', ['WALKING', 'RUNNING', 'CYCLING', 'AUTOMOTIVE']),
                  ('HAMFDC', ['WALKING', 'RUNNING', 'CYCLING', 'AUTOMOTIVE']),
                  # this table has no CYCLING row
                  ('MAHFDC', ['WALKING', 'RUNNING', 'AUTOMOTIVE'])]
    android_tables = [('HAHFDC', ['WALKING', 'CYCLING', 'AUTOMOTIVE']),
                      ('HAMFDC', ['WALKING', 'CYCLING', 'AUTOMOTIVE']),
                      ('MAHFDC', ['WALKING', 'CYCLING', 'AUTOMOTIVE'])]

    tables = []
    if os == 'ios':
        tables.extend(ios_tables)
    if os == 'android':
        tables.extend(android_tables)

    for role, sensed_rows in tables:
        records = get_confusion_matrix(os, role, [pv_la, pv_sj, pv_ucb])
        totals = pd.DataFrame(records).groupby('sensed_mode').sum().astype(int)
        print(totals.reindex(columns=gt_columns, index=sensed_rows).style.to_latex())

In [None]:
def get_rf_cm(os):
    """Print the LaTeX confusion matrices of the inferred-section views for one OS.

    For each of the roles HAHFDC, HAMFDC and MAHFDC, the records of
    `get_confusion_matrix` over rfv_la, rfv_sj and rfv_ucb are summed per
    sensed mode and reindexed to a fixed set of ground-truth columns and a
    per-OS/per-role set of sensed-mode rows, then printed via
    `Styler.to_latex`. Nothing is printed for an `os` other than 'ios' or
    'android'. Returns None.
    """
    gt_columns = ['WALKING', 'BICYCLING', 'ESCOOTER', 'CAR', 'BUS', 'LIGHT_RAIL', 'TRAIN', 'NO_GT']
    # (os label, role, sensed-mode rows); the row sets differ from table to table.
    ios_tables = [('ios', 'HAHFDC', ['WALKING', 'BICYCLING', 'TRAIN', 'CAR', 'AIR_OR_HSR']),
                  ('ios', 'HAMFDC', ['WALKING', 'BICYCLING', 'CAR', 'AIR_OR_HSR']),
                  ('ios', 'MAHFDC', ['WALKING', 'BICYCLING', 'CAR'])]
    android_tables = [('android', 'HAHFDC', ['WALKING', 'BICYCLING', 'CAR', 'AIR_OR_HSR']),
                      ('android', 'HAMFDC', ['WALKING', 'BICYCLING', 'TRAIN', 'CAR']),
                      ('android', 'MAHFDC', ['WALKING', 'BICYCLING', 'TRAIN', 'CAR', 'AIR_OR_HSR'])]

    tables = []
    if os == 'ios':
        tables.extend(ios_tables)
    if os == 'android':
        tables.extend(android_tables)

    for os_label, role, sensed_rows in tables:
        records = get_confusion_matrix(os_label, role, [rfv_la, rfv_sj, rfv_ucb])
        totals = pd.DataFrame(records).groupby('sensed_mode').sum().astype(int)
        print(totals.reindex(columns=gt_columns, index=sensed_rows).style.to_latex())

## Analyzed Data

#### cleaned view

In [None]:
# Analysed views built from each phone view against the local datastore, using
# the "analysis/cleaned_section" key as the last argument.
# NOTE(review): presumably the three keys select the location, trip and section
# entries to load -- confirm against emeval.analysed.phone_view.
cv_la   = eapv.create_analysed_view(pv_la, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/cleaned_section")
cv_sj   = eapv.create_analysed_view(pv_sj, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/cleaned_section")
cv_ucb  = eapv.create_analysed_view(pv_ucb, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/cleaned_section")

#### inferred view random forest

In [None]:
# Same construction as the cleaned views, but with "analysis/inferred_section"
# as the last argument. These rfv_* globals are the ones read by get_rf_binary
# and get_rf_cm, so this cell has to run before those functions are called.
rfv_la   = eapv.create_analysed_view(pv_la, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")
rfv_sj   = eapv.create_analysed_view(pv_sj, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")
rfv_ucb  = eapv.create_analysed_view(pv_ucb, "http://localhost:8080", "analysis/recreated_location", "analysis/cleaned_trip", "analysis/inferred_section")

#### inferred view GIS

# Results 

#### Raw data

In [None]:
# Binary TP/FP/FN/TN tables (LaTeX) for the raw sensed data, iOS phones
get_binary_raw_data('ios')

In [None]:
# Binary TP/FP/FN/TN tables (LaTeX) for the raw sensed data, Android phones
get_binary_raw_data('android')

In [None]:
# F1 scores per role for the raw sensed data, iOS phones
display_f_score('ios', [pv_la, pv_sj, pv_ucb], RBMM)

In [None]:
# F1 scores per role for the raw sensed data, Android phones
display_f_score('android', [pv_la, pv_sj, pv_ucb], RBMM)

In [None]:
# Confusion matrices (LaTeX) for the raw sensed data, iOS phones
get_raw_cm('ios')

In [None]:
# Confusion matrices (LaTeX) for the raw sensed data, Android phones
get_raw_cm('android')

#### Cleaned data

#### Random Forest

In [None]:
# Binary TP/FP/FN/TN tables (LaTeX) for the inferred-section views, iOS phones
get_rf_binary('ios')

In [None]:
# Binary TP/FP/FN/TN tables (LaTeX) for the inferred-section views, Android phones
get_rf_binary('android')

In [None]:
# F1 scores per role for the inferred-section views, iOS phones
display_f_score('ios', [rfv_la, rfv_sj, rfv_ucb], IBMM)

In [None]:
# F1 scores per role for the inferred-section views, Android phones
display_f_score('android', [rfv_la, rfv_sj, rfv_ucb], IBMM)

In [None]:
# Confusion matrices (LaTeX) for the inferred-section views, iOS phones
get_rf_cm('ios')

In [None]:
# Confusion matrices (LaTeX) for the inferred-section views, Android phones
get_rf_cm('android')

#### GIS