In [1]:
from lib.kalman_filter import generate_kalmanfilter,apply_kalmanfilter
from lib.io import load_pickle_data
from lib.noglobal import noglobal


from external_lib.evaluation_function import calc_haversine
from external_lib.gnss_manager import calc_acce_direction
from external_lib.merge_start_end_points import find_first_and_last_stopping_time,_merge_points


import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
train_path = "/work/data/input/selfmade_dataset/baseline_with_derived_data_v4/train.pkl"
train_df = load_pickle_data(train_path)


## 各関数

In [3]:


@noglobal()
def kalman_all(df):

    if (df["phone"].unique().shape[0] != 1):
        raise Exception()
    
    tmp_df = df.copy()
        
    num = tmp_df[["latDeg","lngDeg"]].to_numpy()                        
    kf = generate_kalmanfilter()  
    result = apply_kalmanfilter(num,kf)           
    tmp_df[["latDeg","lngDeg"]] = result         
       
    return tmp_df


@noglobal(excepts=["calc_acce_direction","find_first_and_last_stopping_time","_merge_points"])
def kalman_patial(df):

    if (df["phone"].unique().shape[0] != 1):
        raise Exception()
    
    tmp_df = df.copy()
    
    path,phone_name = tmp_df["phone"].unique()[0].split("_");
    target_col = "x_f"

    acce_df = calc_acce_direction(path,phone_name);            
    startPoint_gps,endPoint_gsp = find_first_and_last_stopping_time(acce_df,target_col);                        
        
    num = tmp_df.loc[ (tmp_df["millisSinceGpsEpoch_"] <= endPoint_gsp) | (tmp_df["millisSinceGpsEpoch_"] >= startPoint_gps) ,["latDeg","lngDeg"]].to_numpy()                        
    kf = generate_kalmanfilter()  
    result = apply_kalmanfilter(num,kf)           
    tmp_df.loc[ (tmp_df["millisSinceGpsEpoch_"] <= endPoint_gsp) | (tmp_df["millisSinceGpsEpoch_"] >= startPoint_gps) ,["latDeg","lngDeg"]] = result                                                
    
    return tmp_df

@noglobal(excepts=["calc_acce_direction","find_first_and_last_stopping_time","_merge_points"])
def kalman_all_with_merge(df):

    if (df["phone"].unique().shape[0] != 1):
        raise Exception()
        print(p)
    
    tmp_df = df.copy()
    
    path,phone_name = tmp_df["phone"].unique()[0].split("_");
    target_col = "x_f"

    acce_df = calc_acce_direction(path,phone_name);            
    first_gps,last_gps = find_first_and_last_stopping_time(acce_df,target_col);                        
    
    tmp_df = _merge_points(tmp_df,first_gps,last_gps);
    
    num = tmp_df[["latDeg","lngDeg"]].to_numpy()                        
    kf = generate_kalmanfilter()  
    result = apply_kalmanfilter(num,kf)           
    tmp_df[["latDeg","lngDeg"]] = result         
                        
    return tmp_df

@noglobal(excepts=["calc_acce_direction","find_first_and_last_stopping_time","_merge_points"])
def kalman_partial_with_merge(df):

    if (df["phone"].unique().shape[0] != 1):
        raise Exception()
        print(p)
    
    tmp_df = df.copy()
    
    path,phone_name = tmp_df["phone"].unique()[0].split("_");
    target_col = "x_f"

    acce_df = calc_acce_direction(path,phone_name);            
    startPoint_gps,endPoint_gsp = find_first_and_last_stopping_time(acce_df,target_col);                        
    
    tmp_df = _merge_points(tmp_df,startPoint_gps,endPoint_gsp);
    tmp = tmp_df.loc[ (tmp_df["millisSinceGpsEpoch_"] > endPoint_gsp) | (tmp_df["millisSinceGpsEpoch_"] < startPoint_gps)]
        
    num = tmp_df.loc[ (tmp_df["millisSinceGpsEpoch_"] <= endPoint_gsp) | (tmp_df["millisSinceGpsEpoch_"] >= startPoint_gps) ,["latDeg","lngDeg"]].to_numpy()                        
    kf = generate_kalmanfilter()  
    result = apply_kalmanfilter(num,kf)           
    tmp_df.loc[ (tmp_df["millisSinceGpsEpoch_"] <= endPoint_gsp) | (tmp_df["millisSinceGpsEpoch_"] >= startPoint_gps) ,["latDeg","lngDeg"]] = result                                                
                        
    return tmp_df


In [4]:
list_original = []
list_all = []
list_patial = [];
list_all_with_merge = []
list_partial_with_merge = []


for key,each_df in tqdm(train_df.groupby("phone")):
    
    each_df["millisSinceGpsEpoch_"] = each_df["millisSinceGpsEpoch"] //1000
    
    kalman_all_df = kalman_all(each_df);
    kalman_patial_df = kalman_patial(each_df);
    kalman_all_with_merge_df = kalman_all_with_merge(each_df);
    kalman_partial_with_merge_df = kalman_partial_with_merge(each_df);
    
    
    tmp_df = each_df
    dis = calc_haversine(tmp_df["latDeg"],tmp_df["lngDeg"],each_df["latDeg_gt"],each_df["lngDeg_gt"]).mean()
    list_original.append(dis)
    
    tmp_df = kalman_all_df
    dis = calc_haversine(tmp_df["latDeg"],tmp_df["lngDeg"],each_df["latDeg_gt"],each_df["lngDeg_gt"]).mean()
    list_all.append(dis)
    
    tmp_df = kalman_patial_df
    dis = calc_haversine(tmp_df["latDeg"],tmp_df["lngDeg"],each_df["latDeg_gt"],each_df["lngDeg_gt"]).mean()
    list_patial.append(dis)
    
    tmp_df = kalman_all_with_merge_df
    dis = calc_haversine(tmp_df["latDeg"],tmp_df["lngDeg"],each_df["latDeg_gt"],each_df["lngDeg_gt"]).mean()
    list_all_with_merge.append(dis)
    
    tmp_df = kalman_partial_with_merge_df
    dis = calc_haversine(tmp_df["latDeg"],tmp_df["lngDeg"],each_df["latDeg_gt"],each_df["lngDeg_gt"]).mean()
    list_partial_with_merge.append(dis)
    


  0%|          | 0/73 [00:00<?, ?it/s]

0.07679369115047785
0.07679369115047785
0.027444472282359075
0.027444472282359075
0.09074677363260952
0.09074677363260952
0.06368215261330701
0.06368215261330701
0.2463805860491479
0.2463805860491479
0.06669365082077297
0.06669365082077297
0.06744961890775161
0.06744961890775161
0.2128402806561067
0.2128402806561067
0.06856381771041498
0.06856381771041498
0.005079321772517069
0.005079321772517069
0.028266753990192288
0.028266753990192288
0.01933642122046905
0.01933642122046905
0.12867683184184625
0.12867683184184625
0.056317525239038824
0.056317525239038824
0.06044765189367119
0.06044765189367119
0.2036189553626569
0.2036189553626569
0.19881588762144653
0.19881588762144653
0.0
0.0
0.001359598097505004
0.001359598097505004
0.002993450579762891
0.002993450579762891
0.23330062419698683
0.23330062419698683
0.2737608397591605
0.2737608397591605
0.25508600528286046
0.25508600528286046
0.13276174616858022
0.13276174616858022
0.10057514204669064
0.10057514204669064
0.007972129492958521
0.00797

IndexError: single positional indexer is out-of-bounds

In [5]:
import numpy as np 

#list_original
#list_all
#list_patial
#list_all_with_merge 
#list_partial_with_merge 

print("original",np.mean(list_original))
print("kalman_all",np.mean(list_all))
print("kalman_partial",np.mean(list_patial))
print("kalman_all_with_merge",np.mean(list_all_with_merge ))
print("kalman_partial_with_merge",np.mean(list_partial_with_merge ))



original 2.6956054342719993
kalman_all 2.425119005880942
kalman_partial 2.425119005880942
kalman_all_with_merge 2.3997535867813573
kalman_partial_with_merge 2.3997535867813573
