In [3]:
!pip install simdkalman

Collecting simdkalman
  Downloading simdkalman-1.0.1-py2.py3-none-any.whl (11 kB)
Installing collected packages: simdkalman
Successfully installed simdkalman-1.0.1


In [139]:
import numpy as np
import pandas as pd
import simdkalman
from tqdm.notebook import tqdm
import warnings
warnings.simplefilter("ignore")

### カルマンフィルターのパラメータ

In [140]:
T = 1.0


state_transition = np.array([[1, 0, T, 0, 0.5 * T ** 2, 0], [0, 1, 0, T, 0, 0.5 * T ** 2], [0, 0, 1, 0, T, 0],
                             [0, 0, 0, 1, 0, T], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1]])


process_noise = np.diag([1e-5, 1e-5, 5e-6, 5e-6, 1e-6, 1e-6]) + np.ones((6, 6)) * 1e-9

observation_model = np.array([[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]])
observation_noise = np.diag([5e-5, 5e-5]) + np.ones((2, 2)) * 1e-9


display(state_transition)
display(process_noise)
display(observation_model)
display(observation_noise)

array([[1. , 0. , 1. , 0. , 0.5, 0. ],
       [0. , 1. , 0. , 1. , 0. , 0.5],
       [0. , 0. , 1. , 0. , 1. , 0. ],
       [0. , 0. , 0. , 1. , 0. , 1. ],
       [0. , 0. , 0. , 0. , 1. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 1. ]])

array([[1.0001e-05, 1.0000e-09, 1.0000e-09, 1.0000e-09, 1.0000e-09,
        1.0000e-09],
       [1.0000e-09, 1.0001e-05, 1.0000e-09, 1.0000e-09, 1.0000e-09,
        1.0000e-09],
       [1.0000e-09, 1.0000e-09, 5.0010e-06, 1.0000e-09, 1.0000e-09,
        1.0000e-09],
       [1.0000e-09, 1.0000e-09, 1.0000e-09, 5.0010e-06, 1.0000e-09,
        1.0000e-09],
       [1.0000e-09, 1.0000e-09, 1.0000e-09, 1.0000e-09, 1.0010e-06,
        1.0000e-09],
       [1.0000e-09, 1.0000e-09, 1.0000e-09, 1.0000e-09, 1.0000e-09,
        1.0010e-06]])

array([[1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0]])

array([[5.0001e-05, 1.0000e-09],
       [1.0000e-09, 5.0001e-05]])

### カルマンフィルター実行関数

In [143]:
def apply_kf_smoothing(df,kf_=kf):
    
    kf = simdkalman.KalmanFilter(
        state_transition = state_transition,
        process_noise = process_noise,
        observation_model = observation_model,
        observation_noise = observation_noise
    )
    
    
    
    df_set = [];
    
    for (colName,phone),df_per_phone in tqdm(df.groupby(["collectionName","phoneName"])):
        
        data_df = df_per_phone[["latDeg","lngDeg"]];
        data = data_df.to_numpy();
        
        data = data.reshape(1,len(data),2);        
        smoothed = kf_.smooth(data)
        
        data_df['latDeg'] =  smoothed.states.mean[0, :, 0]
        data_df['lngDeg'] =  smoothed.states.mean[0, :, 1]
                
        df_set.append(data_df); 
                                
    modified = pd.concat(df_set);
    
    df["latDeg"] = modified["latDeg"];
    df["lngDeg"] = modified["lngDeg"];
    

### train データに対してカルマンフィルターを行う。

#### 以下、グラフプロット関数

In [128]:

def visualize_trafic(df,color_header:str=None, zoom=9,outputfile=None):
    

    center = {"lat":(df["latDeg"].max()  + df["latDeg"].min())/2, "lon": (df["lngDeg"].max()  + df["lngDeg"].min())/2}
    
    
    if (color_header == None):
        df["history"] =  [ i for i in range(sample_ground_truth_df.shape[0])]
        color_header = "history"
    
    fig = px.scatter_mapbox(df,
                            
                            # Here, plotly gets, (x,y) coordinates
                            lat="latDeg",
                            lon="lngDeg",
                            
                            #Here, plotly detects color of series
                            color=color_header,
                            labels="phoneName",
                            
                            zoom=zoom,
                            center=center,
                            height=600,
                            width=800)
    fig.update_layout(mapbox_style='stamen-terrain')
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    fig.update_layout(title_text="GPS trafic")
    
    
    if (not outputfile == None):
        if (outputfile.endswith(".html")):
            fig.write_html(outputfile);
        elif(outputfile.endswith(".png")):
            fig.write_image(outputfile,format = "png")
        else:
            raise Exception("Sorry, I cannot understand the " + outputfile.split(".")[-1] +" file");
    else:
        return fig
    


In [130]:
train_df = pd.read_csv("/work/data/input/google-smartphone-decimeter-challenge/baseline_locations_train.csv");

path = "2020-09-04-US-SF-1"
phone = "Pixel4"
extracted_train_df = train_df[(train_df["phoneName"] == phone) & (train_df["collectionName"] == path)]

Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone
59239,2020-09-04-US-SF-1,Pixel4,1283274652442,37.416558,-122.081989,-8.86,2020-09-04-US-SF-1_Pixel4
59240,2020-09-04-US-SF-1,Pixel4,1283274653442,37.416553,-122.082009,-10.03,2020-09-04-US-SF-1_Pixel4
59241,2020-09-04-US-SF-1,Pixel4,1283274654442,37.416557,-122.081999,-5.37,2020-09-04-US-SF-1_Pixel4
59242,2020-09-04-US-SF-1,Pixel4,1283274655442,37.416565,-122.081995,-12.26,2020-09-04-US-SF-1_Pixel4
59243,2020-09-04-US-SF-1,Pixel4,1283274656442,37.416547,-122.081976,-13.82,2020-09-04-US-SF-1_Pixel4
...,...,...,...,...,...,...,...
60981,2020-09-04-US-SF-1,Pixel4,1283276399667,37.629641,-122.401144,-15.35,2020-09-04-US-SF-1_Pixel4
60982,2020-09-04-US-SF-1,Pixel4,1283276400654,37.629627,-122.401042,-26.61,2020-09-04-US-SF-1_Pixel4
60983,2020-09-04-US-SF-1,Pixel4,1283276401651,37.629513,-122.401005,-48.44,2020-09-04-US-SF-1_Pixel4
60984,2020-09-04-US-SF-1,Pixel4,1283276402664,37.629522,-122.400881,-28.55,2020-09-04-US-SF-1_Pixel4


In [146]:
filterd = extracted_train_df.copy()
apply_kf_smoothing(filterd);

  0%|          | 0/1 [00:00<?, ?it/s]

In [147]:
visualize_trafic(filterd)

NameError: name 'sample_ground_truth_df' is not defined