In [1]:
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.preprocessing.statistics import Statistics

# Read the raw CSV into a plain pandas DataFrame first.
pdf = pd.read_csv('./starkey_new.csv')

# Wrap the raw frame in a PTRAILDataFrame, telling PTRAIL which CSV
# columns hold the latitude, longitude, timestamp and trajectory id.
starkey = PTRAILDataFrame(
    data_set=pdf,
    latitude='lat',
    longitude='lon',
    datetime='DateTime',
    traj_id='Id',
)

# Printing a PTRAILDataFrame emits the "Dataset Facts" summary
# (trajectory count, point count, time range, bounding box).
print(starkey)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 253
Number of points in the data: 287136
Dataset time range: 1196 days 22:51:45
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (45.18896978643169, -118.61020848239596, 45.314545642992, -118.50455596234036)

---------------------------------------------------------------------


In [2]:
%%time

# Generate the kinematic statistics for the whole `starkey` dataset.
# This is the expensive step of the notebook: the recorded run took
# roughly 40 s of wall time, which is why the cell is wrapped in %%time.
# NOTE(review): presumably these are per-trajectory summaries of the
# kinematic features (the pivoted columns in the next cell suggest
# percentiles/min/std of speed, bearing, etc.) -- confirm against the
# PTRAIL Statistics documentation.
stats = Statistics.generate_kinematic_stats(starkey)

CPU times: user 37.2 s, sys: 1.24 s, total: 38.5 s
Wall time: 40.6 s


In [3]:
# Pivot the statistics frame so that it is indexed by traj_id with one
# column per "<statistic>_<feature>" pair, carrying 'Species' along as
# the target column for the classifiers trained further down.
pivoted_stats = Statistics.pivot_stats_df(
    dataframe=stats,
    target_col_name='Species',
)

# Bare last expression -> rich (truncated) DataFrame display.
pivoted_stats

Unnamed: 0_level_0,10%_Acceleration,10%_Bearing,10%_Bearing_Rate,10%_Distance,10%_Distance_from_start,10%_Jerk,10%_Rate_of_bearing_rate,10%_Speed,25%_Acceleration,25%_Bearing,...,min_Speed,std_Acceleration,std_Bearing,std_Bearing_Rate,std_Distance,std_Distance_from_start,std_Jerk,std_Rate_of_bearing_rate,std_Speed,Species
traj_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
910313E37,-2.424220e-05,36.304532,-0.063322,30.022359,1121.185006,-1.388547e-08,-0.063322,0.008259,-6.195019e-06,80.020676,...,0.000000,0.003805,106.277853,0.154034,398.901980,1502.135300,7.500795e-05,0.154034,0.351682,1
890424E08,-2.742458e-05,32.508596,-0.064023,42.384642,674.170024,-1.644760e-08,-0.064023,0.008840,-6.594604e-06,79.399167,...,0.000000,0.009098,104.887878,0.241358,446.249248,1009.296449,2.761390e-04,0.241358,0.627852,1
921228E06,-2.609985e-05,32.488596,-0.058033,42.390114,807.693739,-1.409491e-08,-0.058033,0.008041,-5.299279e-06,80.427854,...,0.000000,0.628230,105.795948,1.008263,517.650637,1282.236895,2.092630e-01,1.008263,2.224608,1
930304E16,-2.893563e-05,43.769266,-0.068092,42.384012,1156.455188,-1.707229e-08,-0.068092,0.009212,-6.483826e-06,88.865905,...,0.000000,0.482653,104.723326,0.600187,396.220730,1451.846819,7.585291e-02,0.600187,3.764110,1
940110D01,-8.556065e-06,43.796599,-0.041207,30.022192,492.522877,-3.455470e-09,-0.041207,0.005311,-2.424314e-06,88.893389,...,0.000000,1.739274,105.698662,3.677063,235.093739,501.450441,1.733837e+00,3.677063,2.274554,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
OSUX92071,-1.932433e-06,30.368128,-0.013195,42.384783,2019.896767,-3.361987e-10,-0.013195,0.003691,-5.335003e-07,88.874194,...,0.000000,0.000223,101.259361,0.022347,797.302786,1495.831625,2.424151e-07,0.022347,0.231182,2
OSUX92008,-7.314997e-06,12.874876,-0.033016,60.044073,2330.556507,-2.641983e-09,-0.033016,0.003975,-1.188258e-06,70.383379,...,0.000000,0.000033,104.842801,0.033022,565.038959,977.382744,1.407212e-08,0.033022,0.113691,2
OSUX91143,-8.197858e-07,34.813154,-0.011116,90.066320,2741.029578,-1.094577e-10,-0.011116,0.001726,-2.145431e-07,85.697016,...,0.000087,0.000005,106.771327,0.011857,1043.556535,1521.500791,8.591675e-10,0.011857,0.040621,2
OSUX91121,-1.435370e-06,34.339105,-0.010799,66.956676,2660.409775,-2.802791e-10,-0.010799,0.002419,-1.680267e-07,65.905762,...,0.000557,0.000004,108.860939,0.011728,1119.869470,827.029713,6.772859e-10,0.011728,0.030566,2


In [23]:
import plotly.express as px

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Fixed seed so that the train/test split and the randomized estimators
# (random forest, decision tree) produce the same scores on every
# Restart & Run All instead of drifting between executions.
SEED = 42

# Hold out 33% of the trajectories for evaluation. 'Species' is the label;
# every other column of the pivoted statistics frame is used as a feature.
train_x, test_x, train_y, test_y = train_test_split(pivoted_stats.drop(columns=['Species']),
                                                    pivoted_stats["Species"],
                                                    test_size=0.33,
                                                    random_state=SEED)

# All the models
names = ["Random Forest", "Naive Bayes", "Decision Tree", "KNN", "SVC"]
models = [RandomForestClassifier(random_state=SEED),
          GaussianNB(),
          DecisionTreeClassifier(random_state=SEED),
          KNeighborsClassifier(),
          SVC()]
# Not consumed in this cell; kept defined in case a later cell reads it.
model_fit = [x + ' fit' for x in names]
fitted_models = []

# The 3-component PCA projection depends only on the test features, not on
# the classifier, so it is computed once here rather than once per model.
pca = PCA(n_components=3)
transformed = pca.fit_transform(test_x)

# For each model defined above, predict the species for
# our animals.
for name, model in zip(names, models):
    fit = model.fit(train_x, train_y)
    ans = fit.predict(test_x)
    # Weighted F1 averages the per-class F1 scores weighted by class support.
    score = f1_score(y_true=test_y, y_pred=ans, average='weighted')
    fitted_models.append(fit)

    # Plot the test points in PCA space, colored by the predicted species.
    fig = px.scatter_3d(x=transformed[:, 0],
                        y=transformed[:, 1],
                        z=transformed[:, 2],
                        color=ans,
                        color_continuous_scale=px.colors.qualitative.Safe,
                        template='ggplot2',
                        title=f'Model: {name}, f1_score: {round(score, 2)}'
                        )
    fig.update_coloraxes(showscale=False)
    fig.show()