In [None]:
"""classical_models.ipynb
by: Archie Gertsman (arkadiy2@illinois.edu)
Project director: Richard Sowers
r-sowers@illinois.eduhttps://publish.illinois.edu/r-sowers/
Copyright 2019 University of Illinois Board of Trustees. All Rights Reserved. Licensed under the MIT license
"""

In [1]:

import pandas as pd
import numpy as np
from feature_eng import split_trajectories
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [16]:
df = pd.read_pickle('block4_concat_lane.pkl')
#df = pd.read_pickle('block4_edge_filter.pkl')
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lat,lon,speed,lon_acc,lat_acc,type,traveled_d,avg_speed,bearing,nearest_edge_start_node,...,xtrack_dist,time_stamp,edge_progress_intervals,edge_id,len,lanes,node_veh_dist,edge_seg,vehicle_density,avg_surr_speed
file_name,id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
4_1,1,42.0,37.982746,23.732961,11.9046,-0.1145,0.0138,Taxi,182.37,9.740748,1.570795,250699362,...,-1.883401,42.0,0.3,250699362_250699984,97.581,5.4,29.81433,1.0,7,10.464171
4_1,1,42.04,37.982746,23.732963,11.8975,-0.1007,0.0147,Taxi,182.37,9.740748,0.168572,250699362,...,-1.980795,42.04,0.3,250699362_250699984,97.581,5.4,29.67483,1.0,7,10.457843
4_1,1,42.08,37.982747,23.732964,11.8919,-0.0918,0.0157,Taxi,182.37,9.740748,0.168573,250699362,...,-1.937041,42.08,0.3,250699362_250699984,97.581,5.4,29.537753,1.0,7,10.452857
4_1,1,42.12,37.982748,23.732965,11.8871,-0.0869,0.0167,Taxi,182.37,9.740748,1.570796,250699362,...,-1.893287,42.12,0.3,250699362_250699984,97.581,5.4,29.400718,1.0,7,10.448586
4_1,1,42.16,37.982748,23.732966,11.8831,-0.0784,0.0176,Taxi,182.37,9.740748,0.32808,250699362,...,-1.941984,42.16,0.3,250699362_250699984,97.581,5.4,29.330986,1.0,7,10.444986


In [17]:
#def lane_changes(df,xtrack_dist = 1):
def __xtrack_dist(df):
    """splits a vehicle trajectory into smaller trajectories of fixed size and removes
    the last (len(df) mod size) riws
    """
    
    df["xtrack_diff"] = df.loc[:,['xtrack_dist']]- df.loc[:,['xtrack_dist']].shift(-1)
    df["xtrack_diff"]=df['xtrack_diff'].fillna(0)
    return df


df = df.groupby(['file_name','id'], as_index=False, group_keys=False) \
            .apply(__xtrack_dist)

In [18]:
def reset_traj_and_split(df, traj_len):
    #df.index = df.index.droplevel(1)
    return split_trajectories(df, traj_len)

In [19]:
def agg(df):
    df['xtrack_diff_sq'] = df['xtrack_diff']**2
    df_agg = df[np.isin(df['type'], ['Car','Taxi'])] \
        .groupby(['id', 'traj']).agg({
            'xtrack_diff_sq': ['mean','std','skew','max','min',pd.DataFrame.kurt,'sum'],
            'xtrack_diff': ['mean','std','skew',pd.DataFrame.kurt],
            'xtrack_dist': ['mean','std','skew',pd.DataFrame.kurt],
            'avg_surr_speed': ['mean','std','skew',pd.DataFrame.kurt],
            'lanes':['mean'],
            'len':['mean'],
            'speed':['mean','std','skew',pd.DataFrame.kurt,'sum'],
            'vehicle_density': ['mean','std','skew',pd.DataFrame.kurt],
            'lon_acc': ['mean','std','max','min','skew', pd.DataFrame.kurt],
            'lat_acc': ['mean','std','max','min', 'skew', pd.DataFrame.kurt],
            'type': 'first'
        }) \
        .reset_index(drop=True)
    #df_agg1 = df_agg.copy()
    
    df_agg.columns = ['_'.join(col) for col in df_agg.columns]
    df_agg.speed_sum = df_agg.speed_sum*0.04
    df_agg.rename(columns={'type_first':'type'}, inplace=True)
    g = df_agg.groupby('type')
    df_agg = g.apply(lambda group: group.sample(g.size().min())).reset_index(drop=True)
    X,y = df_agg.drop('type', axis=1), df_agg['type']
    
    return X,y


In [20]:
def train_and_accuracy(X, y, model):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train, y_train)
    y_hat = model.predict(X_test)
    #print(y_hat)
    a = y_hat==y_test
    
    f = f1_score((y_test == 'Car').astype(int),(y_hat == 'Car').astype(int))
    return len(a[a==True]) / len(y_test),f


In [8]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lat,lon,speed,lon_acc,lat_acc,type,traveled_d,avg_speed,bearing,nearest_edge_start_node,...,time_stamp,edge_progress_intervals,edge_id,len,lanes,node_veh_dist,edge_seg,vehicle_density,avg_surr_speed,xtrack_diff
file_name,id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
4_1,1,42.00,37.982746,23.732961,11.9046,-0.1145,0.0138,Taxi,182.37,9.740748,1.570795,250699362,...,42.00,0.3,250699362_250699984,97.581,5.4,29.814330,1.0,7,10.464171,0.097394
4_1,1,42.04,37.982746,23.732963,11.8975,-0.1007,0.0147,Taxi,182.37,9.740748,0.168572,250699362,...,42.04,0.3,250699362_250699984,97.581,5.4,29.674830,1.0,7,10.457843,-0.043754
4_1,1,42.08,37.982747,23.732964,11.8919,-0.0918,0.0157,Taxi,182.37,9.740748,0.168573,250699362,...,42.08,0.3,250699362_250699984,97.581,5.4,29.537753,1.0,7,10.452857,-0.043754
4_1,1,42.12,37.982748,23.732965,11.8871,-0.0869,0.0167,Taxi,182.37,9.740748,1.570796,250699362,...,42.12,0.3,250699362_250699984,97.581,5.4,29.400718,1.0,7,10.448586,0.048697
4_1,1,42.16,37.982748,23.732966,11.8831,-0.0784,0.0176,Taxi,182.37,9.740748,0.328080,250699362,...,42.16,0.3,250699362_250699984,97.581,5.4,29.330986,1.0,7,10.444986,0.004943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1_3,2116,921.64,37.981674,23.735746,25.3482,-0.3011,0.3234,Car,66.63,23.064621,-0.057578,300400248,...,,0.4,300400248_8446047162,106.837,6.5,52.205948,2.0,2,26.005850,-0.106162
1_3,2116,921.68,37.981677,23.735745,25.3217,-0.3032,0.3216,Car,66.63,23.064621,-0.171231,300400248,...,,0.4,300400248_8446047162,106.837,6.5,52.535125,2.0,2,26.008150,0.027622
1_3,2116,921.72,37.981679,23.735743,25.2946,-0.3012,0.3180,Car,66.63,23.064621,-0.086247,300400248,...,,0.4,300400248_8446047162,106.837,6.5,52.816624,2.0,2,26.010500,-0.046176
1_3,2116,921.76,37.981681,23.735742,25.2667,-0.3054,0.3124,Car,66.63,23.064621,-0.171228,300400248,...,,0.4,300400248_8446047162,106.837,6.5,53.051626,2.0,2,26.012950,0.027622


In [22]:
#traj_lens = [349]
traj_lens = np.arange(250,450, step=50)
models = {
        'Random Forest': Pipeline([('scaler', StandardScaler()), ('rf', RandomForestClassifier())]),
        'AdaBoost':Pipeline([('scaler', StandardScaler()), ('abc', AdaBoostClassifier())]) ,
        'SVM': Pipeline([('scaler', StandardScaler()), ('svc', SVC(max_iter=10000))]) ,
        'Log Regression': Pipeline([('scaler', StandardScaler()), ('lr', LogisticRegression(max_iter=10000))]) 
    }
    
df_acc = pd.DataFrame(index=pd.MultiIndex.from_product([models.keys(),['f1_score','accuracy'], ['mean','std']]), columns=traj_lens)

k = 5
kf = StratifiedKFold(n_splits=k, shuffle=True)
accs = np.zeros(k)
f1 = np.zeros(k)
df4 = df.copy()
for j in range(0,7):
    #df = df4[df4.vehicle_density>=j].copy()
    for traj_len in traj_lens:

        df_traj_list = df.groupby(['id','file_name']).count()['lat'].reset_index()
        df_traj_list = df_traj_list[df_traj_list.lat >= traj_len]
        df2 = df[df.index.droplevel('time').isin(df_traj_list.set_index(['file_name','id']).index.to_list())].copy()

        df2 = reset_traj_and_split(df2, traj_len-1)

        df3 = df2.reset_index()[["id","file_name","traj","speed"]]
        df3["speed_bool"]= df3["speed"]>0
        df3 = df3.groupby(["file_name","id","traj"]).sum(["speed_bool"])
        
        df2 = df2[df2.index.droplevel(3).isin(df3[df3.speed_bool >= traj_len*0.75].index.to_list())]
        
        df3 = df2.reset_index()[["id","file_name","traj","vehicle_density"]]
        df3 = df3.groupby(["file_name","id","traj"]).mean(["vehicle_density"])
        
        df2 = df2[df2.index.droplevel(3).isin(df3[df3.vehicle_density >= j].index.to_list())]
        
        X,y = agg(df2)
        print("No of trajectories: ",len(X))
        df_acc.loc[('traj_len','traj_len','total'), traj_len] = len(X)
        df_acc.loc[('traj_len','traj_len','Car'), traj_len] = sum(y == 'Car')
        df_acc.loc[('traj_len','traj_len','Taxi'), traj_len] = sum(y == 'Taxi')
        #print(df_acc)
        for name, model in models.items():
            for i, (train_index, test_index) in enumerate(kf.split(X,y)):
                accs[i],f1[i] = train_and_accuracy(X, y, model)


            df_acc.loc[(name, 'accuracy','mean'), traj_len] = round(100*accs.mean(), 3)
            df_acc.loc[(name, 'accuracy','std'), traj_len] = round(100*accs.std(), 3)
            df_acc.loc[(name, 'f1_score','mean'), traj_len] = round(100*f1.mean(), 3)
            df_acc.loc[(name, 'f1_score','std'), traj_len] = round(100*f1.std(), 3)

            print(name, 'complete.')

        print('trajectory length', traj_len, 'complete.')
    print('minimum traffic ',j)
    print(df_acc)

No of trajectories:  2994


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 250 complete.
No of trajectories:  2394


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 300 complete.
No of trajectories:  1934


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 350 complete.
No of trajectories:  1602


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  0
                                  250     300     350     400
Random Forest  f1_score mean   56.754  57.765  59.394  59.285
                        std     1.273    1.84   2.569   3.725
               accuracy mean   57.147  58.646  59.619  59.679
                        std     1.439   1.547    2.06   3.369
AdaBoost       f1_score mean   55.712   56.01  58.477  55.685
                        std     2.057   3.189   0.843   6.432
               accuracy mean   56.747  57.266  59.411  58.174
                        std     1.176    2.78   0.788   5.306
SVM            f1_score mean   55.072  55.973  55.404  54.998
                        std     2.601   2.277   1.872   1.914
               accuracy mean   58.851  59.901  58.841  59.674
                        std     1.507   1.545   1.546   1.545
Log Regression f1_score mean   54.558  56.343  54.987  56.257
                        std     1.804   2.

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 250 complete.
No of trajectories:  2394


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 300 complete.
No of trajectories:  1934


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 350 complete.
No of trajectories:  1602


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  1
                                  250     300     350     400
Random Forest  f1_score mean   57.354  57.361  56.574  58.684
                        std     1.088   1.609   1.717    2.23
               accuracy mean   58.216  57.644  57.551  58.992
                        std     1.379   1.168   2.366   2.509
AdaBoost       f1_score mean   56.038  54.835  54.713  56.098
                        std     2.036   1.825   1.461    1.12
               accuracy mean   57.882  55.807  55.637  57.491
                        std     1.809   1.678   2.236   2.192
SVM            f1_score mean   55.462  54.008  52.995  52.852
                        std     2.562   1.422   2.722   3.165
               accuracy mean   59.352  58.605  57.187  57.554
                        std     2.279   1.233   1.489   3.086
Log Regression f1_score mean   54.188  54.086  55.675  55.491
                        std     2.078    1

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(


SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 250 complete.
No of trajectories:  1640


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 300 complete.
No of trajectories:  1366


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 350 complete.
No of trajectories:  1130


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  2
                                  250     300     350     400
Random Forest  f1_score mean   59.023  59.387  54.757  54.901
                        std     3.537   2.631   2.132   2.939
               accuracy mean     59.7   58.72  55.855  55.487
                        std     2.605   2.745   2.258   3.655
AdaBoost       f1_score mean   60.431  55.513  53.867  53.494
                        std      1.78   1.803   2.479   1.171
               accuracy mean     61.1  55.915  54.027  54.779
                        std     2.154    2.27   2.591   1.543
SVM            f1_score mean   58.198  55.126  49.431  51.294
                        std     1.872   1.887   4.316   3.387
               accuracy mean     59.8  59.207  56.224   56.46
                        std     1.512   1.942   2.648    2.75
Log Regression f1_score mean   58.659  56.603  52.826  54.063
                        std     3.058   2.

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(


AdaBoost complete.


  result = self._run_cell(
  result = self._run_cell(


SVM complete.
Log Regression complete.
trajectory length 250 complete.
No of trajectories:  832


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 300 complete.
No of trajectories:  692


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 350 complete.
No of trajectories:  566


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 400 complete.
minimum traffic  3
                                  250     300     350     400
Random Forest  f1_score mean   55.164  56.376  53.098  55.602
                        std     1.646   2.991    3.88   6.022
               accuracy mean   54.493   55.05  52.027  56.181
                        std     1.974   1.075   2.849   4.661
AdaBoost       f1_score mean   56.429   55.51  55.783  50.965
                        std     4.784   3.775   4.593   4.216
               accuracy mean   56.501   56.01  54.759  51.941
                        std     3.744   3.167    3.53   5.578
SVM            f1_score mean    55.14   52.99  51.806  51.468
                        std     3.819   3.396   5.659   5.422
               accuracy mean   57.651  55.645  55.771  52.104
                        std      2.34   2.133   5.921   5.321
Log Regression f1_score mean   54.158  57.157  55.614  53.652
                        std      3.38   4.602   5.981   

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 250 complete.
No of trajectories:  424


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.


  result = self._run_cell(


Log Regression complete.
trajectory length 300 complete.
No of trajectories:  294


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 350 complete.
No of trajectories:  266


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  4
                                  250     300     350     400
Random Forest  f1_score mean   55.042  58.637  54.192   52.15
                        std     2.793   3.543   1.526   1.857
               accuracy mean     52.5  54.487  51.701  50.377
                        std     2.572   3.762   2.274   1.849
AdaBoost       f1_score mean    53.01  48.235  48.008  54.096
                        std     2.797   5.167   9.485   7.717
               accuracy mean   54.052  47.661  49.281   54.92
                        std     4.188   6.244    6.98   7.103
SVM            f1_score mean    53.12  44.977    54.1  45.294
                        std     1.731   4.333   2.754   7.322
               accuracy mean   52.311    46.0   53.39  47.729
                        std     2.178   2.939   2.005   3.009
Log Regression f1_score mean   57.751  51.765  53.385  48.614
                       

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 250 complete.
No of trajectories:  198


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 300 complete.
No of trajectories:  150


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 350 complete.
No of trajectories:  118


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  5
                                  250     300     350     400
Random Forest  f1_score mean   49.669  55.449  52.414   45.13
                        std     6.206   4.181   5.174   7.165
               accuracy mean     48.4  54.551  48.667  44.058
                        std     5.571    3.46   7.775   8.484
AdaBoost       f1_score mean   53.699  45.097  53.771  42.656
                        std     7.676   8.102   8.943  10.771
               accuracy mean     53.2  45.923    54.0  44.855
                        std     5.307   5.337   4.422   7.367
SVM            f1_score mean    56.88  56.752  55.852  50.078
                        std     1.691   6.724   6.699  13.037
               accuracy mean     54.0  55.577  53.333  49.167
                        std      2.53   3.498   4.714   8.196
Log Regression f1_score mean   58.301  55.349  52.521  47.313
                       

  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 250 complete.
No of trajectories:  82


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 300 complete.
No of trajectories:  52


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.


  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 350 complete.
No of trajectories:  32


  result = self._run_cell(
  result = self._run_cell(


Random Forest complete.
AdaBoost complete.
SVM complete.
Log Regression complete.
trajectory length 400 complete.
minimum traffic  6
                                  250     300     350     400
Random Forest  f1_score mean   48.573  45.714  41.434  52.857
                        std    11.837   9.264  12.626  11.107
               accuracy mean   46.784  43.971  40.727   56.19
                        std     9.012   5.103  11.859  12.196
AdaBoost       f1_score mean   46.997  50.093  51.761  54.762
                        std     6.111  17.463  10.403  12.418
               accuracy mean   48.947  50.882  49.818  52.857
                        std     5.009  15.826   13.72  11.107
SVM            f1_score mean   62.266  57.965   40.26   64.81
                        std     5.733  10.223  14.268  19.908
               accuracy mean    61.93  58.676  40.727  69.524
                        std     6.168   9.091  11.859  14.861
Log Regression f1_score mean   54.876  61.695   35.96  61.095

  result = self._run_cell(
  result = self._run_cell(
  result = self._run_cell(


In [59]:
df_acc.to_pickle("acc_50p_0_lane_filter_f1.pkl")