classical_models.ipynb

by: Archie Gertsman (arkadiy2@illinois.edu)
Lloyd Fernandes (lloydf2@illinois.edu)

Project director: Richard Sowers

r-sowers@illinois.edu, https://publish.illinois.edu/r-sowers/

Copyright 2019 University of Illinois Board of Trustees. All Rights Reserved. Licensed under the MIT license


In [1]:
import warnings; warnings.simplefilter('ignore')

In [2]:
import sys
sys.path.append('../../Lib/')
import pandas as pd
import numpy as np
from feature_eng import split_trajectories
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from time import time
from IPython.display import display

In [3]:
# Load the pre-processed trajectory table, append 'edge_id' to the index and
# swap the last two index levels so the index reads
# (file_name, id, edge_id, time).
# NOTE: unpickling can execute arbitrary code; only load trusted files.
df = (
    pd.read_pickle('block4_concat_lane.pkl')
    .set_index('edge_id', append=True)
    .reorder_levels((0, 1, 3, 2))
)

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,lat,lon,speed,lon_acc,lat_acc,type,traveled_d,avg_speed,bearing,nearest_edge_start_node,...,edge_progress_intervals,len,lanes,node_veh_dist,edge_seg,vehicle_density,avg_surr_speed,edge_bearing,acc_edge,acc_per_edge
file_name,id,edge_id,time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
4_1,1,250699362_250699984,42.0,37.982746,23.732961,11.9046,-0.1145,0.0138,Taxi,182.37,9.740748,1.570795,250699362,...,0.3,97.581,5.4,29.81433,1.0,7,10.464171,-2.83013,0.11322,0.021953
4_1,1,250699362_250699984,42.04,37.982746,23.732963,11.8975,-0.1007,0.0147,Taxi,182.37,9.740748,0.168572,250699362,...,0.3,97.581,5.4,29.67483,1.0,7,10.457843,-2.83013,0.10036,0.016867
4_1,1,250699362_250699984,42.08,37.982747,23.732964,11.8919,-0.0918,0.0157,Taxi,182.37,9.740748,0.168573,250699362,...,0.3,97.581,5.4,29.537753,1.0,7,10.452857,-2.83013,0.092194,0.013188
4_1,1,250699362_250699984,42.12,37.982748,23.732965,11.8871,-0.0869,0.0167,Taxi,182.37,9.740748,1.570796,250699362,...,0.3,97.581,5.4,29.400718,1.0,7,10.448586,-2.83013,0.087837,0.010734
4_1,1,250699362_250699984,42.16,37.982748,23.732966,11.8831,-0.0784,0.0176,Taxi,182.37,9.740748,0.32808,250699362,...,0.3,97.581,5.4,29.330986,1.0,7,10.444986,-2.83013,0.080021,0.007273


In [171]:
def rolling_agg(df, agg_dict, window_size=100, step=25):
    """Aggregate each trajectory over sliding windows and attach its vehicle type.

    Parameters
    ----------
    df : DataFrame whose index levels, except the last one, identify a
        trajectory; must contain a ``type`` column and every column named in
        ``agg_dict``.
    agg_dict : mapping of column name -> list of aggregations, passed to
        ``Rolling.agg``.
    window_size : number of consecutive rows per window.
    step : keep every ``step``-th row of the rolling result.

    Returns
    -------
    DataFrame with one row per retained window, columns named
    ``<column>_<aggregation>`` plus the joined ``type`` column.
    """
    # rolling agg with step size = 1
    # Groups are formed on every index level except the last; rows containing
    # any NaN after aggregation are dropped.
    df_agg = df.groupby(df.index.names[:-1]) \
                .rolling(window_size) \
                .agg(agg_dict) \
                .dropna()
    
    # select a subset of above computations to achieve custom step size
    # NOTE(review): this groups on *all* index names of the rolling result;
    # whether each group then holds more than one row (so that [::step]
    # actually thins it) depends on the index pandas produces here - confirm
    # against the installed pandas version.
    df_agg = df_agg.groupby(df_agg.index.names, 
                            as_index=False, 
                            group_keys=False) \
                .apply(lambda x: x[::step])
    
    # flatten the (column, aggregation) column tuples into 'column_aggregation'
    df_agg.columns = ['_'.join(col) for col in df_agg.columns]
    
    # add 'type' column
    # the first 'type' value of each group is taken as the group's label
    vehicle_types = df.type.groupby(df.index.names[:-1]).first()
    return df_agg.join(vehicle_types)
  
def speed_ratio(grp, min_speed=0):
    """Return the fraction of rows in ``grp`` whose ``speed`` exceeds ``min_speed``.

    Parameters
    ----------
    grp : DataFrame with a ``speed`` column.
    min_speed : threshold; a row counts only when its speed is strictly greater.

    Returns
    -------
    float in [0, 1]. An empty ``grp`` yields 0.0 instead of raising
    ``ZeroDivisionError``.
    """
    total = len(grp)
    if total == 0:
        # no rows at all, hence no rows above the threshold
        return 0.0
    return len(grp[grp.speed > min_speed]) / total

def validation_set(df,test_size,random_state=4):
    """Split ``df`` into train and test parts by vehicle, not by row.

    Every (file_name, id) pair ends up entirely in one of the two parts.
    The split is stratified on ``type``; the test vehicles are then
    down-sampled so that each type contributes the same number of vehicles.

    Parameters
    ----------
    df : DataFrame indexed by (file_name, id, edge_id, time) with a ``type``
        column.
    test_size : forwarded to ``train_test_split``.
    random_state : seed used for both the split and the balancing sample, so
        repeated calls on the same data give the same partition.

    Returns
    -------
    (df_train, df_test) : row subsets of ``df``.
    """
    vehicle_keys = ['file_name', 'id']
    vehicles = df.reset_index()[vehicle_keys + ['type']].drop_duplicates()
    X, y = vehicles[vehicle_keys], vehicles['type']
    X_train, X_test, _, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)

    # (file_name, id) key of every row of df, computed once for both masks
    row_vehicle = df.index.droplevel(['time', 'edge_id'])
    df_train = df[row_vehicle.isin(X_train.set_index(vehicle_keys).index)]

    # assign() builds a new frame instead of writing into the slice that
    # train_test_split returned
    X_test = X_test.assign(type=y_test)
    by_type = X_test.groupby('type')
    n_per_type = by_type.size().min()
    X_test = by_type.apply(
        lambda group: group.sample(n_per_type, random_state=random_state)
    ).reset_index(drop=True)
    df_test = df[row_vehicle.isin(X_test.set_index(vehicle_keys).index)]
    return df_train, df_test

def train_and_accuracy(X_train,y_train,X_test,y_test, model):
    """Fit ``model`` on the training data and score it on the test data.

    Returns a tuple ``(accuracy, f1)`` where accuracy is the share of test
    samples predicted correctly and f1 is computed with 'Car' as the
    positive class.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    hits = predicted == y_test
    accuracy = len(hits[hits]) / len(y_test)

    # binarise both label vectors: 1 for 'Car', 0 for anything else
    true_is_car = (y_test == 'Car').astype(int)
    pred_is_car = (predicted == 'Car').astype(int)
    return accuracy, f1_score(true_is_car, pred_is_car)

def val_voting_accuracy(X_train,y_train,X_val,y_val, model,by_edge = False):
    """Score an already fitted ``model`` by majority vote over window predictions.

    Each row of ``X_val`` is predicted individually; the predictions are then
    grouped and the most frequent label of a group becomes the group's
    prediction.

    Parameters
    ----------
    X_train, y_train : unused; kept so existing call sites stay valid.
    X_val : features, one row per window.
    y_val : Series of true labels whose index contains the levels
        'file_name', 'id' and (when ``by_edge`` is set) 'edge_id'.
    model : object with a ``predict`` method.
    by_edge : vote per (file_name, id, edge_id) instead of per (file_name, id).

    Returns
    -------
    (accuracy, f1) over the groups, f1 computed with 'Car' as positive class.
    """
    group_keys = ['file_name', 'id']
    if by_edge:
        group_keys.append('edge_id')

    window_pred = pd.DataFrame(index = y_val.index,data = model.predict(X_val),columns = ['type'])

    # predicted value for a whole group is the mode of its window predictions
    # (ties resolve to the first value returned by mode())
    y_hat = window_pred.groupby(group_keys).apply(lambda group: group['type'].mode().iloc[0])
    # first() takes no column list; the stray ['type'] argument was being
    # interpreted as numeric_only
    y_test = y_val.groupby(group_keys).first()

    a = y_hat == y_test

    f = f1_score((y_test == 'Car').astype(int),(y_hat == 'Car').astype(int))
    return len(a[a]) / len(y_test),f

def get_xy(df,overlap,traj_len,agg_dict,outlier_limit=None,balance = True):
    """Build the feature matrix and label vector from raw trajectories.

    Parameters
    ----------
    df : trajectory DataFrame accepted by ``rolling_agg``.
    overlap : fraction of a window shared with the next retained window.
    traj_len : window length in rows.
    agg_dict : aggregations forwarded to ``rolling_agg``.
    outlier_limit : if given, rows are passed through
        ``filter_by_percentile`` with this percentile.
    balance : if true, every ``type`` is randomly down-sampled to the size of
        the smallest one.

    Returns
    -------
    (X, y) : aggregated features without the ``type`` column, and ``type``.
    """
    # round() avoids losing a step to float error (e.g. 0.1 * 100 evaluating
    # to 9.99...), and the lower bound of 1 keeps the slice step valid when
    # overlap is 1 or more
    step = max(1, int(round((1 - overlap) * traj_len)))
    df_agg = rolling_agg(df, window_size=traj_len, step=step, agg_dict=agg_dict)
    if outlier_limit is not None:
        df_agg = filter_by_percentile(df_agg,outlier_limit)
    if balance:
        by_type = df_agg.groupby('type', group_keys=False)
        n_per_type = by_type.size().min()
        df_agg = by_type.apply(lambda grp: grp.sample(n_per_type))

    X,y = df_agg.drop('type', axis=1), df_agg.type
    return X,y
  
def filter_by_percentile(df,percentile):
    """Keep the rows whose numeric values all lie inside the central quantile band.

    For every numeric column the ``percentile``-th and
    ``(100 - percentile)``-th percentiles are computed on the full input; a
    row is kept only if each of its numeric values lies within those bounds
    (inclusive). Non-numeric columns are carried along untouched.

    Parameters
    ----------
    df : DataFrame to filter.
    percentile : size of each tail to cut, in percent (e.g. 1 removes
        values below the 1st or above the 99th percentile).

    Returns
    -------
    DataFrame with the surviving rows and all original columns.
    """
    bottom_le = percentile/100
    top_le = 1-(percentile/100)

    # numeric_only keeps label columns such as 'type' out of the quantile
    # computation
    lower = df.quantile(bottom_le, numeric_only=True)
    upper = df.quantile(top_le, numeric_only=True)

    # compare column-wise with boolean masks rather than query strings, so
    # column names need not be valid Python identifiers
    numeric = df[lower.index]
    within = (numeric.ge(lower, axis=1) & numeric.le(upper, axis=1)).all(axis=1)
    return df[within]

def __xtrack_dist_diff(df):
    """Return ``df`` with an added ``xtrack_diff`` column.

    ``xtrack_diff`` is each row's ``xtrack_dist`` minus the ``xtrack_dist``
    of the following row; the last row, which has no successor, gets 0.

    The input frame is left unmodified: a new frame is returned, so the
    function is safe to use inside ``groupby(...).apply``.
    """
    step_change = df['xtrack_dist'] - df['xtrack_dist'].shift(-1)
    return df.assign(xtrack_diff=step_change.fillna(0))



In [5]:

# Add the row-to-row change of the cross-track distance, computed separately
# for every (file_name, id, edge_id) group.
df = (
    df.groupby(['file_name','id','edge_id'], as_index=False, group_keys=False)
    .apply(__xtrack_dist_diff)
)

# Squared copies of selected columns, created in this order.
for _squared, _source in {'xtrack_diff_sq': 'xtrack_diff',
                          'acc_edge_sq': 'acc_edge',
                          'acc_per_edge_sq': 'acc_per_edge'}.items():
    df[_squared] = df[_source]**2

In [193]:
class ensemble():
    """Combine several fitted classifiers by majority vote or mean probability.

    Attributes
    ----------
    model_num : number of top-ranked models kept by ``find_ensemble``.
    accuracy_measure : last-level column label of the score table that
        ``find_ensemble`` ranks on.
    model_list : keys of the member models (set directly or by
        ``find_ensemble``).
    model_dict : mapping key -> fitted model, set by ``fit``.
    """
    def __init__(self,model_num,accuracy_measure,model_list = None):
        self.model_num = model_num
        self.accuracy_measure = accuracy_measure
        self.model_list = model_list

    def find_ensemble(self,df_acc,traj_len,vehicle_density):
        """Select the ``model_num`` best models from the score table.

        Reads the rows ``(<model>, 'accuracy', 'mean')`` of ``df_acc`` in
        column ``(vehicle_density, traj_len, accuracy_measure)`` and stores
        the best-scoring model keys in ``model_list``, highest score first.
        Rows without a score are ignored, so a model that was never evaluated
        cannot be selected.
        """
        scores = df_acc.loc[(slice(None),'accuracy','mean'),(vehicle_density,traj_len,self.accuracy_measure)]
        ranked = scores.dropna().sort_values(ascending = False)
        self.model_list = ranked.index.get_level_values(0)[:self.model_num].to_list()

    def fit(self,X,y,model_dict=None):
        """Register the member models.

        If ``model_dict`` (key -> already fitted model) is given it is used
        as is and ``X``/``y`` are ignored. Otherwise every entry of
        ``model_list`` is fitted on ``X``, ``y`` and stored under itself as
        key, which requires the entries to be estimator objects whose
        ``fit`` returns the fitted estimator.
        """
        if model_dict is not None:
            self.model_dict = model_dict
            return
        self.model_dict = {}
        for model in self.model_list:
            self.model_dict[model] = model.fit(X,y)

    def predict(self,X):
        """Return the majority-vote label for every sample as a numpy array.

        Ties resolve to the first value returned by the row-wise mode.
        """
        votes = pd.DataFrame({name: self.model_dict[name].predict(X) for name in self.model_list})
        return votes.mode(axis=1)[0].to_numpy()

    def predict_proba(self,X,get_label = True):
        """Average the class probabilities of all member models.

        Parameters
        ----------
        X : samples to score.
        get_label : if true return, per sample, the class with the highest
            mean probability (Series); otherwise return the DataFrame of
            mean probabilities with one column per class.

        Raises
        ------
        ValueError : if the ensemble has no member models.
        """
        if not self.model_list:
            raise ValueError("ensemble has no member models")

        per_model = []
        for name in self.model_list:
            model = self.model_dict[name]
            # label the columns so models are averaged class by class
            per_model.append(pd.DataFrame(model.predict_proba(X), columns=model.classes_))

        # the frames share a positional row index, so grouping on it
        # averages each sample across models
        mean_proba = pd.concat(per_model).groupby(level=0).mean()
        if isinstance(X, (pd.DataFrame, pd.Series)):
            mean_proba.index = X.index

        if get_label:
            return mean_proba.idxmax(axis=1)
        return mean_proba
        
       

In [225]:
#initial parameters
def _scaled(step_name, estimator):
    """Return a pipeline that standardises the features, then runs ``estimator``."""
    return Pipeline([('scaler', StandardScaler()), (step_name, estimator)])


# Candidate classifiers, keyed by the label used in the result table.
models = {
    'Random Forest': _scaled('rf', RandomForestClassifier()),
    'AdaBoost': _scaled('abc', AdaBoostClassifier()),
    'SVM': _scaled('svc', SVC(max_iter=10000)),
    'Log Regression': _scaled('lr', LogisticRegression(max_iter=10000)),
    'GBM': _scaled('gbm', GradientBoostingClassifier()),
    'MLP': _scaled('mlp', MLPClassifier(hidden_layer_sizes = (250,100,25),
                                        max_iter=1000,
                                        learning_rate = 'adaptive',
                                        early_stopping = True,
                                        n_iter_no_change = 10)),
}

_score_rows = pd.MultiIndex.from_product([models.keys(),['f1_score','accuracy'], ['mean']])
df_acc = pd.DataFrame(index=_score_rows)

# windowing / filtering settings
overlap = 0.4
min_movement_limit = 0.75
speed_limit = 0

# cross-validation settings and per-fold score buffers
k = 5
validation_ratio = 0.2
kf = StratifiedKFold(n_splits=k, shuffle=True)
accs = np.zeros(k)
f1 = np.zeros(k)

# column -> rolling aggregations used as features
# (disabled alternatives: 'lanes': ['mean']; extra 'skew' / kurt on 'speed';
#  extra 'std' / 'skew' / kurt on 'vehicle_density')
agg_dict = {
    'xtrack_diff': ['mean','std'],
    'xtrack_dist': ['mean','std'],
    'avg_surr_speed': ['skew',pd.DataFrame.kurt],
    'speed': ['mean'],
    'vehicle_density': ['mean'],
    'acc_edge': ['mean','skew'],
    'acc_per_edge': ['mean','std'],
}



In [226]:
# Car and Taxi classification
#
# For every minimum vehicle density (1..5) and every window length in
# traj_lens: filter the trajectories, split them by vehicle into
# train / test / validation, fit each model in `models`, and record the
# voting accuracies in df_acc. An ensemble of the best `model_num` models is
# scored on the validation set as well.

traj_lens = np.arange(100,300, step=50)
model_num = 5
# Result table: columns are (vehicle_density, traj_len, evaluation),
# rows are (model, measure, statistic).
# NOTE(review): the column level is built from range(0,6) while the loop
# below only runs over densities 1..5, so the density-0 columns stay empty.
df_acc = pd.DataFrame(columns = pd.MultiIndex.from_product([list(range(0,6)),traj_lens,['kfold','val_woedge','val_by_edge']]),index=pd.MultiIndex.from_product([models.keys(),['accuracy','accuracy_baseline'], ['mean','std']]))
ensemble_model = ensemble(model_num,'kfold')

validation_ratio = 0.2
test_ratio = 0.2

for vehicle_density in range(1,6):
    # keep only rows observed at or above the current density
    df_vehicle_density = df[df.vehicle_density >= vehicle_density]
    for traj_len in traj_lens:

        # keep groups that are long enough for one window and whose share of
        # rows with speed > speed_limit reaches min_movement_limit
        df_filtered = df_vehicle_density.groupby(df_vehicle_density.index.names[:-1]) \
                .filter(lambda grp: (len(grp) >= traj_len) & (speed_ratio(grp,speed_limit) >= min_movement_limit))

        # two nested vehicle-level splits: validation first, then test
        df_train_test,df_val = validation_set(df_filtered,validation_ratio)
        df_train,df_test = validation_set(df_train_test,test_ratio)
        #aggregate trajectories
        # training sets are outlier-filtered (limit 1) and class-balanced;
        # test and validation sets are left unbalanced and unfiltered
        X,y = get_xy(df_train_test,overlap,traj_len,agg_dict,1)
        X_train,y_train = get_xy(df_train,overlap,traj_len,agg_dict,1)
        X_test,y_test = get_xy(df_test,overlap,traj_len,agg_dict,balance = False)
        X_val,y_val = get_xy(df_val,overlap,traj_len,agg_dict,balance = False)
        
        #store percent cars and taxis
        print("No of trajectories: ",len(X))
        df_acc.loc[('traj_len','Car_Taxi','total'), (vehicle_density,traj_len,'kfold')] = len(X)
        df_acc.loc[('traj_len','Car_Taxi_percent','Car'), (vehicle_density,traj_len,'kfold')] = sum(y == 'Car')/len(X)
        df_acc.loc[('traj_len','Car_Taxi_percent','Taxi'), (vehicle_density,traj_len,'kfold')] = sum(y == 'Taxi')/ len(X)

        # class shares in the validation set, counted once per
        # (file_name, id) after dropping the edge level
        woedge_count = y_val.reset_index(['edge_id'],drop = True).reset_index().drop_duplicates()
        df_acc.loc[('traj_len','Car_Taxi','total'), (vehicle_density,traj_len,'val_woedge')] = len(woedge_count)
        df_acc.loc[('traj_len','Car_Taxi_percent','Car'), (vehicle_density,traj_len,'val_woedge')] = sum(woedge_count.type == 'Car')/len(woedge_count)
        df_acc.loc[('traj_len','Car_Taxi_percent','Taxi'), (vehicle_density,traj_len,'val_woedge')] =sum(woedge_count.type == 'Taxi')/len(woedge_count)

        # same counts with the edge level kept
        by_edge_count = y_val.reset_index().drop_duplicates()
        df_acc.loc[('traj_len','Car_Taxi','total'), (vehicle_density,traj_len,'val_by_edge')] = len(by_edge_count)
        df_acc.loc[('traj_len','Car_Taxi_percent','Car'), (vehicle_density,traj_len,'val_by_edge')] = sum(by_edge_count.type == 'Car')/len(by_edge_count)
        df_acc.loc[('traj_len','Car_Taxi_percent','Taxi'), (vehicle_density,traj_len,'val_by_edge')] = sum(by_edge_count.type == 'Taxi')/len(by_edge_count)

        model_dict = {}
        # fit different models
        for name, model in models.items():
            
            # NOTE(review): the k-fold loop below is disabled; the 'kfold'
            # column therefore holds the accuracy on the single df_test split.
#             for i, (train_index, test_index) in enumerate(kf.split(X,y)):

#                 X_train, X_test = X.iloc[train_index], X.iloc[test_index]
#                 y_train, y_test = y.iloc[train_index], y.iloc[test_index]
#                 accs[i],f1[i] = train_and_accuracy(X_train, y_train,X_test,y_test, model)
            model.fit(X_train,y_train)
            val_accs,val_f1 = val_voting_accuracy(X_train, y_train,X_test,y_test, model)
            
            df_acc.loc[(name, 'accuracy','mean'),  (vehicle_density,traj_len,'kfold')] = round(100*val_accs, 3)

            # refit on train+test before scoring on the validation set
            model.fit(X, y)
            val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, model)
            df_acc.loc[(name, 'accuracy','mean'), (vehicle_density,traj_len,'val_woedge')] = round(100*val_accs, 3)
            #df_acc.loc[(name, 'f1_score','mean'),(str(traj_len)+'_val_woedge')] = round(100*val_f1, 3)

            val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, model,by_edge = True)
            df_acc.loc[(name, 'accuracy','mean'), (vehicle_density,traj_len,'val_by_edge')] = round(100*val_accs, 3)
            #df_acc.loc[(name, 'f1_score','mean'),(str(traj_len)+'_val_by_edge')] = round(100*val_f1, 3)
            # the stored object is the pipeline as last fitted on (X, y)
            model_dict[name] = model
            
        # pick the best models by their 'kfold' score and reuse the models
        # fitted above as ensemble members
        ensemble_model.find_ensemble(df_acc,traj_len,vehicle_density)
        ensemble_model.fit(X,y,model_dict)
        val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, ensemble_model)
        df_acc.loc[('ensemble_model', 'accuracy','mean'), (vehicle_density,traj_len,'val_woedge')] = round(100*val_accs, 3)
        val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, ensemble_model,by_edge = True)
        df_acc.loc[('ensemble_model', 'accuracy','mean'), (vehicle_density,traj_len,'val_by_edge')] = round(100*val_accs, 3)
            
    # printed after all window lengths for this density have finished
    print('vehicle density >=  ',vehicle_density)


No of trajectories:  6430
No of trajectories:  3642
No of trajectories:  2322
No of trajectories:  1682
vehicle density >=   1
No of trajectories:  3694
No of trajectories:  2242
No of trajectories:  1358
No of trajectories:  916
vehicle density >=   2
No of trajectories:  1810
No of trajectories:  984
No of trajectories:  634
No of trajectories:  444
vehicle density >=   3
No of trajectories:  798
No of trajectories:  432
No of trajectories:  250
No of trajectories:  184
vehicle density >=   4
No of trajectories:  324
No of trajectories:  206
No of trajectories:  110
No of trajectories:  66
vehicle density >=   5


In [227]:
# Car and Car_1 classification
#
# Baseline for the Car/Taxi experiment: only 'Car' trajectories are used and
# a random half of the vehicles is relabelled 'Car_1'. The same pipeline is
# then run on this artificial two-class problem; results are stored in the
# 'accuracy_baseline' rows of df_acc.

df_car = df[df.type == 'Car']
ensemble_model = ensemble(model_num,'kfold')
for vehicle_density in range(1,6):
    df_vehicle_density = df_car[df_car.vehicle_density >= vehicle_density]
    for traj_len in traj_lens:

        # keep groups that are long enough for one window and whose share of
        # rows with speed > speed_limit reaches min_movement_limit
        df_filtered = df_vehicle_density.groupby(df_vehicle_density.index.names[:-1]) \
                .filter(lambda grp: (len(grp) >= traj_len) & (speed_ratio(grp,speed_limit) >= min_movement_limit))

        # relabel every row of a randomly chosen half of the vehicles
        df_index = df_filtered.reset_index()[['file_name','id']].drop_duplicates()
        relabelled = df_index.sample(frac = 0.5).set_index(['file_name','id']).index
        # droplevel gives the per-row (file_name, id) key without copying the
        # whole frame the way reset_index(...) did
        row_vehicle = df_filtered.index.droplevel(['edge_id', 'time'])
        df_filtered.loc[row_vehicle.isin(relabelled),'type']='Car_1'

        df_train_test,df_val = validation_set(df_filtered,validation_ratio)
        df_train,df_test = validation_set(df_train_test,test_ratio)
        #aggregate trajectories
        X,y = get_xy(df_train_test,overlap,traj_len,agg_dict,1)
        X_train,y_train = get_xy(df_train,overlap,traj_len,agg_dict,1)
        X_test,y_test = get_xy(df_test,overlap,traj_len,agg_dict,balance = False)
        X_val,y_val = get_xy(df_val,overlap,traj_len,agg_dict,balance = False)

        #store sample counts and Car / Car_1 shares
        # (printed once; the statement used to be duplicated)
        print("No of trajectories: ",len(X))
        df_acc.loc[('traj_len','Car_Car','total'), (vehicle_density,traj_len,'kfold')] = len(X)
        df_acc.loc[('traj_len','Car_Car_percent','Car'), (vehicle_density,traj_len,'kfold')] = sum(y == 'Car')/len(X)
        df_acc.loc[('traj_len','Car_Car_percent','Car_1'), (vehicle_density,traj_len,'kfold')] = sum(y == 'Car_1')/ len(X)

        woedge_count = y_val.reset_index(['edge_id'],drop = True).reset_index().drop_duplicates()
        df_acc.loc[('traj_len','Car_Car','total'), (vehicle_density,traj_len,'val_woedge')] = len(woedge_count)
        df_acc.loc[('traj_len','Car_Car_percent','Car'), (vehicle_density,traj_len,'val_woedge')] = sum(woedge_count.type == 'Car')/len(woedge_count)
        df_acc.loc[('traj_len','Car_Car_percent','Car_1'), (vehicle_density,traj_len,'val_woedge')] =sum(woedge_count.type == 'Car_1')/len(woedge_count)

        by_edge_count = y_val.reset_index().drop_duplicates()
        df_acc.loc[('traj_len','Car_Car','total'), (vehicle_density,traj_len,'val_by_edge')] = len(by_edge_count)
        df_acc.loc[('traj_len','Car_Car_percent','Car'), (vehicle_density,traj_len,'val_by_edge')] = sum(by_edge_count.type == 'Car')/len(by_edge_count)
        df_acc.loc[('traj_len','Car_Car_percent','Car_1'), (vehicle_density,traj_len,'val_by_edge')] = sum(by_edge_count.type == 'Car_1')/len(by_edge_count)
        model_dict = {}
        # fit different models
        for name, model in models.items():

            # score on the held-out test split; stored in the 'kfold' column
            model.fit(X_train,y_train)
            val_accs,val_f1 = val_voting_accuracy(X_train, y_train,X_test,y_test, model)

            df_acc.loc[(name, 'accuracy_baseline','mean'),  (vehicle_density,traj_len,'kfold')] = round(100*val_accs, 3)

            # refit on train+test before scoring on the validation set
            model.fit(X, y)
            val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, model)
            df_acc.loc[(name, 'accuracy_baseline','mean'), (vehicle_density,traj_len,'val_woedge')] = round(100*val_accs, 3)

            val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, model,by_edge = True)
            df_acc.loc[(name, 'accuracy_baseline','mean'), (vehicle_density,traj_len,'val_by_edge')] = round(100*val_accs, 3)
            model_dict[name] = model

        # NOTE(review): find_ensemble ranks on the 'accuracy' rows, i.e. the
        # scores written by the Car/Taxi cell, not on 'accuracy_baseline' -
        # confirm this is intended.
        ensemble_model.find_ensemble(df_acc,traj_len,vehicle_density)
        ensemble_model.fit(X,y,model_dict)
        val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, ensemble_model)
        df_acc.loc[('ensemble_model', 'accuracy_baseline','mean'), (vehicle_density,traj_len,'val_woedge')] = round(100*val_accs, 3)
        val_accs,val_f1 = val_voting_accuracy(X, y,X_val,y_val, ensemble_model,by_edge = True)
        df_acc.loc[('ensemble_model', 'accuracy_baseline','mean'), (vehicle_density,traj_len,'val_by_edge')] = round(100*val_accs, 3)

    print('vehicle density >=  ',vehicle_density)
   

No of trajectories:  6076
No of trajectories:  6076
No of trajectories:  3466
No of trajectories:  3466
No of trajectories:  2096
No of trajectories:  2096
No of trajectories:  1510
No of trajectories:  1510
vehicle density >=   1
No of trajectories:  3146
No of trajectories:  3146
No of trajectories:  1896
No of trajectories:  1896
No of trajectories:  1192
No of trajectories:  1192
No of trajectories:  784
No of trajectories:  784
vehicle density >=   2
No of trajectories:  1666
No of trajectories:  1666
No of trajectories:  870
No of trajectories:  870
No of trajectories:  518
No of trajectories:  518
No of trajectories:  394
No of trajectories:  394
vehicle density >=   3
No of trajectories:  732
No of trajectories:  732
No of trajectories:  402
No of trajectories:  402
No of trajectories:  232
No of trajectories:  232
No of trajectories:  158
No of trajectories:  158
vehicle density >=   4
No of trajectories:  274
No of trajectories:  274
No of trajectories:  148
No of trajectorie

In [231]:
#accuracy results
# rows: every model / measure, restricted to the listed third-level labels;
# columns: vehicle densities 1..5, all window lengths and evaluations
_row_sel = (slice(None), slice(None), ['mean','Car','Car_1','Taxi','total'])
_col_sel = (list(range(1,6)), slice(None), slice(None))
df_acc.loc[_row_sel, _col_sel]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1,1,1,1,1,1,1,1,1,1,...,5,5,5,5,5,5,5,5,5,5
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,100,100,100,150,150,150,200,200,200,250,...,100,150,150,150,200,200,200,250,250,250
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,kfold,val_woedge,val_by_edge,kfold,val_woedge,val_by_edge,kfold,val_woedge,val_by_edge,kfold,...,val_by_edge,kfold,val_woedge,val_by_edge,kfold,val_woedge,val_by_edge,kfold,val_woedge,val_by_edge
Random Forest,accuracy,mean,63.0,59.274,55.802,56.522,56.087,53.672,54.268,51.471,50.523,57.746,...,46.667,68.75,63.636,63.636,41.667,57.143,62.5,75.0,70.0,61.538
Random Forest,accuracy_baseline,mean,49.55,51.439,49.375,51.0,47.6,46.31,51.724,50.459,50.47,48.611,...,63.636,27.778,72.727,73.913,41.667,64.286,64.286,50.0,40.0,40.0
AdaBoost,accuracy,mean,59.5,57.258,56.049,54.348,55.217,53.672,56.098,51.961,51.22,59.859,...,46.667,62.5,59.091,59.091,50.0,71.429,75.0,25.0,80.0,61.538
AdaBoost,accuracy_baseline,mean,50.0,52.158,52.708,54.5,50.8,49.618,54.023,50.459,49.843,50.694,...,48.485,44.444,63.636,60.87,33.333,57.143,57.143,62.5,40.0,40.0
SVM,accuracy,mean,60.0,58.871,57.531,58.152,58.696,54.802,54.878,52.451,52.613,61.972,...,46.667,62.5,59.091,59.091,41.667,71.429,75.0,37.5,60.0,53.846
SVM,accuracy_baseline,mean,50.0,52.878,50.625,49.0,48.8,49.873,52.874,53.211,52.978,55.556,...,39.394,33.333,68.182,69.565,33.333,35.714,35.714,62.5,40.0,40.0
Log Regression,accuracy,mean,55.0,51.613,51.358,53.261,56.087,52.542,52.439,54.902,52.962,56.338,...,43.333,75.0,54.545,54.545,25.0,71.429,75.0,50.0,90.0,69.231
Log Regression,accuracy_baseline,mean,45.045,53.597,53.125,52.0,50.0,49.364,51.724,51.376,50.157,53.472,...,45.455,27.778,63.636,60.87,25.0,21.429,21.429,50.0,30.0,30.0
GBM,accuracy,mean,60.0,55.645,53.333,60.326,54.783,53.39,53.049,51.961,50.871,59.155,...,53.333,68.75,63.636,63.636,25.0,78.571,75.0,87.5,100.0,92.308
GBM,accuracy_baseline,mean,46.847,49.281,47.917,52.0,45.6,47.583,47.126,55.505,53.918,49.306,...,54.545,27.778,72.727,73.913,41.667,64.286,64.286,25.0,50.0,50.0
