In [None]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from classical_model_with_voting import rolling_agg, speed_ratio

In [None]:
# Load the pickled lane-level frame and move `edge_id` into the row index.
# `append=True` adds it as the innermost (4th) index level; the reorder then
# swaps the last two levels so `edge_id` ends up in third position.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
df = (
    pd.read_pickle('../../Data/block4_concat_lane.pkl')
    .set_index('edge_id', append=True)
    .reorder_levels((0, 1, 3, 2))
)

In [None]:
# Thresholds a group must meet to be kept for training.
min_traj_len = 300       # minimum number of rows in a group
min_speed_ratio = 0.75   # minimum value of speed_ratio(group)


def _is_usable_trajectory(grp):
    """Return True when a group is long enough and passes the speed-ratio cut.

    Both conditions are always evaluated (no short-circuit), so `speed_ratio`
    is called for every group.
    NOTE(review): `speed_ratio` comes from classical_model_with_voting; it is
    presumably a scalar in [0, 1] per group - confirm against that module.
    """
    long_enough = len(grp) >= min_traj_len
    passes_speed_cut = speed_ratio(grp) >= min_speed_ratio
    return long_enough & passes_speed_cut


# Groups are formed from every index level except the last one.
df_train = df.groupby(df.index.names[:-1]).filter(_is_usable_trajectory)

In [None]:
# Summarise the filtered trajectories with windowed statistics.
# NOTE(review): `rolling_agg` is defined in classical_model_with_voting; with
# step == window_size the windows presumably do not overlap - confirm there.
df_train_agg = rolling_agg(df_train, window_size=150, step=150, agg_dict={
    'xtrack_dist': ['std'],
    'avg_surr_speed': ['mean', 'std'],
    'lanes': ['median'],
    'len': ['median'],
    'speed': ['mean', 'std'],
    'vehicle_density': ['median'],
    'lon_acc': ['mean', 'std'],
    'lat_acc': ['mean', 'std']
})

# Balance the classes by undersampling every `type` down to the size of the
# rarest one. The draw is seeded so that Restart & Run All reproduces the same
# training set; `groupby(...).sample` keeps the `type` column in the result and
# avoids the deprecated `groupby.apply` behaviour on grouping columns.
sample_seed = 42
smallest_class_size = df_train_agg.groupby('type').size().min()
df_train_agg = df_train_agg.groupby('type').sample(
    n=smallest_class_size, random_state=sample_seed
)

# Feature matrix (everything except the label) and label vector.
X, y = df_train_agg.drop('type', axis=1), df_train_agg['type']

In [None]:
# Candidate classifiers and the hyper-parameter grid searched for each one.
# Every entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the `param_grid` handed to GridSearchCV
model_params = {
    'svm': {
        # `SVC` is imported directly from sklearn.svm; the `svm` module itself
        # is never imported in this notebook.
        'model': SVC(),
        'params': {
            'C': [1, 10, 100, 1000],
            'gamma': ['scale', 'auto'],
            # 'precomputed' is intentionally excluded: it requires X to be a
            # square kernel (Gram) matrix, not a feature matrix.
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            # NOTE(review): 1200 is absent from this grid - confirm intentional.
            'n_estimators': [200, 400, 600, 800, 1000, 1400, 1600, 1800, 2000]
        }
    },
    'ada_boost': {
        'model': AdaBoostClassifier(),
        'params': {
            'n_estimators': [200, 400, 600, 800, 1000, 1400, 1600, 1800, 2000]
        }
    },
    'gradient_boost': {
        'model': GradientBoostingClassifier(),
        'params': {
            'n_estimators': [200, 400, 600, 800, 1000, 1400, 1600, 1800, 2000]
        }
    },
    'logistic_regression': {
        # liblinear supports both the l1 and l2 penalties searched below.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'penalty': ['l1', 'l2'],
            'C': [1, 10, 100, 1000]
        }
    },
    'MLP': {
        'model': MLPClassifier(),
        'params': {
            'hidden_layer_sizes': [25, 100, 250],
            # Valid activations are identity / logistic / tanh / relu;
            # 'lbfgs' is a solver name, not an activation.
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'solver': ['lbfgs', 'sgd', 'adam'],
            'alpha': [0.0001, 0.01, 0.05],
            'learning_rate': ['constant', 'invscaling', 'adaptive']
        }
    }
}

In [None]:
# Run a 5-fold grid search for every candidate in `model_params` and record
# the best cross-validated score and the parameters that produced it.
# `scores` is (re)initialised here so the cell works on a fresh kernel and
# does not accumulate duplicate rows when re-run.
scores = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

# One row per model.
# NOTE(review): this rebinds `df`, which earlier cells use for the raw
# trajectory frame; the name is kept for compatibility with any later cells,
# but re-running the filtering cell after this one will fail.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])