In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import joblib
from sklearn import metrics
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the Junction scenario data set (file name ends in .pkl.gz).
# NOTE(review): unpickling can execute arbitrary code - only load files
# that come from a trusted source.
df = pd.read_pickle('../data.2020-11-09.scenarioJunction.pkl.gz')

In [None]:
# Preview the first five rows of the loaded frame.
df.head(5)

In [None]:
def pipeline_save(df):
    """Fit a one-hot-encoding + random-forest pipeline on the whole frame.

    Despite the name, nothing is written to disk here; the fitted pipeline
    is returned to the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``winner`` column, which is used as the target.
        Only the object-dtype feature columns are listed in the
        ColumnTransformer; with its documented default ``remainder='drop'``
        the remaining columns are not passed to the classifier.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline with steps ``preprocessor`` and ``classifier``.
    """
    X = df.drop('winner', axis=1)
    y = df['winner']
    # Pick the categorical columns from X (which already excludes the
    # target). Dropping 'winner' from the object-dtype subset of df instead
    # raises KeyError whenever 'winner' is not stored with object dtype.
    c = X.select_dtypes(include=['object']).columns
    preprocessor = ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), c)])
    rf = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', RandomForestClassifier())])
    rf.fit(X, y)
    return rf


In [None]:
def pipelineClassifier(df):
    """Compare several classifiers on a single 80/20 hold-out split.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``winner`` column (the target). The object-dtype
        columns, minus the names in ``excluded`` below, are one-hot encoded
        and used as features.

    Prints, for each classifier, its repr followed by the pipeline score and
    the accuracy on the held-out 20%. Returns nothing.

    No ``random_state`` is passed to ``train_test_split``, so the split (and
    therefore the printed numbers) changes from run to run.
    """
    excluded = ['winner', 'meta_scenario', 'meta_p_red', 'meta_p_blue', 'meta_seed']

    X = df.drop('winner', axis=1)
    y = df['winner']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
    # drop() needs a flat list of labels; wrapping the list in a second list
    # does not address the individual columns. errors="ignore" lets the call
    # succeed when some of these columns are absent or not object dtype.
    c = df.select_dtypes(include=['object']).drop(excluded, axis=1, errors="ignore").columns
    preprocessor = ColumnTransformer(transformers=[('cat', categorical_transformer, c)])
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="rbf", C=0.025, probability=True),
        NuSVC(probability=True),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        AdaBoostClassifier(),
        GradientBoostingClassifier()
        ]
    for classifier in classifiers:
        pipe = Pipeline(steps=[('preprocessor', preprocessor),
                          ('classifier', classifier)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        print(classifier)
        print("model score: %.3f" % pipe.score(X_test, y_test))
        print("model accuracy:", metrics.accuracy_score(y_test, y_pred))

In [None]:
def pipelineClassifierCross(df):
    """Compare several classifiers with shuffled 10-fold cross-validation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``winner`` column (the target). The object-dtype
        feature columns are one-hot encoded and used as features.

    Prints, for each classifier, its class name followed by the mean and
    standard deviation of the per-fold accuracy. Returns nothing.
    """
    X = df.drop('winner', axis=1)
    y = df['winner']
    # Fixed random_state so the fold assignment is the same on every run.
    cv = KFold(n_splits=10, random_state=1, shuffle=True)
    categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
    # Pick the categorical columns from X (which already excludes the
    # target). Dropping 'winner' from the object-dtype subset of df instead
    # raises KeyError whenever 'winner' is not stored with object dtype.
    c = X.select_dtypes(include=['object']).columns
    preprocessor = ColumnTransformer(transformers=[('cat', categorical_transformer, c)])
    classifiers = [
        KNeighborsClassifier(3),
        #SVC(kernel="rbf", C=0.025, probability=True),
        NuSVC(probability=True),
        DecisionTreeClassifier(),
        RandomForestClassifier(),
        AdaBoostClassifier(),
        GradientBoostingClassifier()
        ]
    for classifier in classifiers:
        pipe = Pipeline(steps=[('preprocessor', preprocessor),
                          ('classifier', classifier)])
        # n_jobs=-1 asks cross_val_score to evaluate the folds in parallel.
        scores = cross_val_score(pipe, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
        print(classifier.__class__.__name__)
        print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))
     







In [None]:
# Run the cross-validated classifier comparison on the loaded frame;
# results are printed by the function.
pipelineClassifierCross(df)

In [None]:
# Load a previously saved model from the working directory.
# NOTE(review): no cell in this notebook writes 'model.joblib' (the training
# cell writes '<scenario>_<ClassifierName>.joblib') - confirm where this file
# comes from. joblib.load unpickles, so only load trusted files.
model=joblib.load('model.joblib')

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import pandas as pd
from sklearn.naive_bayes import GaussianNB

import joblib

In [None]:
# Scenario name -> relative path of the pickled data set for that scenario.
dataframes = {
    scenario: f"../data.2020-11-09.scenario{scenario}.pkl.gz"
    for scenario in ("Test1v1", "Test2v2", "Junction", "JunctionExo")
}

In [None]:
# Estimators to train for every scenario. Entries that are commented out are
# currently disabled; only the uncommented ones end up in the list.
classifiers = [
#     KNeighborsClassifier(3),
    # SVC(kernel="rbf", C=0.025, probability=True),
    # NuSVC(probability=True),
#     DecisionTreeClassifier(),
    RandomForestClassifier(),
#     AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
]

In [None]:
# One (scenario name, data path, classifier) job per scenario/classifier pair,
# ordered scenario-first.
args = [
    (scenario, path, estimator)
    for scenario, path in dataframes.items()
    for estimator in classifiers
]


In [None]:
# Number of training jobs queued (one per scenario/classifier pair).
len(args)

In [None]:
def pipelineClassifier(args):
    """Train one classifier on one scenario file and dump the fitted pipeline.

    Parameters
    ----------
    args : tuple
        ``(name, path, classifier)``: the scenario name used in the output
        file name, the path of the gzip-pickled DataFrame to read, and the
        estimator to fit. A single tuple is taken so the function can be
        handed directly to a ``map``-style call.

    The fitted pipeline is written to ``<name>_<ClassifierName>.joblib`` in
    the working directory. Progress is reported with ``print``; nothing is
    returned.
    """
    scenario, path, classifier = args
    file_name = f'{scenario}_{classifier.__class__.__name__}.joblib'

    print('starting...', file_name)

    # NOTE(review): unpickling can execute arbitrary code - trusted files only.
    frame = pd.read_pickle(path, compression='gzip')

    print('...read', path, 'completed...')

    # Target and the meta_* columns are excluded from the feature matrix;
    # errors="ignore" tolerates frames where some of them are missing.
    not_features = ['winner', 'meta_scenario', 'meta_p_red', 'meta_p_blue', 'meta_seed']
    y = frame['winner']
    X = frame.drop(not_features, axis=1, errors="ignore")

    # The object-dtype feature columns are the ones that get one-hot encoded.
    categorical_columns = X.select_dtypes(include=['object']).columns
    one_hot = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
    preprocessor = ColumnTransformer(transformers=[('cat', one_hot, categorical_columns)])

    pipe = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier),
    ])
    pipe.fit(X, y)
    joblib.dump(pipe, file_name)

    print(file_name, '...completed!')

In [None]:
from multiprocessing import Pool

In [None]:
# Train the queued jobs on three worker processes; map() blocks until every
# job has finished.
with Pool(processes=3) as pool:
    pool.map(pipelineClassifier, args)

In [None]:

if __name__ == '__main__':
    # Sequential (single-process) training of the selected scenarios.

    # Disabled alternative data set, kept for reference:
    # dataframes = {"BridgeHead": "../../../data.scenarioBridgeHead.pkl.gz",
    #               "CrossingTheCity": "../../../data.scenarioCrossingTheCity.pkl.gz",
    #               "Junction": "../../../data.scenarioJunction.pkl.gz",
    #               "JunctionExo": "../../../data.scenarioJunctionExo.pkl.gz",
    #               "Roadblock": "../../../data.scenarioRoadblock.pkl.gz",
    #               "Test1v1": "../../../data.scenarioTest1v1.pkl.gz",
    #               "Test2v2": "../../../data.scenarioTest2v2.pkl.gz"}
    dataframes = {"Junction": "../data.2020-11-09.scenarioJunction.pkl.gz",
                  "JunctionExo": "../data.2020-11-09.scenarioJunctionExo.pkl.gz",
                  "Test1v1": "../data.2020-11-09.scenarioTest1v1.pkl.gz",
                  "Test2v2": "../data.2020-11-09.scenarioTest2v2.pkl.gz"}
    # pilots = ["BridgeHead", "CrossingTheCity", "Junction", "JunctionExo", "Roadblock", "Test1v1", "Test2v2"]
    pilots = [
        "Test1v1",
#         "Test2v2",
#         "Junction",
#         "JunctionExo"
    ]

    # Disabled row filter (GreedyAgent / RandomAgent match-ups only), kept for
    # reference. pipelineClassifier reads the file itself, so a filter like
    # this would have to be applied inside that function:
    # df = df.loc[(((df.meta_p_red == "GreedyAgent") & (df.meta_p_blue == "GreedyAgent")) | (
    #             (df.meta_p_red == "GreedyAgent") & (df.meta_p_blue == "RandomAgent")) | (
    #                          (df.meta_p_red == "RandomAgent") & (df.meta_p_blue == "GreedyAgent")))]

    for p in pilots:
        # pipelineClassifier takes a single (name, path, classifier) tuple and
        # loads the data from `path`; calling it as pipelineClassifier(df, p)
        # raises TypeError.
        # NOTE(review): trains every estimator in the notebook-level
        # `classifiers` list - confirm that is the intended selection.
        for classifier in classifiers:
            pipelineClassifier((p, dataframes[p], classifier))


In [None]:
import os.path as op

In [None]:
# "ciao" is a relative placeholder name, so realpath() resolves it against the
# current working directory and dirname() then yields that directory.
dir_path = op.dirname(op.realpath("ciao"))


In [None]:
# Placeholder file name used to try out the path construction below.
file="ciao"

In [None]:
# Build (and display) the path <dir_path>/../../models/<file>; the ".." parts
# are joined as-is, not normalised.
op.join(dir_path, '..', '..', 'models', file)