In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import sys 
sys.path.append('../')
import src.model.feature_cleaning as feature_cleaning

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


#sklearn models
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
#sklearn other
import graphviz 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, f1_score, log_loss
from mlxtend.plotting import plot_confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline



df, fieldofdegree_df, SOCP_labels, schl_labels, major_majors, NAICSP_labels_df, MAJ_NAICSP_labels_df = feature_cleaning.load_dfs()

youngemp_df = feature_cleaning.clean_that_target(df, SOCP_labels)
youngemp_df = feature_cleaning.single_occ_target(youngemp_df)
edu_df = feature_cleaning.create_edu_df(youngemp_df, fieldofdegree_df, schl_labels, major_majors)

# split the data, choosing only edu cols
X = edu_df.drop(columns=[ 'SERIALNO', 'FOD1P', 'FOD2P','SOCP','MAJ_SOCP','MAJ_SOCP_labels', 
                'MAJ_SOCP_15','FOD1P_labels','FOD2P_labels','SCHL',
                'SCHL_labels','FOD1P_MAJ_labels', 'FOD1P_MAJ'])
y = edu_df.loc[:,'MAJ_SOCP_15']


X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3, 
                                                    random_state=42)

  from numpy.core.umath_tests import inner1d
  SOCPdf = df.dropna(axis='index', subset=['SOCP'])[df.SOCP != '999920']


Number of employed people: 218454
Percent employed people: 0.5785711448056677
Number of young employed people: 77406
Percent young employed people(out of all PUMS): 0.20500827650135733
Number of emp cats: 23
Number of degree fields present (max 173): 173


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  edu_df['SCHL_labels'] = edu_df.SCHL.map(schl_labels)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  edu_df['SCHL_ord'] = edu_df.SCHL.astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats

before dummies:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 77406 entries, 0 to 77405
Data columns (total 14 columns):
SERIALNO            77406 non-null int64
SOCP                77406 non-null object
MAJ_SOCP            77406 non-null object
MAJ_SOCP_labels     77406 non-null object
MAJ_SOCP_15         77406 non-null int64
FOD1P               77406 non-null object
FOD2P               77406 non-null object
FOD1P_labels        77406 non-null object
FOD2P_labels        77406 non-null object
SCHL                77406 non-null object
SCHL_labels         77406 non-null object
SCHL_ord            77406 non-null int64
FOD1P_MAJ           77406 non-null int64
FOD1P_MAJ_labels    77406 non-null object
dtypes: int64(4), object(10)
memory usage: 53.1 MB
None


In [9]:

# model pipelines
#-----------------------------------
#-------------linear
pipe_lr = Pipeline([('scl', StandardScaler()),
            ('clf', LogisticRegression(random_state=42))])

pipe_lr_l2 = Pipeline([('scl', StandardScaler()),
            ('clf', LogisticRegression(random_state=42))])

pipe_sgd = Pipeline([('scl', StandardScaler()),
            ('clf', SGDClassifier(random_state=42))])


#-------------trees
pipe_dt = Pipeline([('clf', DecisionTreeClassifier(random_state=42))])

pipe_rf = Pipeline([('clf', RandomForestClassifier(random_state=42))])

pipe_rf_scl = Pipeline([('scl', StandardScaler()),
            ('clf', RandomForestClassifier(random_state=42))])

pipe_gb = Pipeline([('clf', GradientBoostingClassifier(random_state=42))])


#-------------SVM
pipe_svm = Pipeline([('scl', StandardScaler()),
            ('clf', SVC(random_state=42))])


#-------------KNN
pipe_knn = Pipeline([('clf', KNeighborsClassifier())])

pipe_knn_scl = Pipeline([('scl', StandardScaler()),
            ('clf', KNeighborsClassifier())])

#-----------------------------------


# grid search params
param_range = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
param_range_fl = [1.0, 0.5, 0.1]
max_depth = [10,100,1000,10000]
alpha_range = [.1, .001, .00001, .000001]
gamma_range = [.1, 1, 10]

#-------------linear
grid_params_lr = [{'clf__penalty': ['l1'],
        'clf__C': param_range_fl,
        'clf__solver': ['liblinear', ],  #,'saga'
        #'clf__multi_class': ['ovr', 'multinomial', 'auto'],
        'clf__class_weight': [None, 'balanced']}] 

grid_params_lr_l2 = [{'clf__penalty': ['l2'],
        'clf__C': param_range_fl,
        'clf__solver': ['newton-cg', 'lbfgs', 'liblinear'],  #, 'sag'
        #'clf__multi_class': ['ovr', 'multinomial', 'auto'],
        'clf__class_weight': [None, 'balanced']}]

grid_params_sgd = [{'clf__loss': ['hinge', 'log', 'perceptron'],
        'clf__alpha': alpha_range,
        'clf__penalty': ['l1', 'l2', 'elasticnet'],
        'clf__class_weight': [None, 'balanced']}] 

#-------------trees
grid_params_dt = [{'clf__criterion': ['gini', 'entropy'],
        'clf__min_samples_leaf': param_range,
        'clf__max_depth': max_depth,
        'clf__min_samples_split': param_range[1:],
        'clf__class_weight': [None, 'balanced']}]

grid_params_rf = [{'clf__criterion': ['gini', 'entropy'],
        'clf__min_samples_leaf': param_range,
        'clf__max_depth': max_depth,
        'clf__min_samples_split': param_range[1:],
        'clf__class_weight': [None, 'balanced', 'balanced_subsample']}]

grid_params_gb = [{'clf__loss': ['deviance', 'exponential'],
        'clf__learning_rate': alpha_range,
        'clf__n_estimators': max_depth,
        'clf__subsample': param_range_fl}]

#-------------SVM
grid_params_svm = [{#'clf__kernel': ['linear', 'rbf', 'poly'],
        #'clf__degree': param_range[1:],
        #'clf__gamma': gamma_range,
        #'clf__C': gamma_range,
        }]#'clf__class_weight': [None, 'balanced']

#-------------KNN
grid_params_knn = [{'clf__n_neighbors': param_range}]

#--------------------------------------------------------------

# Construct grid searches
jobs = -1
verbose = 10

#-------------linear
gs_lr = GridSearchCV(estimator=pipe_lr,
            param_grid=grid_params_lr,
            scoring='f1_micro',
            cv=10,
            n_jobs=jobs,
            verbose=verbose) 

gs_lr_l2 = GridSearchCV(estimator=pipe_lr_l2,
            param_grid=grid_params_lr_l2,
            scoring='f1_micro',
            cv=10,
            n_jobs=jobs,
            verbose=verbose)

gs_sgd = GridSearchCV(estimator=pipe_sgd,
            param_grid=grid_params_sgd,
            scoring='f1_micro',
            cv=10,
            verbose=verbose)


#-------------trees    
gs_dt = GridSearchCV(estimator=pipe_rf,
            param_grid=grid_params_dt,
            scoring='f1_micro',
            cv=10, 
            n_jobs=jobs,
            verbose=verbose)

gs_rf = GridSearchCV(estimator=pipe_rf,
            param_grid=grid_params_rf,
            scoring='f1_micro',
            cv=10, 
            n_jobs=jobs,
            verbose=verbose)

gs_rf_scl = GridSearchCV(estimator=pipe_rf_scl,
            param_grid=grid_params_rf,
            scoring='f1_micro',
            cv=10, 
            n_jobs=jobs,
            verbose=verbose)

gs_gb = GridSearchCV(estimator=pipe_gb,
            param_grid=grid_params_gb,
            scoring='f1_micro',
            cv=10, 
            verbose=verbose)

#-------------SVM

gs_svm = GridSearchCV(estimator=pipe_svm,
            param_grid=grid_params_svm,
            scoring='f1_micro',
            cv=10,
            n_jobs=jobs,
            verbose=verbose)

#-------------KNN
gs_knn = GridSearchCV(estimator=pipe_knn,
            param_grid=grid_params_knn,
            scoring='f1_micro',
            cv=10,
            n_jobs=jobs,
            verbose=verbose)

gs_knn_scl = GridSearchCV(estimator=pipe_knn_scl,
            param_grid=grid_params_knn,
            scoring='f1_micro',
            cv=10,
            n_jobs=jobs,
            verbose=verbose)

#---------------------------------------------------------------------

# List of pipelines for ease of iteration
grids = [ gs_svm, gs_knn, gs_knn_scl]

# Dictionary of pipelines and classifier types for ease of reference
grid_dict = {0:'SVC', 1: 'KNeighborsClassifier', 2:'KNeighborsClassifier w/ Scaling'}

# Fit the grid search objects
print('Performing model optimizations...')
best_f1_micro = 0.0
best_clf = 0
best_gs = ''
for idx, gs in enumerate(grids):
    print('\nEstimator: %s' % grid_dict[idx])
    # Fit grid search
    gs.fit(X_train, y_train)

    # Best params
    print('Best params: %s' % gs.best_params_)

    # Best training data f1
    print('Best training f1: %.3f' % gs.best_score_)

    # Predict on test data with best params
    y_pred = gs.predict(X_test)

    # Test data accuracy of model with best params
    print('Test set f1 score for best params: %.3f ' % f1_score(y_test, y_pred))

    # Track best (highest test f1) model
    if f1_score(y_test, y_pred) > best_f1_micro:
        best_f1_micro = f1_score(y_test, y_pred)
        best_gs = gs
        best_clf = idx
print('\nClassifier with best test set f1: %s' % grid_dict[best_clf])

# Save best grid search pipeline to file
dump_file = 'best_model_no_feat_sel_extr_occ_15.pkl'
joblib.dump(best_gs, dump_file, compress=1)
print('\nSaved %s grid search pipeline to file: %s' % (grid_dict[best_clf], dump_file))

Performing model optimizations...

Estimator: SVC
Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ....................... , score=0.9680693983019565, total= 5.5min
[CV] ....................... , score=0.9677062188595682, total= 5.5min
[CV] ....................... , score=

[Parallel(n_jobs=-1)]: Done   3 out of  10 | elapsed:  9.2min remaining: 21.5min


[CV] ....................... , score=0.9651098393944988, total= 5.8min
[CV] ....................... , score=0.9669558796381761, total= 5.9min


[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed:  9.4min remaining:  9.4min


[CV] ........................ , score=0.966599003506182, total= 5.9min
[CV] ....................... , score=0.9678907547517992, total= 5.9min


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:  9.4min remaining:  4.0min


[CV] ....................... , score=0.9675216829673371, total= 6.0min
[CV] ....................... , score=0.9653072522605647, total= 6.0min
[CV] ....................... , score=0.9682539682539683, total= 6.0min


[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  9.4min finished


Best params: {}
Best training f1: 0.967
Test set f1 score for best params: 0.393 

Estimator: KNeighborsClassifier
Fitting 10 folds for each of 10 candidates, totalling 100 fits
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=2 ..............................................
[CV] clf__n_neighbors=2 .................

[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 10.0min


[CV] ..... clf__n_neighbors=1, score=0.9544196346189334, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9558796381761122, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9549732422956265, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9579258165713231, total= 1.1min
[CV] ..... clf__n_neighbors=1, score=0.9472032490308289, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9603174603174603, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9453773758996125, total= 1.1min
[CV] clf__n_neighbors=2 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed: 10.1min


[CV] ..... clf__n_neighbors=1, score=0.9590254706533776, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9664144676139509, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9671526111828751, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9614319985237129, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9664144676139509, total= 1.1min
[CV] ..... clf__n_neighbors=2, score=0.9638309651227164, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] clf__n_neighbors=3 ..............................................
[CV] ...... clf__n_neighbors=2, score=0.965485418973791, total= 1.1min
[CV] c

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 20.4min


[CV] ..... clf__n_neighbors=2, score=0.9652944434188665, total= 1.1min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9634484031751891, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9667774086378738, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9625392138770991, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ...... clf__n_neighbors=3, score=0.966968075290644, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9671526111828751, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9636464292304854, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 30.8min


[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9632637991508215, total= 1.1min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9621631598375785, total= 1.1min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9623407790289829, total= 1.1min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9656763240450268, total= 1.1min
[CV] ..... clf__n_neighbors=4, score=0.9636464292304854, total= 1.1min
[CV] clf__n_neighbors=5 ..............................................
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9677062188595682, total= 1.1min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9678907547517992, total= 1.1min
[CV] c

[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 41.2min


[CV] ..... clf__n_neighbors=4, score=0.9673311184939092, total= 1.1min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9645560273213956, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9652944434188665, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9677062188595682, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9664144676139509, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9677062188595682, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9693670418896475, total= 1.1min
[CV] clf__n_neighbors=6 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 41.7min


[CV] ..... clf__n_neighbors=5, score=0.9603101347609377, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9630791951264538, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9664144676139509, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9677062188595682, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9680752906440303, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9688134342129544, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9695515777818786, total= 1.1min
[CV] clf__n_neighbors=7 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed: 62.4min


[CV] ..... clf__n_neighbors=7, score=0.9673371470751061, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9688134342129544, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9673371470751061, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9680752906440303, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ...... clf__n_neighbors=7, score=0.966968075290644, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9667774086378738, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9652944434188665, total= 1.1min
[CV] clf__n_neighbors=8 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed: 73.2min


[CV] ..... clf__n_neighbors=8, score=0.9651227163683337, total= 1.1min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9686288983207234, total= 1.1min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9660391288298265, total= 1.1min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9688076781100037, total= 1.1min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9652944434188665, total= 1.1min
[CV] clf__n_neighbors=10 .............................................
[CV] ..... clf__n_neighbors=8, score=0.9636330071995569, total= 1.1min
[CV] clf__n_neighbors=10 .............................................
[CV] ..... clf__n_neighbors=9, score=0.9678907547517992, total= 1.1min
[CV] clf__n_neighbors=10 .............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  88 out of 100 | elapsed: 84.0min remaining: 11.5min


[CV] ..... clf__n_neighbors=9, score=0.9643714232970279, total= 1.1min
[CV] ..... clf__n_neighbors=9, score=0.9630791951264538, total= 1.1min
[CV] .... clf__n_neighbors=10, score=0.9677062188595682, total= 1.1min
[CV] .... clf__n_neighbors=10, score=0.9643845727994095, total= 1.1min
[CV] .... clf__n_neighbors=10, score=0.9693670418896475, total= 1.2min
[CV] .... clf__n_neighbors=10, score=0.9675216829673371, total= 1.1min
[CV] .... clf__n_neighbors=10, score=0.9660453958294888, total= 1.2min
[CV] .... clf__n_neighbors=10, score=0.9686288983207234, total= 1.2min
[CV] .... clf__n_neighbors=10, score=0.9662236987818383, total=  41.7s
[CV] ...... clf__n_neighbors=10, score=0.96843853820598, total=  41.4s
[CV] .... clf__n_neighbors=10, score=0.9660328595163374, total=  39.0s
[CV] .... clf__n_neighbors=10, score=0.9658482554919697, total=  37.2s


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 88.4min finished


Best params: {'clf__n_neighbors': 9}
Best training f1: 0.967
Test set f1 score for best params: 0.401 

Estimator: KNeighborsClassifier w/ Scaling
Fitting 10 folds for each of 10 candidates, totalling 100 fits
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=1 ..............................................
[CV] clf__n_neighbors=2 ..............................................
[CV] clf_

[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 44.9min


[CV] ..... clf__n_neighbors=1, score=0.9538660269422403, total= 5.9min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9555186415651532, total= 5.9min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9547720140299059, total= 6.0min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9544196346189334, total= 6.0min
[CV] clf__n_neighbors=2 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9547887064033954, total= 5.9min
[CV] clf__n_neighbors=2 ..............................................
[CV] ...... clf__n_neighbors=1, score=0.952556765737493, total= 6.0min
[CV] ..... clf__n_neighbors=1, score=0.9553340716131414, total= 6.0min
[CV] clf__n_neighbors=2 ..............................................
[CV] clf__n_neighbors=2 ..............................................


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed: 45.1min


[CV] ..... clf__n_neighbors=1, score=0.9520206680199299, total= 5.9min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=1, score=0.9531278833733161, total= 5.9min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9660453958294888, total= 5.9min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9643845727994095, total= 6.0min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9632773574460233, total= 5.8min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9627099870777183, total= 5.8min
[CV] clf__n_neighbors=3 ..............................................
[CV] ...... clf__n_neighbors=2, score=0.961801070308175, total= 5.7min
[CV] clf__n_neighbors=3 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 90.5min


[CV] ..... clf__n_neighbors=2, score=0.9627168696936139, total= 5.7min
[CV] clf__n_neighbors=3 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9627168696936139, total= 5.8min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=2, score=0.9623407790289829, total= 5.8min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9638309651227164, total= 5.7min
[CV] clf__n_neighbors=4 ..............................................
[CV] ...... clf__n_neighbors=3, score=0.961985606200406, total= 5.8min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9660453958294888, total= 5.3min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9647536445838716, total= 5.2min
[CV] clf__n_neighbors=4 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed: 136.0min


[CV] clf__n_neighbors=4 ..............................................
[CV] ...... clf__n_neighbors=3, score=0.960863946834041, total= 5.5min
[CV] clf__n_neighbors=4 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9640155010149474, total= 5.5min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=3, score=0.9649317091177556, total= 5.5min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9643845727994095, total= 5.4min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9645691086916405, total= 5.4min
[CV] clf__n_neighbors=5 ..............................................
[CV] ...... clf__n_neighbors=4, score=0.966783539398413, total= 5.2min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9658608599372578, total= 5.2min
[CV] c

[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 181.5min


[CV] ..... clf__n_neighbors=4, score=0.9621561750046151, total= 5.4min
[CV] clf__n_neighbors=5 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9643714232970279, total= 5.4min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=4, score=0.9660391288298265, total= 5.4min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9636464292304854, total= 5.4min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9642000369071785, total= 5.4min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9664144676139509, total= 5.2min
[CV] clf__n_neighbors=6 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9662299317217199, total= 5.2min
[CV] clf__n_neighbors=6 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed: 182.6min


[CV] ..... clf__n_neighbors=5, score=0.9634484031751891, total= 5.3min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=5, score=0.9652944434188665, total= 5.2min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9664144676139509, total= 5.3min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9638309651227164, total= 5.3min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9677062188595682, total= 5.2min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9662299317217199, total= 5.3min
[CV] clf__n_neighbors=7 ..............................................
[CV] ..... clf__n_neighbors=6, score=0.9656763240450268, total= 5.2min
[CV] clf__n_neighbors=7 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed: 273.1min


[CV] ..... clf__n_neighbors=7, score=0.9651227163683337, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9688134342129544, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9675216829673371, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9664144676139509, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9669619785898855, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9689979701051855, total= 5.3min
[CV] clf__n_neighbors=8 ..............................................
[CV] ..... clf__n_neighbors=7, score=0.9682539682539683, total= 5.2min
[CV] clf__n_neighbors=8 ..............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed: 319.4min


[CV] ...... clf__n_neighbors=8, score=0.966968075290644, total= 5.1min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9662299317217199, total= 5.2min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9660391288298265, total= 5.2min
[CV] clf__n_neighbors=9 ..............................................
[CV] ...... clf__n_neighbors=8, score=0.966592838685862, total= 5.2min
[CV] clf__n_neighbors=9 ..............................................
[CV] ..... clf__n_neighbors=8, score=0.9640022152482924, total= 5.3min
[CV] clf__n_neighbors=10 .............................................
[CV] ..... clf__n_neighbors=8, score=0.9658482554919697, total= 5.2min
[CV] clf__n_neighbors=10 .............................................
[CV] ..... clf__n_neighbors=9, score=0.9654917881527957, total= 5.2min
[CV] clf__n_neighbors=10 .............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  88 out of 100 | elapsed: 366.2min remaining: 49.9min


[CV] ..... clf__n_neighbors=9, score=0.9645560273213956, total= 5.2min
[CV] ..... clf__n_neighbors=9, score=0.9671404836625437, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9632773574460233, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9656763240450268, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9677062188595682, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9662299317217199, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9664144676139509, total= 5.0min
[CV] .... clf__n_neighbors=10, score=0.9662299317217199, total= 5.1min
[CV] .... clf__n_neighbors=10, score=0.9662236987818383, total= 2.9min
[CV] .... clf__n_neighbors=10, score=0.9664082687338501, total= 2.9min
[CV] .... clf__n_neighbors=10, score=0.9638176112239247, total= 2.9min
[CV] .... clf__n_neighbors=10, score=0.9660328595163374, total= 2.5min


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 386.1min finished


Best params: {'clf__n_neighbors': 7}
Best training f1: 0.967
Test set f1 score for best params: 0.381 

Classifier with best test set f1: KNeighborsClassifier


NameError: name 'joblib' is not defined