In [1]:
# Clear all user-defined names from the kernel namespace (-f skips the confirmation
# prompt) so the notebook can be re-run from a clean state.
%reset -f

In [2]:
from sklearn import preprocessing
from time import time
import numpy as np
import csv
from sklearn import metrics
from sklearn.preprocessing import scale
from sklearn.feature_selection import VarianceThreshold
from sklearn.cross_validation import StratifiedShuffleSplit, cross_val_score

from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB

from sklearn.grid_search import GridSearchCV, ParameterGrid
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE,ADASYN, RandomOverSampler
from imblearn.pipeline import Pipeline
from imblearn.pipeline import make_pipeline

from operator import truediv
from datetime import datetime
import pandas as pd
import time
import os

from pylab import *
import seaborn as sns
import matplotlib.pyplot as plt


# Display configuration for the whole notebook.
# Print small floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# Render pandas floats with a thousands separator and four decimals.
pd.options.display.float_format = '{:,.4f}'.format
# Use matplotlib's "classic" style sheet for all figures.
plt.style.use('classic')

# Draw figures inline, directly below the cell that creates them.
%matplotlib inline

import sys
sys.path.insert(1, "../src/")
from TypeFeatImputer import TypeFeatImputer
from UnivCombineFilter import UnivCombineFilter
import MLpipeline as MLpipeline



#### Required domain methods

In [3]:
#Local methods

def load_data(typeEncounter, typeDiagnosis, typeDataFeatures):
    """Read the prepared dataset pickle that matches the experiment configuration.

    The file is looked up in the ``resources`` directory (relative to the current
    working directory) under the name
    ``prepared_clean_data_<typeEncounter>_<typeDiagnosis>[_<typeDataFeatures>].pkl``.
    The feature-set suffix is omitted when ``typeDataFeatures`` is ``"non_extended"``.

    Parameters:
        typeEncounter: encounter selector used in the file name.
        typeDiagnosis: diagnosis selector used in the file name.
        typeDataFeatures: feature-set selector; appended unless "non_extended".

    Returns:
        Whatever object ``pd.read_pickle`` yields for that file.
    """
    file_stem = 'prepared_clean_data_' + typeEncounter + "_" + typeDiagnosis
    if typeDataFeatures != "non_extended":
        file_stem = file_stem + '_' + typeDataFeatures

    # NOTE: unpickling executes arbitrary code; only load files from a trusted source.
    return pd.read_pickle(os.path.join('resources', file_stem + '.pkl'))

def get_columns(df_all, typeDiagnosis):
    """Split the frame's columns into disease indicators and everything else.

    Parameters:
        df_all: DataFrame whose ``columns`` are inspected.
        typeDiagnosis: "diag_1" or "diag_3" selects the matching list of disease
            column names; any other value yields an empty disease list.

    Returns:
        (colsDiseases, colsNonDiseases): the disease column names for the chosen
        diagnosis, and the list of columns of ``df_all`` that are not among them
        (original column order preserved).
    """
    diseases_by_diagnosis = (
        ("diag_1", [u'Diabetis_1', u'Circulatory_1', u'Digestive_1', u'Genitourinary_1',
                    u'Poisoning_1', u'Muscoskeletal_1', u'Neoplasms_1', u'Respiratory_1']),
        ("diag_3", [u'Diabetis_3', u'Circulatory_3', u'Digestive_3', u'Genitourinary_3',
                    u'Poisoning_3', u'Muscoskeletal_3', u'Neoplasms_3', u'Respiratory_3']),
    )

    colsDiseases = []
    for diagnosis, disease_names in diseases_by_diagnosis:
        if typeDiagnosis == diagnosis:
            colsDiseases = disease_names

    colsNonDiseases = []
    for column in df_all.columns:
        if column not in colsDiseases:
            colsNonDiseases.append(column)

    return colsDiseases, colsNonDiseases

def filter_data_by_class(df_all, typeHypothesis):
    """Reduce the ``readmitted`` target to a binary problem for the chosen hypothesis.

    Parameters:
        df_all: DataFrame with a numeric ``readmitted`` column.
        typeHypothesis:
            "all_readmisssion_vs_none"  -> every ``readmitted`` value greater than 0
                is relabelled as 1, in place on ``df_all``.
            "early_readmission_vs_none" -> only rows whose ``readmitted`` value is
                0 or 1 are kept (a filtered frame is returned; ``df_all`` is untouched).
            Any other value leaves the data as is.

    Returns:
        The DataFrame to use for the experiment.
    """
    # Readmitted none vs readmitted
    if typeHypothesis == "all_readmisssion_vs_none":
        # Single .loc assignment instead of chained indexing (df[col][mask] = 1):
        # the chained form triggers SettingWithCopyWarning and silently has no
        # effect when pandas Copy-on-Write is active.
        readmitted_mask = df_all["readmitted"].values > 0
        df_all.loc[readmitted_mask, "readmitted"] = 1

    # Readmitted none vs early readmitted
    if typeHypothesis == "early_readmission_vs_none":
        df_all = df_all[df_all["readmitted"].isin([0, 1])]

    return df_all

def compute_type_features(df_all, typeDataFeatures):
    """Flag every feature column as categorical (1) or numerical (0).

    The last column of ``df_all`` is excluded from the result. A column counts as
    numerical when its name appears in the fixed list below; every other column is
    flagged as categorical.

    Parameters:
        df_all: DataFrame whose columns are classified.
        typeDataFeatures: accepted for interface compatibility; not used here.

    Returns:
        (catCols, reducedCols): a numpy array of 0/1 flags, one per feature column,
        and the column index of ``df_all`` without its last entry.
    """
    numCols = ['time_in_hospital','num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient',
               'number_emergency', 'number_inpatient', 'number_diagnoses',
               'add_in_out', 'add_procs_meds', 'div_visits_time', 'div_em_time', 'div_visit_med', 'div_em_med',
               'number_treatment','number_treatment_0','number_treatment_1','number_treatment_2','number_treatment_3']

    reducedCols = df_all.columns[:-1]
    categorical_flags = [0 if column in numCols else 1 for column in reducedCols]

    return np.array(categorical_flags), reducedCols

def get_diseases(colsDiseases, typeDisease):
    """Resolve the list of disease identifiers to iterate over.

    Parameters:
        colsDiseases: available disease column names.
        typeDisease: "subset", a single disease column name, or anything else.

    Returns:
        ["subset"] when ``typeDisease`` is "subset"; a one-element list with
        ``typeDisease`` when it is one of ``colsDiseases``; otherwise
        ``colsDiseases`` itself (the same object, not a copy).
    """
    if typeDisease == "subset":
        return ["subset"]

    if typeDisease in colsDiseases:
        return [typeDisease]

    return colsDiseases

def filter_data_by_diseases(df_all, disease, typeDataExperiment, colsNonDiseases):
    """Build the experiment frame for one disease.

    Parameters:
        df_all: full DataFrame.
        disease: "subset" to keep every column, or the name of a disease column.
        typeDataExperiment: when "disease" (and ``disease`` is not "subset"), only
            rows where the disease column equals 1 are kept and that column is
            then removed.
        colsNonDiseases: list of non-disease column names; it is not modified.

    Returns:
        A copy of ``df_all`` for "subset"; otherwise a frame with the non-disease
        columns plus ``disease`` placed just before the last of them, optionally
        row-filtered as described above.
    """
    if disease == "subset":
        return df_all.copy()

    # Place the disease column right before the last non-disease column.
    selected_columns = colsNonDiseases[:-1] + [disease] + colsNonDiseases[-1:]
    experiment_frame = df_all[selected_columns].copy()

    if typeDataExperiment != "disease":
        return experiment_frame

    # Keep only the rows flagged with this disease, then drop the indicator column.
    with_disease = experiment_frame[experiment_frame[disease] == 1]
    remaining_columns = [column for column in with_disease.columns if column != disease]
    return with_disease[remaining_columns]

#### Run experiments

In [4]:
# Experiment configuration. The bracketed lists give the accepted values.
typeEncounter = "last" # ['first','last']; part of the input pickle file name
typeHypothesis = "early_readmission_vs_none" # ['all_readmisssion_vs_none','early_readmission_vs_none']
typeDataFeatures = "non_extended" # ['non_extended','extended','extended_extra']; file-name suffix (none for 'non_extended')
    # Extended -> subset of columns
    # Minimum -> minimum set of columns
    # NOTE(review): 'Minimum' is not among the listed values -- confirm which option it refers to
typeDiagnosis = "none"  # ['none','diag_1','diag_3']; selects which disease columns are recognised
typeDisease = "subset" # ['subset','any', or a single disease column name]
    # subset -> run one experiment on the data as loaded (all columns kept)
    # any -> run one experiment per available disease column
    # <disease column name> -> run the experiment for that disease only
typeDataExperiment = "disease" # ['all','disease']
    # all -> keep the diagnosis indicator as a column
    # disease -> keep only rows with diagnosis == 1 and remove the diagnosis column

In [5]:
# Parameters handed to MLpipeline.run by the experiment loop below.
# NOTE(review): `verbose` is not referenced anywhere else in the visible notebook;
# the run call passes literal True values instead -- confirm whether it should be used.
verbose = True
cv_thr = 0.3   # presumably the held-out fraction used during cross-validation -- confirm in MLpipeline
cv_folds = 5   # number of cross-validation folds (the run log reports "Fitting 5 folds")

tr_thrs = [1.0] # [0.1,0.2,0.4,0.6,1.0]; presumably fractions of the training set to use (logged as SIZE)
ts_thr = 0.30   # presumably the test-set fraction -- the logged train/test shapes match a 70/30 split

fs_methods = ["none",] #["none","combine_fs","lasso_fs","rfe_rf_fs"]  feature-selection methods
cls_methods = ["rf","logReg","knn","nb"] #["rf","svmRBF","logReg","knn","nn","gbt"]  classifiers
lms = ["recall","f1","f1_weighted"] #["f1_weighted","precision_weighted","roc_auc","recall"]  metrics optimised in the inner CV
sm_types = ["none"] #["none","after"]  logged as SM; presumably whether/when over-sampling is applied -- confirm
sm_method = "sm_smote"  # identifier of the over-sampling method passed to the pipeline

In [6]:
#Load data
df_all = load_data(typeEncounter, typeDiagnosis, typeDataFeatures)
print "\nSHAPE:"
print df_all.shape
print "\nInitial columns:"
print df_all.columns

#Filter data by class
df_all = filter_data_by_class(df_all, typeHypothesis)
print "\nRows by class type:"
print df_all.iloc[:,-1].sort_values().unique(), np.sum(df_all["readmitted"] == 0), np.sum(df_all["readmitted"] == 1)
    
#Get columns
colsDiseases, colsNonDiseases = get_columns(df_all,typeDiagnosis)
print "\nDiseases:", colsDiseases
print "\nNon-diseases:", colsNonDiseases
    
#Load diseases
diseases = get_diseases(colsDiseases, typeDisease)
print "\nTotal data:", df_all.shape
print diseases

#Load hyperparams
hyperparams = np.load("../src/default_hyperparams.npy")


SHAPE:
(67182, 42)

Initial columns:
Index([u'gender', u'age', u'race_AfricanAmerican', u'race_Caucasian',
       u'race_Other', u'HbA1c', u'Change', u'time_in_hospital', u'diabetesMed',
       u'diss_home', u'medSpec_cardio', u'medSpec_Family/GeneralPractice',
       u'medSpec_InternalMedicine', u'medSpec_surgery', u'adm_src_1',
       u'adm_src_2', u'adm_src_3', u'adm_src_4', u'adm_src_5', u'adm_src_6',
       u'adm_src_7', u'adm_src_8', u'adm_src_10', u'adm_src_11', u'adm_src_13',
       u'adm_src_14', u'adm_src_22', u'adm_src_25', u'adm_1', u'adm_2',
       u'adm_3', u'adm_4', u'adm_7', u'number_treatment',
       u'num_lab_procedures', u'num_procedures', u'num_medications',
       u'number_outpatient', u'number_emergency', u'number_inpatient',
       u'number_diagnoses', u'readmitted'],
      dtype='object')

Rows by class type:
[0 1] 39785 5994

Diseases: []

Non-diseases: ['gender', 'age', 'race_AfricanAmerican', 'race_Caucasian', 'race_Other', 'HbA1c', 'Change', 'time_in_hospi

In [None]:
# NOTE(review): display and HTML are not used in this cell -- consider moving the
# import to the top import cell or removing it.
from IPython.display import display, HTML

# One entry per disease: whatever MLpipeline.run returns for that experiment.
res = []
for disease in diseases:
    
    print "\nDISEASE:", disease
    print "******************\n"
    
    #Filter data by disease
    df_all_filtered = filter_data_by_diseases(df_all, disease, typeDataExperiment, colsNonDiseases)
    
    #Get features by type
    # catCols: 1/0 flag per feature (categorical/numerical); reducedCols: all columns but the last.
    catCols, reducedCols = compute_type_features(df_all_filtered, typeDataFeatures)
    
    #Apply hyperparams changes
    # Overwrite the parameter grid of the 'rfe_rf_fs' rows so the candidate numbers of
    # features to select are 20%, 40% and 60% of the current feature count.
    # The shared hyperparams array is modified in place on every iteration.
    # NOTE(review): assumes column 1 holds the method id and column 2 its parameter grid -- confirm.
    hyperparams[hyperparams[:,1] == 'rfe_rf_fs',2] =  [{'rfe_rf_fs__n_features_to_select': [int(len(reducedCols) * 0.2),
                                                                                         int(len(reducedCols) * 0.4),
                                                                                         int(len(reducedCols) * 0.6)], 
                                                                                         'rfe_rf_fs__step': [0.1]}]    
    #Create id of experiment
    name = disease + "_" + typeDataFeatures + "_" +  typeDataExperiment + "_" + typeEncounter + "_" + \
           typeHypothesis + "_" + typeDiagnosis
    
    #Execute experiment
    # NOTE(review): the meaning of the two trailing positional True arguments is defined in
    # MLpipeline.run, which is not visible here; the `verbose` variable from the
    # parameters cell is not passed -- confirm.
    res.append(MLpipeline.run(name, df_all_filtered, catCols, reducedCols, hyperparams, ts_thr, tr_thrs, 
                   fs_methods, sm_method, sm_types, 
                   cls_methods, lms, cv_folds, cv_thr, True, True))


DISEASE: subset
******************


DataSet:
**********
**********
SIZE: 1.0
NAME: subset_non_extended_disease_last_early_readmission_vs_none_none
(45779, 42)
ALL TRAIN: (32045, 41)
TRAIN: [0's: 27849 1's: 4196 ]
ALL TEST: (13734, 41)
TEST: [0's: 11936 1's: 1798 ]

Num experiment: 0 / 11
****************
FS: none
SM: none
CLS: rf
METRIC: recall
Fitting 5 folds for each of 48 candidates, totalling 240 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   43.1s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:  4.6min finished



TRAIN f1 (weighted): 0.699
TRAIN Precision [c=0,1]: [ 0.91699827  0.20646146]
TRAIN Recall [c=0,1]: [ 0.64544508  0.61224976]
TRAIN AUC: 0.629
TRAIN Sensibility: 0.612249761678
TRAIN Specificity:  0.645445078818

CV INNER metric: recall
CV INNER selected params ['entropy', 4, 300]
CV INNER score: 0.610484511517

CV OUTER f1-weighted score: 0.696  (+/-0.003)
CV OUTER prec score [c=0,1]: 0.912 (+/- 0.002), 0.200  (+/- 0.003)
CV OUTER rec  score [c=0,1]: 0.646 (+/- 0.005), 0.587  (+/- 0.014)
CV OUTER AUC score: 0.661  (+/-0.006)
CV OUTER Sensibility score: 0.587  (+/-0.014)
CV OUTER Specificity score: 0.646  (+/-0.005)
Selected params (bests from CV) ['entropy', 4, 300]

TEST f1 (weighted): 0.698
TEST Precision [c=0,1]: [ 0.91500711  0.20403926]
TEST Recall [c=0,1]: [ 0.64669906  0.60122358]
TEST AUC: 0.624
TEST Sensibility: 0.601223581758
TEST Specificity: 0.646699061662
Confussion matrix:
         | PRED
REAL-->  v 
[[7719 4217]
 [ 717 1081]]

Total time: 288.385793924

Num experiment:

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   39.4s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:  4.6min finished



TRAIN f1 (weighted): 0.735
TRAIN Precision [c=0,1]: [ 0.92847497  0.23985501]
TRAIN Recall [c=0,1]: [ 0.6912636   0.64656816]
TRAIN AUC: 0.669
TRAIN Sensibility: 0.646568160153
TRAIN Specificity:  0.691263600129

CV INNER metric: f1
CV INNER selected params ['gini', 8, 500]
CV INNER score: 0.310247751585

CV OUTER f1-weighted score: 0.723  (+/-0.002)
CV OUTER prec score [c=0,1]: 0.907 (+/- 0.003), 0.206  (+/- 0.005)
CV OUTER rec  score [c=0,1]: 0.695 (+/- 0.003), 0.526  (+/- 0.017)
CV OUTER AUC score: 0.662  (+/-0.007)
CV OUTER Sensibility score: 0.526  (+/-0.017)
CV OUTER Specificity score: 0.695  (+/-0.003)
Selected params (bests from CV) ['gini', 8, 500]

TEST f1 (weighted): 0.717
TEST Precision [c=0,1]: [ 0.9122511   0.21011352]
TEST Recall [c=0,1]: [ 0.67937332  0.56618465]
TEST AUC: 0.623
TEST Sensibility: 0.566184649611
TEST Specificity: 0.679373324397
Confussion matrix:
         | PRED
REAL-->  v 
[[8109 3827]
 [ 780 1018]]

Total time: 294.794414043

Num experiment: 2 / 11
**

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   40.8s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:  4.6min finished



TRAIN f1 (weighted): 1.000
TRAIN Precision [c=0,1]: [ 1.  1.]
TRAIN Recall [c=0,1]: [ 1.  1.]
TRAIN AUC: 1.000
TRAIN Sensibility: 1.0
TRAIN Specificity:  1.0

CV INNER metric: f1_weighted
CV INNER selected params ['entropy', None, 200]
CV INNER score: 0.810264667241

CV OUTER f1-weighted score: 0.810  (+/-0.001)
CV OUTER prec score [c=0,1]: 0.870 (+/- 0.000), 0.456  (+/- 0.042)
CV OUTER rec  score [c=0,1]: 0.998 (+/- 0.000), 0.009  (+/- 0.002)
CV OUTER AUC score: 0.646  (+/-0.003)
CV OUTER Sensibility score: 0.009  (+/-0.002)
CV OUTER Specificity score: 0.998  (+/-0.000)
Selected params (bests from CV) ['entropy', None, 200]

TEST f1 (weighted): 0.810
TEST Precision [c=0,1]: [ 0.869892  0.5     ]
TEST Recall [c=0,1]: [ 0.9987433  0.0083426]
TEST AUC: 0.504
TEST Sensibility: 0.0083426028921
TEST Specificity: 0.998743297587
Confussion matrix:
         | PRED
REAL-->  v 
[[11921    15]
 [ 1783    15]]

Total time: 292.85272193

Num experiment: 3 / 11
****************
FS: none
SM: none
CL

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   42.0s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  1.2min finished



TRAIN f1 (weighted): 0.653
TRAIN Precision [c=0,1]: [ 0.91867677  0.18998155]
TRAIN Recall [c=0,1]: [ 0.57438328  0.66253575]
TRAIN AUC: 0.618
TRAIN Sensibility: 0.662535748332
TRAIN Specificity:  0.574383281267

CV INNER metric: recall
CV INNER selected params ['balanced', 1e-05, 'l2']
CV INNER score: 0.660841938046

CV OUTER f1-weighted score: 0.651  (+/-0.003)
CV OUTER prec score [c=0,1]: 0.916 (+/- 0.002), 0.187  (+/- 0.002)
CV OUTER rec  score [c=0,1]: 0.574 (+/- 0.004), 0.652  (+/- 0.008)
CV OUTER AUC score: 0.654  (+/-0.005)
CV OUTER Sensibility score: 0.652  (+/-0.008)
CV OUTER Specificity score: 0.574  (+/-0.004)
Selected params (bests from CV) ['balanced', 1e-05, 'l2']

TEST f1 (weighted): 0.655
TEST Precision [c=0,1]: [ 0.91705437  0.18901948]
TEST Recall [c=0,1]: [ 0.57799933  0.65294772]
TEST AUC: 0.615
TEST Sensibility: 0.652947719689
TEST Specificity: 0.577999329759
Confussion matrix:
         | PRED
REAL-->  v 
[[6899 5037]
 [ 624 1174]]

Total time: 80.3771710396

Num

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    6.1s
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   42.1s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  1.3min finished



TRAIN f1 (weighted): 0.708
TRAIN Precision [c=0,1]: [ 0.91515483  0.20870367]
TRAIN Recall [c=0,1]: [ 0.66113685  0.59318398]
TRAIN AUC: 0.627
TRAIN Sensibility: 0.593183984747
TRAIN Specificity:  0.661136845129

CV INNER metric: f1
CV INNER selected params ['balanced', 0.05, 'l1']
CV INNER score: 0.306182501107

CV OUTER f1-weighted score: 0.704  (+/-0.003)
CV OUTER prec score [c=0,1]: 0.911 (+/- 0.002), 0.202  (+/- 0.003)
CV OUTER rec  score [c=0,1]: 0.659 (+/- 0.005), 0.574  (+/- 0.013)
CV OUTER AUC score: 0.665  (+/-0.005)
CV OUTER Sensibility score: 0.574  (+/-0.013)
CV OUTER Specificity score: 0.659  (+/-0.005)
Selected params (bests from CV) ['balanced', 0.05, 'l1']

TEST f1 (weighted): 0.706
TEST Precision [c=0,1]: [ 0.91385507  0.20649834]
TEST Recall [c=0,1]: [ 0.66035523  0.58676307]
TEST AUC: 0.624
TEST Sensibility: 0.586763070078
TEST Specificity: 0.660355227882
Confussion matrix:
         | PRED
REAL-->  v 
[[7882 4054]
 [ 743 1055]]

Total time: 83.602866888

Num experi

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    7.3s
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   41.8s
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  1.3min finished



TRAIN f1 (weighted): 0.817
TRAIN Precision [c=0,1]: [ 0.87268489  0.51851852]
TRAIN Recall [c=0,1]: [ 0.99486517  0.03670162]
TRAIN AUC: 0.516
TRAIN Sensibility: 0.036701620591
TRAIN Specificity:  0.994865165715

CV INNER metric: f1_weighted
CV INNER selected params [None, 1e-05, 'l2']
CV INNER score: 0.816976804012

CV OUTER f1-weighted score: 0.816  (+/-0.002)
CV OUTER prec score [c=0,1]: 0.872 (+/- 0.001), 0.440  (+/- 0.054)
CV OUTER rec  score [c=0,1]: 0.993 (+/- 0.001), 0.035  (+/- 0.006)
CV OUTER AUC score: 0.654  (+/-0.005)
CV OUTER Sensibility score: 0.035  (+/-0.006)
CV OUTER Specificity score: 0.993  (+/-0.001)
Selected params (bests from CV) [None, 1e-05, 'l2']

TEST f1 (weighted): 0.817
TEST Precision [c=0,1]: [ 0.87263112  0.53333333]
TEST Recall [c=0,1]: [ 0.99530831  0.03559511]
TEST AUC: 0.515
TEST Sensibility: 0.035595105673
TEST Specificity: 0.995308310992
Confussion matrix:
         | PRED
REAL-->  v 
[[11880    56]
 [ 1734    64]]

Total time: 83.8898479939

Num ex

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  3.9min finished



TRAIN f1 (weighted): 1.000
TRAIN Precision [c=0,1]: [ 1.  1.]
TRAIN Recall [c=0,1]: [ 1.  1.]
TRAIN AUC: 1.000
TRAIN Sensibility: 1.0
TRAIN Specificity:  1.0

CV INNER metric: recall
CV INNER selected params ['uniform', 1]
CV INNER score: 0.171882446386

CV OUTER f1-weighted score: 0.789  (+/-0.002)
CV OUTER prec score [c=0,1]: 0.877 (+/- 0.001), 0.187  (+/- 0.006)
CV OUTER rec  score [c=0,1]: 0.886 (+/- 0.004), 0.173  (+/- 0.004)
CV OUTER AUC score: 0.530  (+/-0.003)
CV OUTER Sensibility score: 0.173  (+/-0.004)
CV OUTER Specificity score: 0.886  (+/-0.004)
Selected params (bests from CV) ['uniform', 1]

TEST f1 (weighted): 0.794
TEST Precision [c=0,1]: [ 0.87786008  0.19823232]
TEST Recall [c=0,1]: [ 0.8935992   0.17463849]
TEST AUC: 0.534
TEST Sensibility: 0.174638487208
TEST Specificity: 0.89359919571
Confussion matrix:
         | PRED
REAL-->  v 
[[10666  1270]
 [ 1484   314]]

Total time: 560.606350899

Num experiment: 7 / 11
****************
FS: none
SM: none
CLS: knn
METRIC: f

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  3.9min finished



TRAIN f1 (weighted): 1.000
TRAIN Precision [c=0,1]: [ 1.  1.]
TRAIN Recall [c=0,1]: [ 1.  1.]
TRAIN AUC: 1.000
TRAIN Sensibility: 1.0
TRAIN Specificity:  1.0

CV INNER metric: f1
CV INNER selected params ['uniform', 1]
CV INNER score: 0.179139617736

CV OUTER f1-weighted score: 0.789  (+/-0.002)
CV OUTER prec score [c=0,1]: 0.877 (+/- 0.001), 0.187  (+/- 0.006)
CV OUTER rec  score [c=0,1]: 0.886 (+/- 0.004), 0.173  (+/- 0.004)
CV OUTER AUC score: 0.530  (+/-0.003)
CV OUTER Sensibility score: 0.173  (+/-0.004)
CV OUTER Specificity score: 0.886  (+/-0.004)
Selected params (bests from CV) ['uniform', 1]

TEST f1 (weighted): 0.794
TEST Precision [c=0,1]: [ 0.87786008  0.19823232]
TEST Recall [c=0,1]: [ 0.8935992   0.17463849]
TEST AUC: 0.534
TEST Sensibility: 0.174638487208
TEST Specificity: 0.89359919571
Confussion matrix:
         | PRED
REAL-->  v 
[[10666  1270]
 [ 1484   314]]

Total time: 550.848095894

Num experiment: 8 / 11
****************
FS: none
SM: none
CLS: knn
METRIC: f1_we

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  3.8min finished



TRAIN f1 (weighted): 1.000
TRAIN Precision [c=0,1]: [ 1.  1.]
TRAIN Recall [c=0,1]: [ 1.  1.]
TRAIN AUC: 1.000
TRAIN Sensibility: 1.0
TRAIN Specificity:  1.0

CV INNER metric: f1_weighted
CV INNER selected params ['distance', 5]
CV INNER score: 0.815232132728


In [29]:
if 'dfAux' not in locals():
    print "Not dfAux"
    dfAux = pd.DataFrame()

if 'df' in locals():
    print "Exist df"
    dfAux = df.copy()

df = pd.DataFrame(np.array(res[0]).reshape(len(res[0]),35), columns=
                          ["exp", "name",
                           "size_tr","fs","sm","cls","metric","params",
                           "tr_sens","tr_spec","tr_auc",
                           "tr_prec","tr_rec","tr_f1",
                           "cv_sens_mean","cv_sens_std","cv_spec_mean","cv_spec_std","cv_auc_mean","cv_auc_std",
                           "cv_prec_mean","cv_prec_std","cv_rec_mean","cv_rec_std",
                           "cv_f1_mean","cv_f1_std",
                           "test_sens","test_spec","test_auc",
                           "test_rec","test_prec","test_f1",
                           "cm_test",
                           "time","pipeline"])

df[["size_tr","fs","sm","metric","cls","params", 
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_std","cv_f1_mean","cv_sens_mean","cv_sens_std","cv_spec_mean","cv_spec_std","cv_auc_mean",
    "test_auc","test_sens","test_spec","test_f1"]]

Exist df


Unnamed: 0,size_tr,fs,sm,metric,cls,params,tr_auc,tr_sens,tr_spec,cv_auc_std,cv_f1_mean,cv_sens_mean,cv_sens_std,cv_spec_mean,cv_spec_std,cv_auc_mean,test_auc,test_sens,test_spec,test_f1
0,0.01,none,none,recall,rf,"[entropy, 4, 200]",0.7814,0.5952,0.9676,0.0467,0.786,0.0154,0.0308,0.9542,0.0279,0.5312,0.5291,0.119,0.9391,0.8083
1,0.01,none,none,f1,rf,"[gini, 4, 200]",0.785,0.5952,0.9748,0.0596,0.7868,0.0154,0.0308,0.9566,0.0346,0.5361,0.528,0.1057,0.9504,0.8115
2,0.01,none,none,f1_weighted,rf,"[entropy, None, 200]",1.0,1.0,1.0,0.0473,0.8018,0.0,0.0,1.0,0.0,0.5663,0.5018,0.0044,0.9992,0.8093
3,0.01,none,none,recall,logReg,"[balanced, 0.1, l1]",0.6882,0.8333,0.5432,0.0567,0.6403,0.5846,0.1427,0.5735,0.0553,0.6278,0.5542,0.5845,0.5238,0.6062
4,0.01,none,none,f1,logReg,"[balanced, 0.1, l1]",0.6882,0.8333,0.5432,0.0567,0.6403,0.5846,0.1427,0.5735,0.0553,0.6278,0.5542,0.5845,0.5238,0.6062
5,0.01,none,none,f1_weighted,logReg,"[None, 0.5, l1]",0.5577,0.119,0.9964,0.0655,0.8142,0.0615,0.0576,0.988,0.0132,0.6449,0.52,0.0528,0.9872,0.8178
6,0.01,none,none,recall,knn,"[uniform, 1]",1.0,1.0,1.0,0.0404,0.7836,0.1385,0.0754,0.8964,0.018,0.5174,0.5201,0.168,0.8722,0.7807
7,0.01,none,none,f1,knn,"[uniform, 3]",0.6238,0.2619,0.9856,0.0658,0.8029,0.0462,0.0377,0.9735,0.009,0.5095,0.5137,0.0673,0.9601,0.8077
8,0.01,none,none,f1_weighted,knn,"[uniform, 9]",0.5238,0.0476,1.0,0.0781,0.8018,0.0,0.0,1.0,0.0,0.5619,0.5018,0.0056,0.9981,0.8091
9,0.01,none,none,recall,nb,[],0.5773,1.0,0.1547,0.0205,0.3305,0.8154,0.0784,0.2169,0.0522,0.5188,0.5001,0.8576,0.1425,0.2426


In [30]:
# Combine stored and current results, then rank the experiments by mean CV AUC.
# NOTE(review): dfAux is reset to an empty frame right before the concat, so the
# concat only contains the rows of the current df; anything stored in dfAux
# beforehand is discarded -- confirm this is intended.
dfAux =pd.DataFrame()
df = pd.concat((dfAux, df))
# Summary view sorted from highest to lowest cv_auc_mean.
df[["size_tr","fs","sm","metric","cls","params", 
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]].sort_values("cv_auc_mean", ascending=False)

Unnamed: 0,size_tr,fs,sm,metric,cls,params,tr_auc,tr_sens,tr_spec,cv_auc_mean,cv_f1_mean,cv_sens_mean,cv_spec_mean,test_auc,test_f1,test_sens,test_spec
5,0.01,none,none,f1_weighted,logReg,"[None, 0.5, l1]",0.5577,0.119,0.9964,0.6449,0.8142,0.0615,0.988,0.52,0.8178,0.0528,0.9872
3,0.01,none,none,recall,logReg,"[balanced, 0.1, l1]",0.6882,0.8333,0.5432,0.6278,0.6403,0.5846,0.5735,0.5542,0.6062,0.5845,0.5238
4,0.01,none,none,f1,logReg,"[balanced, 0.1, l1]",0.6882,0.8333,0.5432,0.6278,0.6403,0.5846,0.5735,0.5542,0.6062,0.5845,0.5238
2,0.01,none,none,f1_weighted,rf,"[entropy, None, 200]",1.0,1.0,1.0,0.5663,0.8018,0.0,1.0,0.5018,0.8093,0.0044,0.9992
8,0.01,none,none,f1_weighted,knn,"[uniform, 9]",0.5238,0.0476,1.0,0.5619,0.8018,0.0,1.0,0.5018,0.8091,0.0056,0.9981
1,0.01,none,none,f1,rf,"[gini, 4, 200]",0.785,0.5952,0.9748,0.5361,0.7868,0.0154,0.9566,0.528,0.8115,0.1057,0.9504
0,0.01,none,none,recall,rf,"[entropy, 4, 200]",0.7814,0.5952,0.9676,0.5312,0.786,0.0154,0.9542,0.5291,0.8083,0.119,0.9391
9,0.01,none,none,recall,nb,[],0.5773,1.0,0.1547,0.5188,0.3305,0.8154,0.2169,0.5001,0.2426,0.8576,0.1425
10,0.01,none,none,f1,nb,[],0.5773,1.0,0.1547,0.5188,0.3305,0.8154,0.2169,0.5001,0.2426,0.8576,0.1425
11,0.01,none,none,f1_weighted,nb,[],0.5773,1.0,0.1547,0.5188,0.3305,0.8154,0.2169,0.5001,0.2426,0.8576,0.1425
