In [1]:
import pickle

from metabolitics.preprocessing import MetaboliticsPipeline

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score

from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold, SelectKBest
from sklearn.decomposition import PCA, NMF
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_validate, StratifiedKFold

from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix, make_scorer

import numpy as np, pandas as pd
from collections import defaultdict, OrderedDict
from itertools import chain, starmap
from itertools import product

In [42]:
results = pickle.load(open('../results/breast_cancer2.results','rb'))
labels = pickle.load(open('../datasets/breast_cancer2_y','rb'))

In [43]:
pipe = MetaboliticsPipeline(['reaction-diff',
                             'pathway_transformer'])

pre_processed_results = pipe.fit_transform(results, labels)

In [44]:
samples = defaultdict(lambda : [])
[
 samples[key].append(value) for key, value in 
 chain(*map(lambda sample: sample.items(), pre_processed_results))
]

dataset = pd.DataFrame(samples, index=labels)

In [45]:
dataset.T.head()

Unnamed: 0,unhealthy,unhealthy.1,unhealthy.2,healthy,unhealthy.3,unhealthy.4,unhealthy.5,unhealthy.6,healthy.1,healthy.2,...,unhealthy.7,healthy.3,healthy.4,unhealthy.8,unhealthy.9,unhealthy.10,unhealthy.11,unhealthy.12,healthy.5,unhealthy.13
,-25.490606,-21.389085,-29.843938,-14.52688,-12.18496,4.338045,-7.707883,7.645137,99.308536,31.907804,...,-13.039948,-90.031445,-32.690873,-26.580457,-62.169126,-2.229793,-12.184928,-36.71715,-42.234431,42.581163
Alanine and aspartate metabolism,-100.77703,96.445184,38.29439,-100.777,6.018669,62.624693,24.222962,-100.777038,147.239393,-100.777038,...,-58.700946,24.222962,35.878663,24.222962,24.222962,24.222962,-100.777,274.222962,-205.961401,15.846673
Alkaloid synthesis,7e-06,-1e-06,-1e-06,5.131799e-07,6e-06,-1e-06,-1e-06,-1e-06,-1e-06,-1e-06,...,2e-06,-1e-06,-1e-06,-1e-06,-1e-06,-1e-06,3.8e-05,-1e-06,-1e-06,-1e-06
Aminosugar metabolism,-55.414072,73.618178,256.413877,-44.66139,-55.414075,-12.403328,-12.403328,-12.403328,-33.908704,116.62893,...,-55.414077,-55.41408,-12.403328,-55.41408,154.263339,84.370866,-55.414042,-55.41408,-33.908704,-33.908704
Androgen and estrogen synthesis and metabolism,7e-06,-1e-06,-1e-06,1.983698e-06,6e-06,-1e-06,-1e-06,-1e-06,-1e-06,-1e-06,...,4e-06,-1e-06,-1e-06,-1e-06,-1e-06,-1e-06,3.8e-05,-1e-06,-1e-06,-1e-06


In [46]:
balance = labels.count('unhealthy') / len(labels)

print(balance)

0.6477272727272727


In [47]:
sc = StandardScaler()

In [48]:
binarize = lambda ls: np.array([1 if l == 'unhealthy' else 0 for l in ls])

X = dataset
X = sc.fit_transform(dataset)
y = binarize(dataset.index)

In [49]:
dataset.shape

(88, 100)

In [17]:
#     (SVC, {
#         'kernel':['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
#         'C': np.geomspace(1e-6, 1e6, num=15),
#         'max_iter': range(5, 30+1, 5),
#         'degree':range(1,6)
#     }),
classifiers = [
    
    (SVC, {
        'C': np.geomspace(1e-6, 1e6, num=10),
    })
]
feature_selection = [
    (PCA, {
    'n_components': range(3, 81+1, 6)
  })
]

In [16]:
def build_pipeline(p):
    pipeline, pipeline_params = [], OrderedDict()
    
    for model, model_params in p:
        name = model.__name__
        
        pipeline.append((name, model()))
        pipeline_params.update({'{}__{}'.format(name, param_name) : values 
                                for param_name, values in model_params.items()})
    
    return Pipeline(pipeline), pipeline_params

### Nested cross-validation over 10 trials

In [67]:
NUM_TRIALS = 10
metrics = ['f1', 'recall', 'precision', 'accuracy']
trials = []

for i in range(NUM_TRIALS):
    cv_pipelines = []
    inner_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
    outer_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
   
    for pipeline, params in map(build_pipeline, product(feature_selection, classifiers)):
        cv_pipeline = GridSearchCV(pipeline, params, cv=inner_cv, n_jobs=-1, verbose=1).fit(X, y)
        cv_pipelines.append(cv_pipeline)
        
    best_pipeline = cv_pipelines[np.argmax([i.best_score_ for i in cv_pipelines])]
    cv = cross_validate(best_pipeline.best_estimator_, 
                        X=X, y=y, cv=outer_cv, 
                        scoring=metrics, 
                        return_train_score=False)
    
    trials.append((best_pipeline, cv))
    print("{} trial done".format(i+1))
    print("-"*10)

Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 867 tasks      | elapsed: 17.3min
[Parallel(n_jobs=-1)]: Done 1559 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 2438 tasks      | elapsed: 28.9min
[Parallel(n_jobs=-1)]: Done 3259 tasks      | elapsed: 31.2min
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 37.6min finished


1 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  7.5min
[Parallel(n_jobs=-1)]: Done 1389 tasks      | elapsed: 21.4min
[Parallel(n_jobs=-1)]: Done 1913 tasks      | elapsed: 27.4min
[Parallel(n_jobs=-1)]: Done 2685 tasks      | elapsed: 29.9min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 31.2min remaining:    7.8s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 35.8min finished


2 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 891 tasks      | elapsed: 19.2min
[Parallel(n_jobs=-1)]: Done 1495 tasks      | elapsed: 19.6min
[Parallel(n_jobs=-1)]: Done 1907 tasks      | elapsed: 23.4min
[Parallel(n_jobs=-1)]: Done 2627 tasks      | elapsed: 27.0min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 28.1min remaining:    7.0s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 29.5min finished


3 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 847 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 1522 tasks      | elapsed: 21.1min
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed: 24.2min
[Parallel(n_jobs=-1)]: Done 2989 tasks      | elapsed: 29.4min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 32.5min remaining:    8.2s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 33.6min finished


4 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 861 tasks      | elapsed: 15.5min
[Parallel(n_jobs=-1)]: Done 1156 tasks      | elapsed: 22.4min
[Parallel(n_jobs=-1)]: Done 1799 tasks      | elapsed: 24.2min
[Parallel(n_jobs=-1)]: Done 2573 tasks      | elapsed: 25.4min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 30.7min remaining:    7.7s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 34.1min finished


5 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 891 tasks      | elapsed: 20.3min
[Parallel(n_jobs=-1)]: Done 1346 tasks      | elapsed: 20.5min
[Parallel(n_jobs=-1)]: Done 1996 tasks      | elapsed: 27.0min
[Parallel(n_jobs=-1)]: Done 2741 tasks      | elapsed: 31.0min
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 36.3min finished


6 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 895 tasks      | elapsed: 25.4min
[Parallel(n_jobs=-1)]: Done 1707 tasks      | elapsed: 30.3min
[Parallel(n_jobs=-1)]: Done 2236 tasks      | elapsed: 31.3min
[Parallel(n_jobs=-1)]: Done 3002 tasks      | elapsed: 32.9min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 35.0min remaining:    8.8s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 37.2min finished


7 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 891 tasks      | elapsed: 16.6min
[Parallel(n_jobs=-1)]: Done 1187 tasks      | elapsed: 16.8min
[Parallel(n_jobs=-1)]: Done 1827 tasks      | elapsed: 22.8min
[Parallel(n_jobs=-1)]: Done 2364 tasks      | elapsed: 23.2min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 28.8min remaining:    7.2s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 32.3min finished


8 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 1118 tasks      | elapsed: 14.1min
[Parallel(n_jobs=-1)]: Done 1728 tasks      | elapsed: 23.4min
[Parallel(n_jobs=-1)]: Done 2569 tasks      | elapsed: 24.6min
[Parallel(n_jobs=-1)]: Done 3437 tasks      | elapsed: 30.3min
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 40.7min finished


9 trial done
----------
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 782 tasks      | elapsed:  8.0min
[Parallel(n_jobs=-1)]: Done 1102 tasks      | elapsed: 23.3min
[Parallel(n_jobs=-1)]: Done 1675 tasks      | elapsed: 24.6min
[Parallel(n_jobs=-1)]: Done 2376 tasks      | elapsed: 28.6min
[Parallel(n_jobs=-1)]: Done 3197 tasks      | elapsed: 31.6min
[Parallel(n_jobs=-1)]: Done 3585 out of 3600 | elapsed: 32.2min remaining:    8.1s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed: 34.4min finished


10 trial done
----------


### Trial params

In [87]:
for model, scores in trials:
    try:
        n_components = model.best_estimator_.named_steps['PCA']
        print('pca n_components: {}'.format(n_components.n_components))
    #     except KeyError:
    #         n_components = model.best_estimator_.named_steps['SelectKBest']
    #         print('kkk n_components: {}'.format(n_components.k))

        logistic_regression = model.best_estimator_.named_steps['SVC']
        print('C: {}\t max_iter: {}\n'.format(logistic_regression.C, logistic_regression.max_iter))
    except:
        continue

pca n_components: 21
C: 4.641588833612772	 max_iter: -1

pca n_components: 15
C: 0.21544346900318823	 max_iter: -1

pca n_components: 21
C: 100.0	 max_iter: -1

pca n_components: 21
C: 46415.888336127726	 max_iter: -1

pca n_components: 21
C: 4.641588833612772	 max_iter: -1

pca n_components: 21
C: 4.641588833612772	 max_iter: -1

pca n_components: 21
C: 4.641588833612772	 max_iter: -1

pca n_components: 9
C: 1000000.0	 max_iter: -1

pca n_components: 15
C: 100.0	 max_iter: -1



### Trials stats!

In [69]:
trials_scores = [scores for model.best_estimator_, scores in trials]

In [70]:
trials_means = map(lambda trial_scores: {key: value.mean() 
                                         for key, value in trial_scores.items()}, trials_scores)

stats = pd.DataFrame(list(trials_means))

In [71]:
stats.mean()[2:]

test_accuracy     0.781167
test_f1           0.840115
test_precision    0.794667
test_recall       0.908667
dtype: float64

# Lung_Cancer

In [2]:
results = pickle.load(open('../results/lung_cancer.results','rb'))
labels = pickle.load(open('../datasets/lung_cancer_y','rb'))

In [3]:
pipe = MetaboliticsPipeline(['reaction-diff',
                             'pathway_transformer'])

pre_processed_results = pipe.fit_transform(results, labels)

In [4]:
samples = defaultdict(lambda : [])
[
 samples[key].append(value) for key, value in 
 chain(*map(lambda sample: sample.items(), pre_processed_results))
]

dataset = pd.DataFrame(samples, index=labels)

In [5]:
dataset.T.head()

Unnamed: 0,unhealthy,unhealthy.1,unhealthy.2,unhealthy.3,unhealthy.4,unhealthy.5,unhealthy.6,unhealthy.7,unhealthy.8,unhealthy.9,...,healthy,healthy.1,healthy.2,healthy.3,healthy.4,healthy.5,healthy.6,healthy.7,healthy.8,healthy.9
,48.843,2.642386,41.381614,-3.062908,89.662783,-47.396311,-22.855148,-76.388182,-30.308966,-0.479821,...,-9.595058,-0.479838,-0.4798335,1.657575,0.82855,-0.657037,24.175086,-0.479837,-17.858686,6.007183
Alanine and aspartate metabolism,138.648466,138.648466,-4.367172,109.86672,138.648466,177.514679,138.648466,-112.223385,13.648466,-89.90422,...,-48.851531,78.226806,-27.7255,173.649444,85.367449,13.648466,13.648466,-111.351531,27.491078,13.648466
Alkaloid synthesis,-6e-06,-6e-06,-6e-06,-5e-06,-6e-06,-6e-06,-6e-06,-6e-06,-6e-06,5e-06,...,-3e-06,-5e-06,-3.742089e-07,-6e-06,-6e-06,-6e-06,-6e-06,-3e-06,-6e-06,-6e-06
Aminosugar metabolism,-82.204632,-82.204632,-82.204632,-114.462695,-103.710008,-125.215385,197.36526,-103.710008,197.36526,-60.908711,...,-125.215382,-46.384747,-119.2054,144.945906,-28.357061,136.074938,36.074938,-125.215382,-86.76063,3.816873
Androgen and estrogen synthesis and metabolism,-1.1e-05,-1.1e-05,-1.1e-05,-9e-06,-1.1e-05,-1.1e-05,-1.1e-05,-1.1e-05,-1.1e-05,8e-06,...,-8e-06,-9e-06,-4.725675e-06,-1.1e-05,-1.1e-05,-1.1e-05,-1.1e-05,-8e-06,-1.1e-05,-1.1e-05


In [6]:
balance = labels.count('unhealthy') / len(labels)

print(balance)

0.5420560747663551


In [7]:
sc = StandardScaler()

In [8]:
binarize = lambda ls: np.array([1 if l == 'unhealthy' else 0 for l in ls])

X = dataset
X = sc.fit_transform(dataset)
y = binarize(dataset.index)
dataset.shape

(107, 100)

In [14]:
classifiers = [
    
    (SVC, {
        'C': np.geomspace(1e-6, 1e6, num=10),
    }),
    (LogisticRegression,{
        'C': np.geomspace(1e-6, 1e6, num=10),
        'max_iter':range(10,100+1, 10)
    }),
     (RandomForestClassifier, {
         'n_estimators':range(3,10)
     })
]
feature_selection = [
    (PCA, {
    'n_components': range(3, 85, 9)
  })
]

In [18]:
NUM_TRIALS = 10
metrics = ['f1', 'recall', 'precision', 'accuracy']
trials = []

for i in range(NUM_TRIALS):
    cv_pipelines = []
    inner_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
    outer_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
   
    for pipeline, params in map(build_pipeline, product(feature_selection, classifiers)):
#     for pipeline, params in classifiers:
        cv_pipeline = GridSearchCV(pipeline, params, cv=inner_cv, n_jobs=-1, verbose=1).fit(X, y)
        cv_pipelines.append(cv_pipeline)
        
    best_pipeline = cv_pipelines[np.argmax([i.best_score_ for i in cv_pipelines])]
    cv = cross_validate(best_pipeline.best_estimator_, 
                        X=X, y=y, cv=outer_cv, 
                        scoring=metrics, 
                        return_train_score=False)
    
    trials.append((best_pipeline, cv))
    print("{} trial done".format(i+1))
    print("-"*10)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    9.8s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 2032 tasks      | elapsed:   16.0s
[Parallel(n_jobs=-1)]: Done 5032 tasks      | elapsed:   39.7s
[Parallel(n_jobs=-1)]: Done 9016 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 629 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 685 out of 700 | elapsed:   13.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.5s finished


1 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.4s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 688 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done 1688 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done 3088 tasks      | elapsed:   25.7s
[Parallel(n_jobs=-1)]: Done 4888 tasks      | elapsed:   40.9s
[Parallel(n_jobs=-1)]: Done 7088 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 9688 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 685 out of 700 | elapsed:   13.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.9s finished


2 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 368 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.3s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 1878 tasks      | elapsed:   15.3s
[Parallel(n_jobs=-1)]: Done 3628 tasks      | elapsed:   28.7s
[Parallel(n_jobs=-1)]: Done 6078 tasks      | elapsed:   49.1s
[Parallel(n_jobs=-1)]: Done 9228 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.5s finished


3 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.1s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 1024 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 2524 tasks      | elapsed:   20.1s
[Parallel(n_jobs=-1)]: Done 4624 tasks      | elapsed:   36.8s
[Parallel(n_jobs=-1)]: Done 7324 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   14.2s finished


4 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 688 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.3s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 1024 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 2524 tasks      | elapsed:   20.3s
[Parallel(n_jobs=-1)]: Done 4624 tasks      | elapsed:   38.2s
[Parallel(n_jobs=-1)]: Done 7324 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.9s finished


5 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.4s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 1696 tasks      | elapsed:   14.0s
[Parallel(n_jobs=-1)]: Done 4196 tasks      | elapsed:   33.2s
[Parallel(n_jobs=-1)]: Done 7696 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.1s finished


6 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.0s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 1360 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 3360 tasks      | elapsed:   26.0s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   48.4s
[Parallel(n_jobs=-1)]: Done 9760 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.4min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 613 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 685 out of 700 | elapsed:   13.6s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.9s finished


7 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    9.7s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 2032 tasks      | elapsed:   16.2s
[Parallel(n_jobs=-1)]: Done 5032 tasks      | elapsed:   40.0s
[Parallel(n_jobs=-1)]: Done 8512 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.3s finished


8 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    9.3s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 1696 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done 4196 tasks      | elapsed:   33.4s
[Parallel(n_jobs=-1)]: Done 7696 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   13.1s finished


9 trial done
----------
Fitting 10 folds for each of 100 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 688 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   10.2s finished


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 2032 tasks      | elapsed:   16.5s
[Parallel(n_jobs=-1)]: Done 5032 tasks      | elapsed:   40.2s
[Parallel(n_jobs=-1)]: Done 8962 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.5min finished


Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    2.7s


10 trial done
----------


[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:   16.0s finished


In [43]:
for model, scores in trials:
    try:
        n_components = model.best_estimator_.named_steps['PCA']
        print('pca n_components: {}'.format(n_components.n_components))
    #     except KeyError:
    #         n_components = model.best_estimator_.named_steps['SelectKBest']
    #         print('kkk n_components: {}'.format(n_components.k))
        steps = list(model.best_estimator_.named_steps)
        m = model.best_estimator_.named_steps[steps[steps.index("PCA")-1]]
        print(m)
        print("Score:\t{}".format(model.best_score_))
        print("-"*30)
    except AttributeError:
        continue

pca n_components: 84
SVC(C=4.6415888336127722, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Score:	0.822429906542056
------------------------------
pca n_components: 12
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
Score:	0.8411214953271028
------------------------------
pca n_components: 75
SVC(C=4.6415888336127722, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probab

In [44]:
trials_scores = [scores for model.best_estimator_, scores in trials]
trials_means = map(lambda trial_scores: {key: value.mean() 
                                         for key, value in trial_scores.items()}, trials_scores)

stats = pd.DataFrame(list(trials_means))
stats.mean()[2:]

test_accuracy     0.828970
test_f1           0.831482
test_precision    0.894056
test_recall       0.799333
dtype: float64

In [45]:
stats.describe()

Unnamed: 0,fit_time,score_time,test_accuracy,test_f1,test_precision,test_recall
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.007984,0.002709,0.82897,0.831482,0.894056,0.799333
std,0.003804,0.000894,0.019068,0.016923,0.014284,0.020294
min,0.005067,0.001876,0.805051,0.812727,0.872857,0.776667
25%,0.006456,0.002337,0.815429,0.819235,0.885119,0.785
50%,0.007298,0.002582,0.823333,0.826131,0.894683,0.793333
75%,0.007464,0.002676,0.840202,0.839392,0.903929,0.809167
max,0.018449,0.005078,0.869798,0.867444,0.914762,0.846667
