Classification of the bacteria data using three different classifiers:
1. SVM
2. RandomForest with decision stumps
3. AdaBoost with decision stumps.

The data is each replicate of each species along the rows, and the fluorescense spectra readings at the log growth phase along the columns. 

In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
os.getcwd()

'/mnt/d/dev/summer-bursary-2018/bacteria'

In [3]:
os.listdir('data')

['16ms_32ms_growth_phase_spectra.csv',
 '16_ms_lag_codes.csv',
 'bacteria.csv',
 'Classific.py',
 'graphs.pdf',
 'wavelengths.csv']

In [4]:
df = pd.read_csv('data/bacteria.csv', header=[0, 1, 2], index_col=0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1043 entries, 0 to 1042
Columns: 135 entries, (lag, bc, 01) to (stat, se, 07)
dtypes: float64(135)
memory usage: 1.1 MB


# Preprocessing

In [5]:
log = df['log'].T
log

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042
species,replicate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
bc,1,-1.82347,0.2104,0.631193,0.420815,3.577017,-1.82357,1.543025,1.332621,1.332612,2.314516,...,4.138728,6.734073,4.208874,6.593463,4.1389,1.333,-0.35065,0.420801,-1.40267,3.015768
bc,2,-0.44557,-1.20939,0.57287,4.392246,1.973376,3.501094,1.973367,2.291637,0.954865,1.464087,...,10.63195,8.977024,6.557754,10.63198,9.104122,7.194205,1.464012,0.572874,-0.95479,0.063652
bc,3,-0.4751633,-0.13576,1.425496,-0.067887,3.122725,0.271546,0.06787,0.339423,2.579618,2.104424,...,3.055488,4.752502,4.209555,8.079031,4.209398,8.01109,0.4751684,-1.018219,-1.697019,-0.1357642
bc,4,2.085158,0.53618,-1.489386,3.27683,1.132023,1.72781,1.608614,1.608642,-0.774528,2.442737,...,9.355482,11.262144,8.163649,8.87848,9.355428,7.448531,-0.8936396,0.536191,-0.059576,-0.5361861
bc,5,1.711495,0.305624,-0.12225,-2.62849,-1.22255,3.178665,1.772723,2.689683,0.305657,-0.18338,...,7.153159,6.174922,8.131116,6.724978,9.048333,4.157457,1.222497,-0.18337,0.733502,-1.10026
bc,6,0.591825,0.328791,0.263034,-1.381,-0.92067,-0.26304,0.789153,4.77e-06,0.526096,2.630463,...,4.275186,3.88062,3.420118,8.681357,4.340649,4.932673,-0.32879,0.065758,0.526065,0.723358
bc,7,-0.80847,-0.99503,0.248757,1.243856,2.923092,1.243862,1.741436,1.990201,0.435362,0.621929,...,5.411671,8.397327,5.909316,6.220289,5.473695,4.292109,0.74628,-0.24876,1.430367,-1.11942
bc,8,0.54028,-0.60032,-0.42022,0.660376,-0.60034,1.260745,0.180102,2.70159,1.620941,2.581469,...,6.124318,3.542755,3.422409,5.523951,5.043552,3.902787,0.780402,0.780411,0.480254,-1.02054
bc,9,-0.31181,-0.56125,0.935429,3.180636,2.432262,1.184955,-1.06021,0.68603,0.686027,1.93332,...,4.428536,3.430552,2.682108,3.430487,4.428391,4.428468,0.685983,-0.81071,0.935429,-0.06236
bc,10,0.306702,0.061425,1.594791,2.523547,-4.61656,11.6332,-1.04651,-12.7411,-2.95508,-12.3093,...,10.03745,4.248737,6.404728,1.294172,-2.27719,-6.58755,4.048481,-2.88384,-0.92031,0.429887


In [6]:
log.isnull().values.any()

False

In [52]:
X = log.values

In [8]:
y = X.reset_index()['species']
y.head()

0    bc
1    bc
2    bc
3    bc
4    bc
Name: species, dtype: object

In [9]:
y.describe()

count     41
unique     6
top       bc
freq      12
Name: species, dtype: object

In [10]:
print(y)

0     bc
1     bc
2     bc
3     bc
4     bc
5     bc
6     bc
7     bc
8     bc
9     bc
10    bc
11    bc
12    ec
13    ec
14    ec
15    ec
16    ec
17    lm
18    lm
19    lm
20    pa
21    pa
22    pa
23    pa
24    pa
25    pa
26    pa
27    pa
28    sa
29    sa
30    sa
31    sa
32    sa
33    sa
34    sa
35    sa
36    se
37    se
38    se
39    se
40    se
Name: species, dtype: object


In [11]:
from sklearn.utils import shuffle

X, y = shuffle(X, y)

print(X[:5])
print(y[:5])

                           0         1         2         3         4     \
species replicate                                                         
ec      01        -1.086490e+00 -1.278220  0.575195  1.789606  3.195759   
        04         9.230000e-07 -1.605890  2.964716  0.494150  0.247073   
sa      03         5.145850e-01 -1.543730 -1.157800  0.964893  2.573070   
se      01         6.468450e-01 -1.617090  0.646836  0.258749  1.552515   
        03         3.790200e-01 -1.010715 -1.073889 -0.063173  0.379046   

                       5         6         7         8         9       ...     \
species replicate                                                      ...      
ec      01         1.470073  4.026641  2.364880  0.255662  2.300933    ...      
        04        -0.494150  0.802997  3.211972  2.285468  1.482439    ...      
sa      03        -0.257300  0.643261  2.315750 -0.771920  0.514613    ...      
se      01         2.652206 -0.582190  1.293756 -1.487830 -1.552490  

# PCA + SVM

In [12]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.99, svd_solver='full')
pca.fit(X)

X_pca = pca.transform(X)
print(X_pca.shape)

(41, 1)


In [13]:
from sklearn.svm import SVC
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, cross_val_score, GridSearchCV

param_grid = {
    'kernel': ['linear', 'rbf'],
    'gamma': [10 ** -n for n in range(6)],
    'C': [10 ** n for n in range(-5, 2)]
}

cv = StratifiedKFold(n_splits=3)
clf = SVC()

grid_search = GridSearchCV(clf, param_grid, cv=cv, verbose=10, n_jobs=4)
grid_search.fit(X_pca, y)

print(grid_search.best_score_)
print(grid_search.best_params_)

Fitting 3 folds for each of 84 candidates, totalling 252 fits
[CV] C=1e-05, gamma=1, kernel=linear .................................
[CV] C=1e-05, gamma=1, kernel=linear .................................
[CV] C=1e-05, gamma=1, kernel=linear .................................
[CV] C=1e-05, gamma=1, kernel=rbf ....................................
[CV]  C=1e-05, gamma=1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-05, gamma=1, kernel=rbf ....................................
[CV]  C=1e-05, gamma=1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-05, gamma=0.1, kernel=linear ...............................
[CV] C=1e-05, gamma=1, kernel=rbf ....................................
[CV]  C=1e-05, gamma=1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-05, gamma=1, kernel=linear, score=0.45454545454545453, total=   0.1s
[CV] C=1e-05, gamma=0.1, kernel=linear ...............................
[CV] ....... C=1e-05, gamma=1, kernel=linear, score=0.4, to

[Parallel(n_jobs=4)]: Batch computation too fast (0.0385s.) Setting batch_size=10.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.1s


[CV]  C=1e-05, gamma=1, kernel=linear, score=0.5333333333333333, total=   0.7s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV]  C=1e-05, gamma=0.0001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV]  C=1e-05, gamma=0.0001, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=linear .............................
[CV]  C=1e-05, gamma=0.1, kernel=linear, score=0.5333333333333333, total=   0.7s
[CV] C=0.0001, gamma=1, kernel=linear ................................
[CV] ... C=1e-05, gamma=1e-05, kernel=linear, score=0.4, total=   0.1s
[CV] C=1e-05, gamma=1e-05, kernel=linear .............................
[CV]  C=1e-05, gamma=0.001, kernel=linear, score=0.5333333333333333, total=   0.6s
[CV] C=1e-05, gamma=0.001, kernel=linear .............................
[CV]  C=1e-05, gamma=0.01, kernel=linear, score=0.5333333333333333, total=   0.7s
[CV] C=1e-05, 

[Parallel(n_jobs=4)]: Done  28 tasks      | elapsed:    1.7s


[CV] .... C=0.0001, gamma=0.1, kernel=linear, score=0.4, total=   0.8s
[CV] C=0.0001, gamma=0.1, kernel=linear ..............................
[CV] ...... C=0.0001, gamma=1, kernel=linear, score=0.4, total=   0.8s
[CV] C=0.0001, gamma=1, kernel=linear ................................
[CV] . C=0.0001, gamma=0.0001, kernel=linear, score=0.4, total=   0.9s
[CV] C=0.0001, gamma=0.0001, kernel=linear ...........................
[CV]  C=0.0001, gamma=0.01, kernel=linear, score=0.5333333333333333, total=  11.3s
[CV] C=0.0001, gamma=0.01, kernel=linear .............................
[CV]  C=0.0001, gamma=0.1, kernel=linear, score=0.5333333333333333, total=  11.5s
[CV] C=0.0001, gamma=0.1, kernel=linear ..............................
[CV]  C=0.0001, gamma=1, kernel=linear, score=0.5333333333333333, total=  11.4s
[CV] C=0.0001, gamma=1e-05, kernel=linear ............................
[CV]  C=0.0001, gamma=0.01, kernel=linear, score=0.45454545454545453, total=   1.0s
[CV] C=0.0001, gamma=0.01, kerne

[Parallel(n_jobs=4)]: Batch computation too slow (3.5634s.) Setting batch_size=5.


[CV]  C=0.0001, gamma=0.0001, kernel=linear, score=0.5333333333333333, total=  11.7s
[CV] C=0.0001, gamma=0.0001, kernel=linear ...........................
[CV]  C=0.0001, gamma=1e-05, kernel=linear, score=0.45454545454545453, total=   1.1s
[CV] C=0.0001, gamma=1e-05, kernel=rbf ...............................
[CV]  C=0.0001, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.0001, gamma=1e-05, kernel=rbf ...............................
[CV]  C=0.0001, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.0001, gamma=1e-05, kernel=rbf ...............................
[CV]  C=0.0001, gamma=1e-05, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.001, gamma=1, kernel=linear .................................
[CV]  C=0.0001, gamma=0.1, kernel=linear, score=0.45454545454545453, total=   1.1s
[CV] C=0.0001, gamma=0.1, kernel=rbf .................................
[CV]  C=0.0001, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   

[CV] C=0.001, gamma=0.01, kernel=linear ..............................
[CV]  C=0.001, gamma=0.01, kernel=linear, score=0.45454545454545453, total=   7.8s
[CV] C=0.001, gamma=0.01, kernel=rbf .................................
[CV]  C=0.001, gamma=0.01, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=0.1, kernel=rbf ...................................
[CV]  C=0.01, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=0.1, kernel=rbf ...................................
[CV]  C=0.01, gamma=0.1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.01, gamma=0.01, kernel=linear ...............................


[Parallel(n_jobs=4)]: Done  98 tasks      | elapsed:  1.6min


[CV]  C=0.001, gamma=0.0001, kernel=linear, score=0.5333333333333333, total=  30.0s
[CV] C=0.01, gamma=0.01, kernel=rbf ..................................
[CV]  C=0.01, gamma=0.01, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=0.01, kernel=rbf ..................................
[CV]  C=0.01, gamma=0.01, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=0.01, kernel=rbf ..................................
[CV]  C=0.01, gamma=0.01, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.01, gamma=0.001, kernel=linear ..............................
[CV]  C=0.01, gamma=0.01, kernel=linear, score=0.4666666666666667, total=   6.0s
[CV] C=0.01, gamma=0.01, kernel=linear ...............................
[CV]  C=0.01, gamma=0.001, kernel=linear, score=0.4666666666666667, total=   6.2s
[CV] C=0.01, gamma=0.001, kernel=linear ..............................
[CV]  C=0.01, gamma=1, kernel=linear, score=0.5333333333333333, total= 1.7min
[CV] C=0.01,

[Parallel(n_jobs=4)]: Batch computation too slow (181.6862s.) Setting batch_size=2.


[CV]  C=0.01, gamma=0.01, kernel=linear, score=0.5333333333333333, total= 1.7min
[CV] C=0.01, gamma=0.01, kernel=linear ...............................
[CV]  C=0.01, gamma=0.001, kernel=linear, score=0.5333333333333333, total= 1.7min
[CV] C=0.01, gamma=0.0001, kernel=linear .............................
[CV]  C=0.01, gamma=0.1, kernel=linear, score=0.5454545454545454, total=  33.1s
[CV] C=0.01, gamma=0.1, kernel=rbf ...................................
[CV]  C=0.01, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=1e-05, kernel=linear ..............................
[CV]  C=0.01, gamma=1e-05, kernel=linear, score=0.4666666666666667, total=   6.0s
[CV] C=0.01, gamma=1e-05, kernel=linear ..............................
[CV]  C=0.01, gamma=0.001, kernel=linear, score=0.5454545454545454, total=  33.1s
[CV] C=0.01, gamma=0.001, kernel=rbf .................................
[CV]  C=0.01, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.

[Parallel(n_jobs=4)]: Done 135 tasks      | elapsed:  3.9min
[Parallel(n_jobs=4)]: Batch computation too slow (40.5248s.) Setting batch_size=1.


[CV]  C=0.1, gamma=1, kernel=linear, score=0.26666666666666666, total=  39.1s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV]  C=0.1, gamma=0.1, kernel=linear, score=0.26666666666666666, total=  41.2s
[CV] C=0.1, gamma=0.1, kernel=linear .................................
[CV]  C=0.01, gamma=0.0001, kernel=linear, score=0.5333333333333333, total= 1.7min
[CV] C=0.01, gamma=0.0001, kernel=linear .............................
[CV]  C=0.01, gamma=1e-05, kernel=linear, score=0.5333333333333333, total= 1.7min
[CV] C=0.01, gamma=1e-05, kernel=linear ..............................
[CV]  C=0.01, gamma=0.0001, kernel=linear, score=0.5454545454545454, total=  32.7s
[CV] C=0.01, gamma=0.0001, kernel=rbf ................................
[CV]  C=0.01, gamma=0.0001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.01, gamma=0.0001, kernel=rbf ................................
[CV]  C=0.01, gamma=0.0001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C

[Parallel(n_jobs=4)]: Done 155 tasks      | elapsed:  8.9min


[CV]  C=0.1, gamma=0.001, kernel=linear, score=0.26666666666666666, total=  39.1s
[CV] C=0.1, gamma=0.001, kernel=linear ...............................
[CV]  C=0.1, gamma=1, kernel=linear, score=0.18181818181818182, total= 1.4min
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV]  C=0.1, gamma=1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV]  C=0.1, gamma=0.001, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV]  C=0.1, gamma=0.001, kernel=linear, score=0.18181818181818182, total= 1.4min
[CV] C=0.1, gamma=0.

[Parallel(n_jobs=4)]: Done 169 tasks      | elapsed: 10.8min


[CV]  C=0.1, gamma=1e-05, kernel=linear, score=0.26666666666666666, total=  36.6s
[CV] C=0.1, gamma=1e-05, kernel=linear ...............................
[CV]  C=0.1, gamma=0.0001, kernel=linear, score=0.18181818181818182, total= 1.3min
[CV] C=0.1, gamma=1e-05, kernel=linear ...............................
[CV]  C=0.1, gamma=0.001, kernel=linear, score=0.5333333333333333, total= 3.9min
[CV] C=0.1, gamma=1e-05, kernel=rbf ..................................
[CV]  C=0.1, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.1, gamma=1e-05, kernel=rbf ..................................
[CV]  C=0.1, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.1, gamma=1e-05, kernel=rbf ..................................
[CV]  C=0.1, gamma=1e-05, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV]  C=0.1, gamma=1e-05, kernel=linear, score=0.18181818181818182, total= 1.1min
[CV] C=1,

[Parallel(n_jobs=4)]: Done 180 tasks      | elapsed: 14.1min


[CV]  C=1, gamma=1, kernel=linear, score=0.2727272727272727, total= 1.3min
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV]  C=1, gamma=0.1, kernel=linear, score=0.4666666666666667, total=  38.1s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV]  C=0.1, gamma=1e-05, kernel=linear, score=0.5333333333333333, total= 3.4min
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV]  C=1, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV]  C=1, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV]  C=1, gamma=0.1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1, gamma=0.01, kernel=linear ..................................
[CV]  C=1, gamma=0.01, kernel=linear, score=0.4666666666666667, total=  40.0s
[CV] C=1, gamma=0.01, kernel=linear ..

[Parallel(n_jobs=4)]: Done 191 tasks      | elapsed: 16.2min


[CV]  C=1, gamma=0.01, kernel=linear, score=0.2727272727272727, total= 1.4min
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV]  C=1, gamma=0.01, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV]  C=1, gamma=0.01, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV]  C=1, gamma=0.01, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV]  C=1, gamma=0.001, kernel=linear, score=0.4666666666666667, total=  41.2s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV]  C=1, gamma=1, kernel=linear, score=0.5333333333333333, total= 5.6min
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV]  C=1, gamma=0.001, kernel=linear, score=0.2727272727272727, total= 1.4min
[CV] C=1, gamma=0.001, kernel=rbf .

[Parallel(n_jobs=4)]: Done 204 tasks      | elapsed: 21.1min


[CV]  C=1, gamma=1e-05, kernel=linear, score=0.4666666666666667, total=  35.0s
[CV] C=1, gamma=1e-05, kernel=linear .................................
[CV]  C=1, gamma=0.0001, kernel=linear, score=0.2727272727272727, total= 1.3min
[CV] C=1, gamma=1e-05, kernel=linear .................................
[CV]  C=1, gamma=1e-05, kernel=linear, score=0.2727272727272727, total= 1.2min
[CV] C=1, gamma=1e-05, kernel=rbf ....................................
[CV]  C=1, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=1e-05, kernel=rbf ....................................
[CV]  C=1, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1, gamma=1e-05, kernel=rbf ....................................
[CV]  C=1, gamma=1e-05, kernel=rbf, score=0.45454545454545453, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV]  C=1, gamma=0.001, kernel=linear, score=0.5333333333333333, total= 5.2min
[CV] C=10, gamma=1, kernel

[Parallel(n_jobs=4)]: Done 217 tasks      | elapsed: 25.3min


[CV]  C=10, gamma=0.1, kernel=linear, score=0.4666666666666667, total=  40.6s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV]  C=10, gamma=1, kernel=linear, score=0.36363636363636365, total= 2.2min
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV]  C=1, gamma=1e-05, kernel=linear, score=0.5333333333333333, total= 5.0min
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV]  C=10, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV]  C=10, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV]  C=10, gamma=0.1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=10, gamma=0.01, kernel=linear .................................
[CV]  C=10, gamma=0.01, kernel=linear, score=0.4666666666666667, total=  40.4s
[CV] C=10, gamma=0.01, kernel=lin

[Parallel(n_jobs=4)]: Done 232 tasks      | elapsed: 30.0min


[CV]  C=10, gamma=0.01, kernel=linear, score=0.36363636363636365, total= 2.2min
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV]  C=10, gamma=0.1, kernel=linear, score=0.5333333333333333, total= 5.7min
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV]  C=10, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV]  C=10, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV]  C=10, gamma=0.001, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV]  C=10, gamma=0.0001, kernel=linear, score=0.4666666666666667, total=  35.4s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV]  C=10, gamma=0.001, kernel=linear, score=0.36363636363636365, total= 2.0min
[CV] C=10, gamma=0.0

[Parallel(n_jobs=4)]: Done 252 out of 252 | elapsed: 38.1min finished


0.5121951219512195
{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}


In [14]:
%%time

cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(grid_search.best_estimator_, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.42 (+/- 0.20)
CPU times: user 18min 12s, sys: 0 ns, total: 18min 12s
Wall time: 18min 13s


In [15]:
%%time

cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(grid_search.best_estimator_, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.60 (+/- 0.27)
CPU times: user 688 ms, sys: 15.6 ms, total: 703 ms
Wall time: 680 ms


# Random Forest with Decision Stumps

In [16]:
%%time

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=1024, max_depth=1)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

  from numpy.core.umath_tests import inner1d


Accuracy: 0.49 (+/- 0.09)
CPU times: user 34.9 s, sys: 172 ms, total: 35.1 s
Wall time: 35.1 s


In [17]:
%%time

clf = RandomForestClassifier(n_estimators=1024, max_depth=1)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.48 (+/- 0.09)
CPU times: user 36.8 s, sys: 281 ms, total: 37.1 s
Wall time: 37.1 s


# AdaBoost

In [18]:
%%time

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                        n_estimators=1024)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.49 (+/- 0.09)
CPU times: user 40.4 s, sys: 15.6 ms, total: 40.4 s
Wall time: 40.4 s


In [19]:
%%time

clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                        n_estimators=1024)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.39 (+/- 0.23)
CPU times: user 1min 59s, sys: 0 ns, total: 1min 59s
Wall time: 1min 59s
