Classification of the bacteria data using three different classifiers:
1. SVM
2. RandomForest with decision stumps
3. AdaBoost with decision stumps.

The data is each replicate of each species along the rows, and the fluorescense spectra readings at the log growth phase along the columns. 

In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
os.getcwd()

'/mnt/d/dev/summer-bursary-2018/bacteria'

In [3]:
os.listdir('data')

['16ms_32ms_growth_phase_spectra.csv',
 '16_ms_lag_codes.csv',
 'bacteria.csv',
 'Classific.py',
 'graphs.pdf',
 'wavelengths.csv']

In [4]:
df = pd.read_csv('data/bacteria.csv', header=[0, 1, 2], index_col=0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1043 entries, 0 to 1042
Columns: 135 entries, (lag, bc, 01) to (stat, se, 07)
dtypes: float64(135)
memory usage: 1.1 MB


# Preprocessing

In [5]:
log = df['log'].T
log

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042
species,replicate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
bc,1,-1.82347,0.2104,0.631193,0.420815,3.577017,-1.82357,1.543025,1.332621,1.332612,2.314516,...,4.138728,6.734073,4.208874,6.593463,4.1389,1.333,-0.35065,0.420801,-1.40267,3.015768
bc,2,-0.44557,-1.20939,0.57287,4.392246,1.973376,3.501094,1.973367,2.291637,0.954865,1.464087,...,10.63195,8.977024,6.557754,10.63198,9.104122,7.194205,1.464012,0.572874,-0.95479,0.063652
bc,3,-0.4751633,-0.13576,1.425496,-0.067887,3.122725,0.271546,0.06787,0.339423,2.579618,2.104424,...,3.055488,4.752502,4.209555,8.079031,4.209398,8.01109,0.4751684,-1.018219,-1.697019,-0.1357642
bc,4,2.085158,0.53618,-1.489386,3.27683,1.132023,1.72781,1.608614,1.608642,-0.774528,2.442737,...,9.355482,11.262144,8.163649,8.87848,9.355428,7.448531,-0.8936396,0.536191,-0.059576,-0.5361861
bc,5,1.711495,0.305624,-0.12225,-2.62849,-1.22255,3.178665,1.772723,2.689683,0.305657,-0.18338,...,7.153159,6.174922,8.131116,6.724978,9.048333,4.157457,1.222497,-0.18337,0.733502,-1.10026
bc,6,0.591825,0.328791,0.263034,-1.381,-0.92067,-0.26304,0.789153,4.77e-06,0.526096,2.630463,...,4.275186,3.88062,3.420118,8.681357,4.340649,4.932673,-0.32879,0.065758,0.526065,0.723358
bc,7,-0.80847,-0.99503,0.248757,1.243856,2.923092,1.243862,1.741436,1.990201,0.435362,0.621929,...,5.411671,8.397327,5.909316,6.220289,5.473695,4.292109,0.74628,-0.24876,1.430367,-1.11942
bc,8,0.54028,-0.60032,-0.42022,0.660376,-0.60034,1.260745,0.180102,2.70159,1.620941,2.581469,...,6.124318,3.542755,3.422409,5.523951,5.043552,3.902787,0.780402,0.780411,0.480254,-1.02054
bc,9,-0.31181,-0.56125,0.935429,3.180636,2.432262,1.184955,-1.06021,0.68603,0.686027,1.93332,...,4.428536,3.430552,2.682108,3.430487,4.428391,4.428468,0.685983,-0.81071,0.935429,-0.06236
bc,10,0.306702,0.061425,1.594791,2.523547,-4.61656,11.6332,-1.04651,-12.7411,-2.95508,-12.3093,...,10.03745,4.248737,6.404728,1.294172,-2.27719,-6.58755,4.048481,-2.88384,-0.92031,0.429887


In [6]:
log.isnull().values.any()

False

In [7]:
X = log.values
X = (X - X.min()) / (X.max() - X.min())

print(X.min(), X.mean(), X.max())

0.0 0.06617325919272571 1.0


In [8]:
y = log.reset_index()['species']
y.head()

0    bc
1    bc
2    bc
3    bc
4    bc
Name: species, dtype: object

In [9]:
y.describe()

count     41
unique     6
top       bc
freq      12
Name: species, dtype: object

In [10]:
print(y)

0     bc
1     bc
2     bc
3     bc
4     bc
5     bc
6     bc
7     bc
8     bc
9     bc
10    bc
11    bc
12    ec
13    ec
14    ec
15    ec
16    ec
17    lm
18    lm
19    lm
20    pa
21    pa
22    pa
23    pa
24    pa
25    pa
26    pa
27    pa
28    sa
29    sa
30    sa
31    sa
32    sa
33    sa
34    sa
35    sa
36    se
37    se
38    se
39    se
40    se
Name: species, dtype: object


In [11]:
from sklearn.utils import shuffle

X, y = shuffle(X, y)

print(X[:5])
print(y[:5])

[[0.00212806 0.00207087 0.00206828 ... 0.00208127 0.00208647 0.00210207]
 [0.00213364 0.00204894 0.00206482 ... 0.00217598 0.00215481 0.00211511]
 [0.00212317 0.00214725 0.00216331 ... 0.00204556 0.00212584 0.0021526 ]
 [0.00213469 0.00208776 0.00209517 ... 0.00214457 0.00213222 0.00207047]
 [0.00216423 0.00202528 0.00211791 ... 0.00209067 0.00209612 0.00217785]]
38    se
30    sa
11    bc
7     bc
18    lm
Name: species, dtype: object


# PCA + SVM

In [12]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.99, svd_solver='full')
pca.fit(X)

X_pca = pca.transform(X)
print(X_pca.shape)

(41, 1)


In [13]:
from sklearn.svm import SVC
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, cross_val_score, GridSearchCV

param_grid = {
    'kernel': ['linear', 'rbf'],
    'gamma': [10 ** -n for n in range(10)],
    'C': [10 ** n for n in range(-9, 2)]
}

cv = StratifiedKFold(n_splits=3)
clf = SVC()

grid_search = GridSearchCV(clf, param_grid, cv=cv, verbose=10, n_jobs=4)
grid_search.fit(X_pca, y)

print(grid_search.best_score_)
print(grid_search.best_params_)

Fitting 3 folds for each of 220 candidates, totalling 660 fits
[CV] C=1e-09, gamma=1, kernel=linear .................................
[CV] C=1e-09, gamma=1, kernel=linear .................................
[CV] C=1e-09, gamma=1, kernel=linear .................................
[CV]  C=1e-09, gamma=1, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-09, gamma=1, kernel=rbf ....................................
[CV] C=1e-09, gamma=1, kernel=rbf ....................................
[CV]  C=1e-09, gamma=1, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-09, gamma=1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-09, gamma=1, kernel=rbf ....................................
[CV]  C=1e-09, gamma=1, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-09, gamma=1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-09, gamma=0.1, kernel=linear ...............................
[CV] C=1e-09, gamma=0.1, kernel=linear .......

[CV] C=1e-09, gamma=0.001, kernel=rbf ................................
[CV] C=1e-08, gamma=0.01, kernel=linear ..............................
[CV] C=1e-09, gamma=1e-06, kernel=linear .............................
[CV]  C=1e-09, gamma=1e-09, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-09, gamma=1e-09, kernel=linear .............................
[CV]  C=1e-09, gamma=1e-06, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-09, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-08, gamma=0.01, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-09, gamma=1e-06, kernel=linear .............................
[CV] C=1e-09, gamma=0.001, kernel=rbf ................................
[CV] C=1e-08, gamma=0.01, kernel=linear ..............................
[CV]  C=1e-09, gamma=1e-09, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=1e-09, gamma=1e-09, kernel=rbf ................................
[CV]  C=1e-09, g

[Parallel(n_jobs=4)]: Batch computation too fast (0.0218s.) Setting batch_size=18.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Batch computation too fast (0.1060s.) Setting batch_size=66.
[Parallel(n_jobs=4)]: Done  44 tasks      | elapsed:    0.2s


[CV] C=1e-08, gamma=1, kernel=linear .................................
[CV]  C=1e-09, gamma=1e-07, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-08, gamma=0.01, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-08, gamma=0.001, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=1e-09, gamma=1e-07, kernel=linear .............................
[CV] C=1e-08, gamma=0.001, kernel=rbf ................................
[CV] C=1e-08, gamma=0.001, kernel=linear .............................
[CV]  C=1e-08, gamma=1, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-08, gamma=1e-06, kernel=linear .............................
[CV]  C=1e-09, gamma=1e-07, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-08, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-08, gamma=0.001, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-08, gamma=1e-09, kernel=linear ...........................

[CV]  C=1e-08, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=1e-08, gamma=1e-08, kernel=rbf ................................
[CV] C=1e-08, gamma=1e-05, kernel=rbf ................................
[CV] C=1e-07, gamma=0.1, kernel=linear ...............................
[CV]  C=1e-07, gamma=0.0001, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-07, gamma=0.0001, kernel=linear ............................
[CV]  C=1e-08, gamma=1e-08, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-08, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-07, gamma=0.1, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=1e-08, gamma=1e-08, kernel=rbf ................................
[CV] C=1e-07, gamma=0.1, kernel=rbf ..................................
[CV] C=1e-08, gamma=1e-05, kernel=rbf ................................
[CV]  C=1e-07, gamma=0.0001, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1

[CV]  C=0.0001, gamma=1e-09, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-07, gamma=1e-06, kernel=rbf ................................
[CV] C=0.0001, gamma=1e-09, kernel=linear ............................
[CV]  C=1e-06, gamma=1e-07, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-05, gamma=1e-08, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-07, gamma=1e-06, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1e-06, gamma=1e-07, kernel=rbf ................................
[CV]  C=0.0001, gamma=1e-09, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=1e-05, gamma=1e-08, kernel=rbf ................................
[CV] C=1e-07, gamma=1e-07, kernel=linear .............................
[CV] C=0.0001, gamma=1e-09, kernel=rbf ...............................
[CV]  C=1e-06, gamma=1e-07, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-05, gamma=1e-08, kernel=rbf, score=0.26666666666666666, total=   0.0s

[CV]  C=1e-07, gamma=1e-09, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-06, gamma=1e-09, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.0001, gamma=1, kernel=rbf ...................................
[CV] C=1e-07, gamma=1e-09, kernel=linear .............................
[CV] C=1e-05, gamma=1, kernel=linear .................................
[CV]  C=0.001, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.0001, gamma=1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV]  C=1e-07, gamma=1e-09, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-05, gamma=1, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=0.0001, gamma=0.1, kernel=linear ..............................
[CV] C=1e-07, gamma=1e-09, kernel=linear .............................
[CV] C=1e-05, gamma=1, kernel=linear .................................
[CV]  C=0.001, ga

[CV] C=1e-05, gamma=0.01, kernel=linear ..............................
[CV]  C=0.001, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.0001, gamma=0.001, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-06, gamma=0.1, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-05, gamma=0.01, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] C=0.0001, gamma=0.001, kernel=linear ............................
[CV] C=1e-06, gamma=0.1, kernel=rbf ..................................
[CV]  C=0.001, gamma=0.001, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1e-05, gamma=0.01, kernel=linear ..............................
[CV] C=0.001, gamma=0.0001, kernel=linear ............................
[CV]  C=0.0001, gamma=0.001, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-06, gamma=0.1, kernel=rbf, score=0.26666666666666666, total=   0.0

[CV]  C=1e-05, gamma=0.0001, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=0.001, gamma=1e-06, kernel=linear .............................
[CV]  C=1e-06, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.0001, gamma=1e-05, kernel=linear ............................
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] C=1e-06, gamma=0.001, kernel=rbf ................................
[CV]  C=0.001, gamma=1e-06, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=0.0001, gamma=1e-05, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV]  C=1e-05, gamma=0.0001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.001, gamma=1e-06, kernel=linear .............................
[CV]  C=1e-06, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV] C=0.0001, gamma=1e-05, kernel=rbf ...............................
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] 

[CV] C=1e-06, gamma=1e-05, kernel=rbf ................................
[CV]  C=1e-05, gamma=1e-06, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.001, gamma=1e-08, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] C=1e-05, gamma=1e-06, kernel=rbf ................................
[CV]  C=0.0001, gamma=1e-07, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1e-06, gamma=1e-05, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=0.001, gamma=1e-08, kernel=linear .............................
[CV] C=0.0001, gamma=1e-07, kernel=rbf ...............................
[CV] C=1e-06, gamma=1e-06, kernel=linear .............................
[CV]  C=1e-05, gamma=1e-06, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.001, gamma=1e-08, kernel=linear, score=0.36363636363636365, total=   0.0s
[CV] C=1e-05, gamma=1e-06, kernel=rbf ................................
[CV]  C=0.0001, gamma=1e-07, kernel=rbf, score=0.26666666666666666, total=   0.0s

[CV] C=0.01, gamma=0.1, kernel=linear ................................
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV]  C=1, gamma=0.01, kernel=rbf, score=0.5454545454545454, total=   0.0s
[CV]  C=0.01, gamma=0.1, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV] ......... C=10, gamma=0.001, kernel=rbf, score=0.4, total=   0.0s
[CV]  C=0.1, gamma=0.1, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] C=0.01, gamma=0.1, kernel=linear ................................
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] C=0.1, gamma=0.01, kernel=linear ................................
[CV]  C=1, gamma=0.001, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV]  C=0.01, gamma=0.1, kernel=linear, score=0.5

[CV]  C=0.1, gamma=0.0001, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV]  C=0.01, gamma=0.001, kernel=linear, score=0.5454545454545454, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV] C=1, gamma=1e-05, kernel=linear .................................
[CV] C=10, gamma=1e-05, kernel=rbf ...................................
[CV] C=0.01, gamma=0.001, kernel=rbf .................................
[CV]  C=0.01, gamma=0.001, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=10, gamma=1e-05, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV]  C=0.1, gamma=0.0001, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV]  C=1, gamma=1e-05, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV] C=10, gamma=1e-06, kernel=linear ................................
[CV] C=0.01, gamma=0.001, kernel=rbf .................................
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV] C=1, gamma=1e-0

[CV] C=10, gamma=1e-08, kernel=linear ................................
[CV]  C=1, gamma=1e-07, kernel=linear, score=0.5454545454545454, total=   0.0s
[CV]  C=0.01, gamma=1e-05, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.1, gamma=1e-06, kernel=linear, score=0.5454545454545454, total=   0.0s
[CV]  C=10, gamma=1e-08, kernel=linear, score=0.3333333333333333, total=   0.0s
[CV] C=1, gamma=1e-07, kernel=rbf ....................................
[CV] C=0.01, gamma=1e-05, kernel=rbf .................................
[CV] C=0.1, gamma=1e-06, kernel=rbf ..................................
[CV] C=10, gamma=1e-08, kernel=linear ................................


[Parallel(n_jobs=4)]: Done 218 tasks      | elapsed:    0.5s


[CV]  C=1, gamma=1e-07, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.1, gamma=1e-06, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.01, gamma=1e-05, kernel=rbf, score=0.36363636363636365, total=   0.0s
[CV]  C=10, gamma=1e-08, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV] C=1, gamma=1e-07, kernel=rbf ....................................
[CV] C=0.1, gamma=1e-06, kernel=rbf ..................................
[CV] C=0.01, gamma=1e-06, kernel=linear ..............................
[CV] C=10, gamma=1e-08, kernel=linear ................................
[CV]  C=0.1, gamma=1e-06, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=1, gamma=1e-07, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=0.01, gamma=1e-06, kernel=linear, score=0.26666666666666666, total=   0.0s
[CV]  C=10, gamma=1e-08, kernel=linear, score=0.5454545454545454, total=   0.0s
[CV] C=0.1, gamma=1e-06, kernel=rbf ..................................
[CV] C=1, 

[CV] C=0.01, gamma=1e-08, kernel=linear ..............................
[CV] C=0.1, gamma=1e-09, kernel=linear ...............................
[CV]  C=10, gamma=1, kernel=linear, score=0.3333333333333333, total=   0.0s
[CV]  C=0.01, gamma=1e-08, kernel=linear, score=0.5454545454545454, total=   0.0s
[CV]  C=0.1, gamma=1e-09, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV] C=0.01, gamma=1e-08, kernel=rbf .................................
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] C=0.1, gamma=1e-09, kernel=linear ...............................
[CV]  C=0.01, gamma=1e-08, kernel=rbf, score=0.26666666666666666, total=   0.0s
[CV]  C=10, gamma=1, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV]  C=0.1, gamma=1e-09, kernel=linear, score=0.4666666666666667, total=   0.0s
[CV] C=0.01, gamma=1e-08, kernel=rbf .................................
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] C=0.1, gamma=1e-09, ke

[Parallel(n_jobs=4)]: Done 660 out of 660 | elapsed:    0.9s finished


In [14]:
%%time

cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(grid_search.best_estimator_, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.49 (+/- 0.07)
CPU times: user 62.5 ms, sys: 15.6 ms, total: 78.1 ms
Wall time: 69.1 ms


In [15]:
%%time

cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(grid_search.best_estimator_, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.49 (+/- 0.07)
CPU times: user 172 ms, sys: 0 ns, total: 172 ms
Wall time: 160 ms


# Random Forest with Decision Stumps

In [16]:
%%time

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=1024, max_depth=1)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

  from numpy.core.umath_tests import inner1d


Accuracy: 0.49 (+/- 0.08)
CPU times: user 34.6 s, sys: 172 ms, total: 34.8 s
Wall time: 34.8 s


In [17]:
%%time

clf = RandomForestClassifier(n_estimators=1024, max_depth=1)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.48 (+/- 0.10)
CPU times: user 38 s, sys: 203 ms, total: 38.2 s
Wall time: 38.3 s


# AdaBoost

In [18]:
%%time

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                        n_estimators=1024)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X_pca, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.49 (+/- 0.08)
CPU times: user 42.8 s, sys: 15.6 ms, total: 42.9 s
Wall time: 42.9 s


In [19]:
%%time

clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                        n_estimators=1024)
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=20)
scores = cross_val_score(clf, X, y, cv=cv)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.37 (+/- 0.24)
CPU times: user 2min 5s, sys: 15.6 ms, total: 2min 5s
Wall time: 2min 5s
