### Import libraries.

In [9]:
import pandas as pd
import numpy as np

import pickle
from functions import get_scores
from functions import run_model
from re import search
from tabulate import tabulate

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn import metrics

### Load data.

In [174]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [143]:
df = pickle.load(open("../datasets/occupancy.p", "rb"))
# df = pd.read_csv('../datasets/occupancy.csv')

In [144]:
df

Unnamed: 0_level_0,temperature,humidity,light,co2,humidity_ratio,occupancy,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-02-04 17:51:00,23.180,27.2720,426.00,721.25,0.004793,1,1
2015-02-04 17:52:00,23.150,27.2675,429.50,714.00,0.004783,1,1
2015-02-04 17:53:00,23.150,27.2450,426.00,713.50,0.004779,1,1
2015-02-04 17:54:00,23.150,27.2000,426.00,708.25,0.004772,1,1
2015-02-04 17:55:00,23.100,27.2000,426.00,704.50,0.004757,1,1
...,...,...,...,...,...,...,...
2015-02-18 09:15:00,20.815,27.7175,429.75,1505.25,0.004213,1,1
2015-02-18 09:16:00,20.865,27.7450,423.50,1514.50,0.004230,1,1
2015-02-18 09:17:00,20.890,27.7450,423.50,1521.50,0.004237,1,1
2015-02-18 09:18:00,20.890,28.0225,418.75,1632.00,0.004279,1,1


### Createa a dataframe for storing model scores

In [145]:
column_names = ['Model name', 'Features', 'Best score', 'Train score', 'Test score', 
                'Sensitivity', 'Specificity', 'Precision', 'Accuracy', 'F1-score']
scores_df = pd.DataFrame(columns=column_names)
scores_df.head()

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score


### Features and target variables.

In [146]:
features_list = [['temperature', 'humidity', 'light', 'co2', 'humidity_ratio'],
                 ['temperature', 'humidity', 'light', 'co2'],
                 ['temperature', 'humidity', 'co2', 'humidity_ratio'],
                 ['temperature', 'humidity', 'light', 'humidity_ratio'],
                 ['temperature', 'humidity', 'humidity_ratio'],
                 ['temperature', 'humidity'],
                 ['temperature', 'light'],
                 ['humidity', 'light'],
                 ['light', 'humidity_ratio'],
                 ['temperature', 'co2'],
                 ['light', 'co2'],
                 ['temperature', 'humidity', 'light', 'co2', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'light', 'co2', 'weekday'],
                 ['temperature', 'humidity', 'co2', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'light', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'weekday'],
                 ['temperature', 'light', 'weekday'],
                 ['humidity', 'light', 'weekday'],
                 ['light', 'humidity_ratio', 'weekday'],
                 ['temperature', 'co2', 'weekday'],
                 ['light', 'co2', 'weekday']
                ]
target = 'occupancy'

### Initialize common grid search parameters for all models.

In [147]:
cv_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
verbose = 1
n_jobs = 4

### Random Forest (RF)

In [148]:
model_name = 'rf'
params = { 
    'n_estimators' : [75,100,125],
    'max_features' : [None, 'auto'],
    'max_depth' : [None, 5, 6]
}

In [149]:
for i, features in enumerate(features_list):
    model = RandomForestClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   18.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   14.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   18.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   15.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   14.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   11.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    8.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   12.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   17.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   14.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   18.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   15.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   15.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   11.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.1s finished


### Linear Discriminant Analysis (LDA)

In [150]:
# https://machinelearningmastery.com/linear-discriminant-analysis-with-python/

In [151]:
model_name = 'lda'
params = { 
    'solver' : ['svd', 'lsqr', 'eigen']
}

In [152]:
for i, features in enumerate(features_list):
    model = LinearDiscriminantAnalysis()
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished


### Classification and Regression Trees (CART)

In [153]:
# https://www.datacamp.com/community/tutorials/decision-tree-classification-python

In [154]:
model_name = 'cart'
params = { 
    'max_depth' : [None, 2, 5],
    'max_features' : ['auto', 'sqrt', 'log2']
}

In [155]:
for i, features in enumerate(features_list):
    model = DecisionTreeClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


### Gradient Boosting Machine (GBM)

In [156]:
# https://machinelearningmastery.com/gradient-boosting-machine-ensemble-in-python/

In [157]:
model_name = 'gbm'
params = { 
    'learning_rate' : [0.1, 0.2, 0.3],
    'n_estimators' : [100, 200],
    'max_depth' : [3, 5]
}

In [158]:
for i, features in enumerate(features_list):
    model = GradientBoostingClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   17.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   27.5s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.5s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   20.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.1s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   24.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.4s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   20.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   19.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.2s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   12.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.0s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:    9.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.4s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   11.3s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.5s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.3s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   14.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.0s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.1s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.0s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   28.7s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   21.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.6s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   25.4s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.9s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   21.5s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.9s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   19.7s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.6s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.4s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.4s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   10.1s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.0s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   12.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.2s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   14.2s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.6s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   15.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.5s finished


### K-Nearest Neighbor (KNN)

In [159]:
model_name = 'knn'
params = { 
    'n_neighbors' : [3, 5, 10],
    'p' : [1,2],
    'leaf_size' : [1, 5, 10]
}

In [160]:
for i, features in enumerate(features_list):
    model = KNeighborsClassifier()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.9s finished


### Support Vector Classification (SVC)

In [161]:
model_name = 'svc'
params = {
    'C': [0.1, 10, 100], 
    'gamma': [1,0.01,0.001],
    'kernel': ['rbf', 'sigmoid']
    }

In [162]:
for i, features in enumerate(features_list):
    model = svm.SVC()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   17.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   32.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   31.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   17.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   37.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   23.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   33.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   33.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   31.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   41.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   24.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   36.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   35.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   41.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   26.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   33.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   31.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   30.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   34.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   17.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   38.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.6s finished


### Adaptive Boosting (AdaBoost)

In [163]:
model_name = 'ada'
params = {
    'n_estimators' : [5, 10, 50],
    'learning_rate' : [1, 2]
    }

In [164]:
for i, features in enumerate(features_list):
    model = AdaBoostClassifier()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.8s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.2s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.7s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.7s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.8s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.7s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


### Model summary table.

In [165]:
scores_df

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0000,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
1,rf1,"temperature, humidity, light, co2",0.9940,1.0000,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
2,rf2,"temperature, humidity, co2, humidity_ratio",0.9870,0.9999,0.9864,0.9672,0.9915,0.9682,0.9864,0.9677
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
4,rf4,"temperature, humidity, humidity_ratio",0.9573,0.9905,0.9613,0.9037,0.9768,0.9124,0.9613,0.9080
...,...,...,...,...,...,...,...,...,...,...
149,ada17,"temperature, light, weekday",0.9914,0.9914,0.9920,0.9915,0.9921,0.9710,0.9920,0.9812
150,ada18,"humidity, light, weekday",0.9914,0.9914,0.9920,0.9915,0.9921,0.9710,0.9920,0.9812
151,ada19,"light, humidity_ratio, weekday",0.9914,0.9914,0.9920,0.9915,0.9921,0.9710,0.9920,0.9812
152,ada20,"temperature, co2, weekday",0.9205,0.9228,0.9240,0.8180,0.9524,0.8215,0.9240,0.8197


In [166]:
"""
model_name = 'mnb'
scores_df = scores_df[~scores_df['Model name'].str.contains(model_name)]
scores_df.reset_index(drop=True, inplace=True)
""";

In [179]:
scores_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
28,gbm1,"temperature, humidity, light, co2",0.9932,0.9989,0.9949,0.9873,0.9969,0.9883,0.9949,0.9878
27,gbm0,"temperature, humidity, light, co2, humidity_ratio",0.993,1.0,0.9944,0.9862,0.9966,0.9873,0.9944,0.9868
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
30,gbm3,"temperature, humidity, light, humidity_ratio",0.9925,0.9996,0.994,0.982,0.9972,0.9893,0.994,0.9857
7,rf7,"humidity, light",0.9911,0.9994,0.9937,0.9831,0.9966,0.9872,0.9937,0.9852
19,cart1,"temperature, humidity, light, co2",0.9925,1.0,0.9931,0.9831,0.9957,0.9841,0.9931,0.9836
37,knn1,"temperature, humidity, light, co2",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827
36,knn0,"temperature, humidity, light, co2, humidity_ratio",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827


In [168]:
scores_df.shape

(154, 10)

In [169]:
scores_df.to_csv('../models/scores2.csv', index=False)
scores_df.to_pickle('../models/scores2.p')

In [2]:
scores_df = pd.read_csv('../models/scores.csv')
scores_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
28,gbm1,"temperature, humidity, light, co2",0.9932,0.9989,0.9949,0.9873,0.9969,0.9883,0.9949,0.9878
27,gbm0,"temperature, humidity, light, co2, humidity_ratio",0.993,1.0,0.9944,0.9862,0.9966,0.9873,0.9944,0.9868
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
30,gbm3,"temperature, humidity, light, humidity_ratio",0.9925,0.9996,0.994,0.982,0.9972,0.9893,0.994,0.9857
7,rf7,"humidity, light",0.9911,0.9994,0.9937,0.9831,0.9966,0.9872,0.9937,0.9852
19,cart1,"temperature, humidity, light, co2",0.9925,1.0,0.9931,0.9831,0.9957,0.9841,0.9931,0.9836
37,knn1,"temperature, humidity, light, co2",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827
36,knn0,"temperature, humidity, light, co2, humidity_ratio",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827


In [6]:
scores2_df = pd.read_csv('../models/scores2.csv')
scores2_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
14,rf14,"temperature, humidity, light, humidity_ratio, ...",0.994,0.9996,0.996,0.9905,0.9974,0.9905,0.996,0.9905
12,rf12,"temperature, humidity, light, co2, weekday",0.9945,1.0,0.9958,0.9894,0.9974,0.9905,0.9958,0.9899
11,rf11,"temperature, humidity, light, co2, humidity_ra...",0.9946,1.0,0.9955,0.9905,0.9969,0.9884,0.9955,0.9894
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
77,gbm11,"temperature, humidity, light, co2, humidity_ra...",0.994,1.0,0.9951,0.9884,0.9969,0.9884,0.9951,0.9884
67,gbm1,"temperature, humidity, light, co2",0.9932,0.9989,0.9949,0.9873,0.9969,0.9883,0.9949,0.9878
18,rf18,"humidity, light, weekday",0.992,0.9994,0.9949,0.9884,0.9966,0.9873,0.9949,0.9878
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
66,gbm0,"temperature, humidity, light, co2, humidity_ratio",0.993,1.0,0.9944,0.9862,0.9966,0.9873,0.9944,0.9868


In [5]:
scores_df.shape, scores2_df.shape

((63, 10), (154, 10))

In [12]:
print(tabulate(scores2_df.sort_values(by='Accuracy', ascending=False).head(10), tablefmt="pipe", headers="keys"))

|    | Model name   | Features                                                   |   Best score |   Train score |   Test score |   Sensitivity |   Specificity |   Precision |   Accuracy |   F1-score |
|---:|:-------------|:-----------------------------------------------------------|-------------:|--------------:|-------------:|--------------:|--------------:|------------:|-----------:|-----------:|
| 14 | rf14         | temperature, humidity, light, humidity_ratio, weekday      |       0.994  |        0.9996 |       0.996  |        0.9905 |        0.9974 |      0.9905 |     0.996  |     0.9905 |
| 12 | rf12         | temperature, humidity, light, co2, weekday                 |       0.9945 |        1      |       0.9958 |        0.9894 |        0.9974 |      0.9905 |     0.9958 |     0.9899 |
| 11 | rf11         | temperature, humidity, light, co2, humidity_ratio, weekday |       0.9946 |        1      |       0.9955 |        0.9905 |        0.9969 |      0.9884 |     0.9955 |     0.98