### Import libraries.

In [1]:
import pandas as pd
import numpy as np

import pickle
from functions import get_scores
from functions import run_model
from re import search
from tabulate import tabulate

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn import metrics

### Load data.

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [3]:
# df = pickle.load(open("../datasets/occupancy.p", "rb"))
# df = pd.read_csv('../datasets/occupancy.csv')
df = pickle.load(open("../datasets/occupancy2.p", "rb"))

In [4]:
df

Unnamed: 0_level_0,temperature,humidity,light,co2,humidity_ratio,occupancy,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1,1
2015-02-04 17:52:00,23.15,27.2675,429.5,714.0,0.004783,1,1
2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1,1
2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1,1
2015-02-04 17:55:00,23.1,27.2,426.0,704.5,0.004757,1,1
2015-02-04 17:56:00,23.1,27.2,419.0,701.0,0.004757,1,1
2015-02-04 17:57:00,23.1,27.2,419.0,701.666667,0.004757,1,1
2015-02-04 17:58:00,23.1,27.2,419.0,699.0,0.004757,1,1
2015-02-04 17:59:00,23.1,27.2,419.0,689.333333,0.004757,1,1
2015-02-04 18:00:00,23.075,27.175,419.0,688.0,0.004745,1,1


### Createa a dataframe for storing model scores

In [5]:
column_names = ['Model name', 'Features', 'Best score', 'Train score', 'Test score', 
                'Sensitivity', 'Specificity', 'Precision', 'Accuracy', 'F1-score']
scores_df = pd.DataFrame(columns=column_names)
scores_df.head()

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score


### Features and target variables.

In [6]:
features_list = [['temperature', 'humidity', 'light', 'co2', 'humidity_ratio'],
                 ['temperature', 'humidity', 'light', 'co2'],
                 ['temperature', 'humidity', 'co2', 'humidity_ratio'],
                 ['temperature', 'humidity', 'light', 'humidity_ratio'],
                 ['temperature', 'humidity', 'humidity_ratio'],
                 ['temperature', 'humidity'],
                 ['temperature', 'light'],
                 ['humidity', 'light'],
                 ['light', 'humidity_ratio'],
                 ['temperature', 'co2'],
                 ['light', 'co2'],
                 ['temperature', 'humidity', 'light', 'co2', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'light', 'co2', 'weekday'],
                 ['temperature', 'humidity', 'co2', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'light', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'humidity_ratio', 'weekday'],
                 ['temperature', 'humidity', 'weekday'],
                 ['temperature', 'light', 'weekday'],
                 ['humidity', 'light', 'weekday'],
                 ['light', 'humidity_ratio', 'weekday'],
                 ['temperature', 'co2', 'weekday'],
                 ['light', 'co2', 'weekday']
                ]
target = 'occupancy'

### Initialize common grid search parameters for all models.

In [7]:
cv_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
verbose = 1
n_jobs = 4

### Random Forest (RF)

In [8]:
model_name = 'rf'
params = { 
    'n_estimators' : [75,100,125],
    'max_features' : [None, 'auto'],
    'max_depth' : [None, 5, 6]
}

In [9]:
for i, features in enumerate(features_list):
    model = RandomForestClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   18.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   15.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   18.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   14.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   14.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    7.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    8.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   11.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   16.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   13.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   17.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   13.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   13.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    7.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    8.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   10.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    9.3s finished


### Linear Discriminant Analysis (LDA)

In [10]:
# https://machinelearningmastery.com/linear-discriminant-analysis-with-python/

In [11]:
model_name = 'lda'
params = { 
    'solver' : ['svd', 'lsqr', 'eigen']
}

In [12]:
for i, features in enumerate(features_list):
    model = LinearDiscriminantAnalysis()
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 3 candidates, totalling 15 fits
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed:    0.1s finished


### Classification and Regression Trees (CART)

In [13]:
# https://www.datacamp.com/community/tutorials/decision-tree-classification-python

In [14]:
model_name = 'cart'
params = { 
    'max_depth' : [None, 2, 5],
    'max_features' : ['auto', 'sqrt', 'log2']
}

In [15]:
for i, features in enumerate(features_list):
    model = DecisionTreeClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  38 out of  45 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:    0.1s finished


### Gradient Boosting Machine (GBM)

In [16]:
# https://machinelearningmastery.com/gradient-boosting-machine-ensemble-in-python/

In [17]:
model_name = 'gbm'
params = { 
    'learning_rate' : [0.1, 0.2, 0.3],
    'n_estimators' : [100, 200],
    'max_depth' : [3, 5]
}

In [18]:
for i, features in enumerate(features_list):
    model = GradientBoostingClassifier(random_state=0)
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.2s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   24.9s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.1s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   18.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.8s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   22.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   19.2s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.2s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   17.5s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.5s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   11.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.5s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:    8.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.9s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   10.7s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.9s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   12.3s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.4s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   11.6s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   25.8s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.6s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   19.5s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   15.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   23.4s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   19.7s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   18.4s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   12.1s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:    9.2s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   11.4s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.3s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.0s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.9s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   13.7s finished


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.7s
[Parallel(n_jobs=4)]: Done  60 out of  60 | elapsed:   23.5s finished


### K-Nearest Neighbor (KNN)

In [19]:
model_name = 'knn'
params = { 
    'n_neighbors' : [3, 5, 10],
    'p' : [1,2],
    'leaf_size' : [1, 5, 10]
}

In [20]:
for i, features in enumerate(features_list):
    model = KNeighborsClassifier()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.7s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    2.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.8s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:    1.8s finished


### Support Vector Classification (SVC)

In [21]:
model_name = 'svc'
params = {
    'C': [0.1, 10, 100], 
    'gamma': [1,0.01,0.001],
    'kernel': ['rbf', 'sigmoid']
    }

In [22]:
for i, features in enumerate(features_list):
    model = svm.SVC()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   18.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   32.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   15.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   36.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   23.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   36.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.6s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   33.5s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.4s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   29.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   30.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.9s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   30.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   40.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   11.2s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   22.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   17.5s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   34.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   18.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   33.6s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   40.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.7s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   25.2s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   32.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   13.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   31.0s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.8s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   25.9s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   10.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   27.4s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.1s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   28.1s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   16.3s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   37.3s finished


Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   14.0s
[Parallel(n_jobs=4)]: Done  90 out of  90 | elapsed:   26.3s finished


### Adaptive Boosting (AdaBoost)

In [23]:
model_name = 'ada'
params = {
    'n_estimators' : [5, 10, 50],
    'learning_rate' : [1, 2]
    }

In [24]:
for i, features in enumerate(features_list):
    model = AdaBoostClassifier()    
    scores = run_model(df, features, target, params, model, model_name+str(i))
    scores_series = pd.Series(scores, index=scores_df.columns)
    scores_df = scores_df.append(scores_series, ignore_index=True)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.4s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.4s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.4s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.7s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    1.0s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.6s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.9s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.4s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.4s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  23 out of  30 | elapsed:    0.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    0.8s finished


### Model summary table.

In [30]:
scores_df.head()

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9937,1.0,0.9951,0.9884,0.9969,0.9884,0.9951,0.9884
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
2,rf2,"temperature, humidity, co2, humidity_ratio",0.987,0.9999,0.9864,0.9672,0.9915,0.9682,0.9864,0.9677
3,rf3,"temperature, humidity, light, humidity_ratio",0.9932,0.9996,0.9951,0.9852,0.9977,0.9915,0.9951,0.9883
4,rf4,"temperature, humidity, humidity_ratio",0.9573,0.9905,0.9613,0.9037,0.9768,0.9124,0.9613,0.908


In [26]:
"""
model_name = 'mnb'
scores_df = scores_df[~scores_df['Model name'].str.contains(model_name)]
scores_df.reset_index(drop=True, inplace=True)
""";

In [31]:
scores_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
12,rf12,"temperature, humidity, light, co2, weekday",0.9944,1.0,0.9958,0.9905,0.9972,0.9894,0.9958,0.99
11,rf11,"temperature, humidity, light, co2, humidity_ratio, weekday",0.9943,1.0,0.9955,0.9894,0.9972,0.9894,0.9955,0.9894
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
14,rf14,"temperature, humidity, light, humidity_ratio, weekday",0.994,0.9997,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9937,1.0,0.9951,0.9884,0.9969,0.9884,0.9951,0.9884
3,rf3,"temperature, humidity, light, humidity_ratio",0.9932,0.9996,0.9951,0.9852,0.9977,0.9915,0.9951,0.9883
78,gbm12,"temperature, humidity, light, co2, weekday",0.994,1.0,0.9949,0.9862,0.9972,0.9894,0.9949,0.9878
18,rf18,"humidity, light, weekday",0.992,0.9993,0.9944,0.9873,0.9963,0.9863,0.9944,0.9868
69,gbm3,"temperature, humidity, light, humidity_ratio",0.9925,0.9996,0.9942,0.9831,0.9972,0.9894,0.9942,0.9862
67,gbm1,"temperature, humidity, light, co2",0.9929,1.0,0.9942,0.9831,0.9972,0.9894,0.9942,0.9862


In [32]:
scores_df.shape

(154, 10)

In [33]:
scores_df.to_csv('../models/scores3.csv', index=False)
scores_df.to_pickle('../models/scores3.p')

In [38]:
scores_df = pd.read_csv('../models/scores.csv')
scores_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
28,gbm1,"temperature, humidity, light, co2",0.9932,0.9989,0.9949,0.9873,0.9969,0.9883,0.9949,0.9878
27,gbm0,"temperature, humidity, light, co2, humidity_ratio",0.993,1.0,0.9944,0.9862,0.9966,0.9873,0.9944,0.9868
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
30,gbm3,"temperature, humidity, light, humidity_ratio",0.9925,0.9996,0.994,0.982,0.9972,0.9893,0.994,0.9857
7,rf7,"humidity, light",0.9911,0.9994,0.9937,0.9831,0.9966,0.9872,0.9937,0.9852
19,cart1,"temperature, humidity, light, co2",0.9925,1.0,0.9931,0.9831,0.9957,0.9841,0.9931,0.9836
37,knn1,"temperature, humidity, light, co2",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827
36,knn0,"temperature, humidity, light, co2, humidity_ratio",0.9908,0.9945,0.9926,0.9894,0.9935,0.976,0.9926,0.9827


In [36]:
scores2_df = pd.read_csv('../models/scores2.csv')
scores2_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
14,rf14,"temperature, humidity, light, humidity_ratio, weekday",0.994,0.9996,0.996,0.9905,0.9974,0.9905,0.996,0.9905
12,rf12,"temperature, humidity, light, co2, weekday",0.9945,1.0,0.9958,0.9894,0.9974,0.9905,0.9958,0.9899
11,rf11,"temperature, humidity, light, co2, humidity_ratio, weekday",0.9946,1.0,0.9955,0.9905,0.9969,0.9884,0.9955,0.9894
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9938,1.0,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
77,gbm11,"temperature, humidity, light, co2, humidity_ratio, weekday",0.994,1.0,0.9951,0.9884,0.9969,0.9884,0.9951,0.9884
67,gbm1,"temperature, humidity, light, co2",0.9932,0.9989,0.9949,0.9873,0.9969,0.9883,0.9949,0.9878
18,rf18,"humidity, light, weekday",0.992,0.9994,0.9949,0.9884,0.9966,0.9873,0.9949,0.9878
3,rf3,"temperature, humidity, light, humidity_ratio",0.9931,0.9996,0.9944,0.9831,0.9974,0.9904,0.9944,0.9867
66,gbm0,"temperature, humidity, light, co2, humidity_ratio",0.993,1.0,0.9944,0.9862,0.9966,0.9873,0.9944,0.9868


In [34]:
scores3_df = pd.read_csv('../models/scores3.csv')
scores3_df.sort_values(by='Accuracy', ascending=False).head(10)

Unnamed: 0,Model name,Features,Best score,Train score,Test score,Sensitivity,Specificity,Precision,Accuracy,F1-score
12,rf12,"temperature, humidity, light, co2, weekday",0.9944,1.0,0.9958,0.9905,0.9972,0.9894,0.9958,0.99
11,rf11,"temperature, humidity, light, co2, humidity_ratio, weekday",0.9943,1.0,0.9955,0.9894,0.9972,0.9894,0.9955,0.9894
1,rf1,"temperature, humidity, light, co2",0.994,1.0,0.9955,0.9884,0.9974,0.9905,0.9955,0.9894
14,rf14,"temperature, humidity, light, humidity_ratio, weekday",0.994,0.9997,0.9953,0.9873,0.9974,0.9904,0.9953,0.9889
0,rf0,"temperature, humidity, light, co2, humidity_ratio",0.9937,1.0,0.9951,0.9884,0.9969,0.9884,0.9951,0.9884
3,rf3,"temperature, humidity, light, humidity_ratio",0.9932,0.9996,0.9951,0.9852,0.9977,0.9915,0.9951,0.9883
78,gbm12,"temperature, humidity, light, co2, weekday",0.994,1.0,0.9949,0.9862,0.9972,0.9894,0.9949,0.9878
18,rf18,"humidity, light, weekday",0.992,0.9993,0.9944,0.9873,0.9963,0.9863,0.9944,0.9868
69,gbm3,"temperature, humidity, light, humidity_ratio",0.9925,0.9996,0.9942,0.9831,0.9972,0.9894,0.9942,0.9862
67,gbm1,"temperature, humidity, light, co2",0.9929,1.0,0.9942,0.9831,0.9972,0.9894,0.9942,0.9862


In [39]:
scores_df.shape, scores2_df.shape, scores3_df.shape

((63, 10), (154, 10), (154, 10))

In [12]:
print(tabulate(scores2_df.sort_values(by='Accuracy', ascending=False).head(10), tablefmt="pipe", headers="keys"))

|    | Model name   | Features                                                   |   Best score |   Train score |   Test score |   Sensitivity |   Specificity |   Precision |   Accuracy |   F1-score |
|---:|:-------------|:-----------------------------------------------------------|-------------:|--------------:|-------------:|--------------:|--------------:|------------:|-----------:|-----------:|
| 14 | rf14         | temperature, humidity, light, humidity_ratio, weekday      |       0.994  |        0.9996 |       0.996  |        0.9905 |        0.9974 |      0.9905 |     0.996  |     0.9905 |
| 12 | rf12         | temperature, humidity, light, co2, weekday                 |       0.9945 |        1      |       0.9958 |        0.9894 |        0.9974 |      0.9905 |     0.9958 |     0.9899 |
| 11 | rf11         | temperature, humidity, light, co2, humidity_ratio, weekday |       0.9946 |        1      |       0.9955 |        0.9905 |        0.9969 |      0.9884 |     0.9955 |     0.98