In [69]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from timeit import default_timer

import pickle

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

import lightgbm as lgb

In [119]:
# Load saved X,y data
with open("Xy.pickle", 'rb') as picklefile: 
    X_tr, X_ts, y_tr, y_ts, X, y = pickle.load(picklefile)

h = ['h_x1', 'h_y1', 'h_z1','h_g1', 'h_g2', 'h_g3', 'h_m1', 'h_m2', 'h_m3']
c = ['c_x1', 'c_y1', 'c_z1','c_g1', 'c_g2', 'c_g3', 'c_m1', 'c_m2', 'c_m3']
a = ['a_x1', 'a_y1', 'a_z1','a_g1', 'a_g2', 'a_g3', 'a_m1', 'a_m2', 'a_m3']

acce= h+c+a# Features belonging to wrist sensor

If desired, set variable num_samples to reduce the size of dataset when checking the code runs through the end of the process

In [121]:
num_samples = X_tr.shape[0]
X_train = X_tr[:num_samples]
y_train = y_tr[:num_samples]
X_test = X_ts[:num_samples]
y_test = y_ts[:num_samples]

X_train = X_train[h]
X_test = X_test[h]

In [139]:
len(y.unique())

12

In [149]:
ne = [20,40,80,120]
md = [-1]
lr = [0.01,0.1]
mb = [255,510,1020]
bt = ['gbdt','dart']
nb = [50,100]

param_grid = dict(n_estimators=ne, max_depth=md, learning_rate=lr,
                  boosting_type=bt, max_bin=mb,num_boost_round=nb)

In [150]:
clf = lgb.LGBMClassifier(silent=False,
                         objective='multiclass',
                         num_classes=12,
                         metric = ['multi_error','multi_logloss'],
                         early_stopping_rounds=5)

In [151]:
print('Start Process')
start = default_timer()
# Set up randomized Cross Validated grid search.
grid_lgb = GridSearchCV(clf,
                        param_grid, cv=5,
                        scoring=['f1_macro','recall_macro','precision_macro'],
                        refit=False,n_jobs=-1,verbose=5)

# Fit randomized CV grid
grid_lgb.fit(X_train,y_train)

print('{:.2f}'.format(default_timer()-start))
print('grid_gbc fit')

Start Process
Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50, f1_macro=0.6387471287029373, recall_macro=0.6338594579944855, precision_macro=0.6625516280417979, total= 1.5min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=20, num_boost_round=50, f1_macro=0.6360807604550394, recall_macro=0.631

[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed: 10.9min


[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=50, f1_macro=0.6508122796145004, recall_macro=0.6470553453884278, precision_macro=0.6771569618869053, total= 4.0min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=50, f1_macro=0.6494627285973046, recall_macro=0.6456497184041708, precision_macro=0.6791688206796939, total= 4.0min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=50, f1_macro=0.6504068350444053, recall_macro=0.6467175915146953, precision_macro=0.677215229152076, total= 3.3min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=100 
[CV]  

[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=120, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=120, num_boost_round=50, f1_macro=0.6777248577656145, recall_macro=0.6737580556198921, precision_macro=0.7086158523972473, total=14.4min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=120, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.6750764258790176, recall_macro=0.6713959117948027, precision_macro=0.7057980372016451, total=15.0min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=20, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=255, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.6765465174490504, recall_macro=0.6730428044889972, precision_macro=0.7064476933653959, total=15.0mi

[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.6631045468581134, recall_macro=0.6602569820443319, precision_macro=0.6924267151154133, total= 7.2min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.6634342063242946, recall_macro=0.6598633130767929, precision_macro=0.6940931845519048, total= 8.1min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.6647100912881095, recall_macro=0.6620385202178777, precision_macro=0.6929813462485382, total= 8.6min
[CV] b

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed: 234.7min


[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.6649919921556399, recall_macro=0.6615523952548571, precision_macro=0.6944346923520501, total= 7.1min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=100, f1_macro=0.6631045468581134, recall_macro=0.6602569820443319, precision_macro=0.6924267151154133, total= 8.0min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=80, num_boost_round=100, f1_macro=0.6634342063242946, recall_macro=0.6598633130767929, precision_macro=0.6940931845519048, total= 9.0min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=120, num_boost_round=50 
[C

[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=40, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.6764867892246377, recall_macro=0.6735460942156202, precision_macro=0.7050623175107087, total=14.5min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=40, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=40, num_boost_round=50, f1_macro=0.6500020070900684, recall_macro=0.6455880619775991, precision_macro=0.6799075650662251, total= 3.7min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=40, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=510, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.677255093139118, recall_macro=0.6736160641960222, precision_macro=0.7072953006362971, total=14.5min


[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=50, f1_macro=0.6773993880503036, recall_macro=0.6736875944708148, precision_macro=0.7077176087024944, total=15.4min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.6753002683568012, recall_macro=0.6720371588442169, precision_macro=0.7053562891017887, total=15.5min
[CV] boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=100 
[CV]  boosting_type=gbdt, learning_rate=0.01, max_bin=1020, max_depth=-1, n_estimators=120, num_boost_round=100, f1_macro=0.6744149167365284, recall_macro=0.6709358764142386, precision_macro=0.7049127132094299, tota

[CV] boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=80, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=40, num_boost_round=100, f1_macro=0.7352427017085894, recall_macro=0.7290862139678577, precision_macro=0.758355438283389, total= 5.4min
[CV] boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=80, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.7736446941420496, recall_macro=0.7667941325917531, precision_macro=0.7896155076640946, total=608.1min
[CV] boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=80, num_boost_round=50 
[CV]  boosting_type=gbdt, learning_rate=0.1, max_bin=255, max_depth=-1, n_estimators=80, num_boost_round=50, f1_macro=0.7733933225756524, recall_macro=0.7674752181471561, precision_macro=0.7882087868337431, total=608.3min
[CV] boosti

KeyboardInterrupt: 

In [136]:
results = grid_lgb.cv_results_

In [127]:
results.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_learning_rate', 'param_max_depth', 'param_n_estimators', 'params', 'split0_test_f1_macro', 'split1_test_f1_macro', 'split2_test_f1_macro', 'split3_test_f1_macro', 'split4_test_f1_macro', 'mean_test_f1_macro', 'std_test_f1_macro', 'rank_test_f1_macro', 'split0_train_f1_macro', 'split1_train_f1_macro', 'split2_train_f1_macro', 'split3_train_f1_macro', 'split4_train_f1_macro', 'mean_train_f1_macro', 'std_train_f1_macro', 'split0_test_recall_macro', 'split1_test_recall_macro', 'split2_test_recall_macro', 'split3_test_recall_macro', 'split4_test_recall_macro', 'mean_test_recall_macro', 'std_test_recall_macro', 'rank_test_recall_macro', 'split0_train_recall_macro', 'split1_train_recall_macro', 'split2_train_recall_macro', 'split3_train_recall_macro', 'split4_train_recall_macro', 'mean_train_recall_macro', 'std_train_recall_macro', 'split0_test_precision_macro', 'split1_test_precision_macro', 'split2_test_

In [128]:
results['params']

[{'learning_rate': 0.1, 'max_depth': 15, 'n_estimators': 20},
 {'learning_rate': 0.1, 'max_depth': 15, 'n_estimators': 30},
 {'learning_rate': 0.1, 'max_depth': 20, 'n_estimators': 20},
 {'learning_rate': 0.1, 'max_depth': 20, 'n_estimators': 30},
 {'learning_rate': 1, 'max_depth': 15, 'n_estimators': 20},
 {'learning_rate': 1, 'max_depth': 15, 'n_estimators': 30},
 {'learning_rate': 1, 'max_depth': 20, 'n_estimators': 20},
 {'learning_rate': 1, 'max_depth': 20, 'n_estimators': 30}]

In [129]:
results['rank_test_f1_macro']

array([8, 6, 7, 5, 3, 1, 4, 2], dtype=int32)

In [130]:
results['rank_test_precision_macro']

array([8, 6, 7, 5, 3, 1, 4, 2], dtype=int32)

In [131]:
results['rank_test_recall_macro']

array([8, 6, 7, 5, 3, 1, 4, 2], dtype=int32)

In [132]:
results['mean_test_f1_macro']

array([ 0.69596988,  0.71624106,  0.695998  ,  0.71652518,  0.76309248,
        0.78132051,  0.75962115,  0.77685855])

In [133]:
results['mean_test_precision_macro']

array([ 0.72427929,  0.74267536,  0.72430081,  0.74306245,  0.76779099,
        0.7854946 ,  0.7638772 ,  0.780947  ])

In [134]:
results['mean_test_recall_macro']

array([ 0.69151866,  0.71057472,  0.6915677 ,  0.71081677,  0.76055746,
        0.77889592,  0.75754852,  0.77462158])