# Experiment 02: BCI (GPU version)

This experiment uses a Brain Computer Interface dataset. The purpose is to try and predict when the participant is paying attention. The dataset consists of recordings from a number of electrodes placed over the scalp.

The details of the machine we used and the version of the libraries can be found in [experiment 01](01_airline.ipynb).

In [1]:
import json
import sys
import warnings
import numpy as np
import pandas as pd
import pkg_resources
from libs.loaders import load_bci
from libs.timer import Timer
from libs.metrics import classification_metrics_binary, classification_metrics_binary_prob, binarize_prediction
import xgboost as xgb
import lightgbm as lgb

# Silence every Python warning for the rest of the session so library
# deprecation notices do not clutter the cell outputs.
warnings.filterwarnings('ignore')


def _installed_version(package):
    """Return the installed version string of the distribution ``package``."""
    return pkg_resources.get_distribution(package).version


# Record the interpreter and library versions next to the results.
print("System version: {}".format(sys.version))
print("XGBoost version: {}".format(_installed_version('xgboost')))
print("LightGBM version: {}".format(_installed_version('lightgbm')))

Using TensorFlow backend.


System version: 3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
XGBoost version: 0.6
LightGBM version: 0.2


## Data loading and management


The dataset has been preprocessed by extracting 800 ms epochs from each channel. The data was then low-pass filtered at 18 Hz and downsampled by a factor of 6. This results in a feature vector of length 2048.

In [2]:
%%time
# load_bci() yields four objects: training features/labels and test
# features/labels. Each is a sequence of arrays at this point; they are
# joined into single arrays with np.concatenate in the following cell.
X, y, X_test, y_test = load_bci()

INFO:libs.loaders:MOUNT_POINT not found in environment. Defaulting to /fileshare


CPU times: user 2.1 s, sys: 472 ms, total: 2.57 s
Wall time: 18.9 s


In [3]:
# Join the sequences of arrays returned by load_bci into one array per split.
# NOTE(review): X_test / y_test are overwritten with their concatenated form,
# so this cell is not safe to run twice without re-running the load cell.
X_train, y_train = np.concatenate(X), np.concatenate(y)
X_test, y_test = np.concatenate(X_test), np.concatenate(y_test)

# Sanity-check the resulting shapes (features are 2-D, labels 1-D).
for split_array in (X_train, y_train, X_test, y_test):
    print(split_array.shape)

(14519, 2048)
(14519,)
(5978, 2048)
(5978,)


In [4]:
# Wrap both splits in XGBoost's DMatrix container; the test labels are
# attached as well, although only the features are needed by predict().
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [6]:
# Build the LightGBM datasets. The test set references the training set,
# and free_raw_data=False keeps the raw arrays attached to both datasets.
lgb_train = lgb.Dataset(data=X_train, label=y_train, free_raw_data=False)
lgb_test = lgb.Dataset(
    data=X_test,
    label=y_test,
    reference=lgb_train,
    free_raw_data=False,
)

### XGBoost

In [7]:
# Shared experiment state: the number of boosting rounds used by every model
# and the container that collects per-model timings and metrics.
num_rounds = 100
results_dict = {}

In [8]:
# XGBoost configuration for the GPU run: exact tree method with the
# 'grow_gpu' updater, depth-3 trees and a 2x weight on the positive class.
params = {'max_depth':3,
          'objective':'binary:logistic',
          'min_child_weight':1,
          'eta':0.1,
          'colsample_bytree':1,
          'scale_pos_weight':2,
          'gamma':0.1,
          # Was misspelled 'reg_lamda', which is not an XGBoost parameter name.
          # The value equals XGBoost's default L2 weight, so results are unchanged.
          'reg_lambda':1,
          'subsample':1,
          'tree_method':'exact',
          'updater':'grow_gpu'
          }


In [13]:
# Time fitting and scoring separately; the intervals are stored in the
# results dictionary further down.
with Timer() as t_train:
    xgb_clf_pipeline = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_rounds)

with Timer() as t_test:
    y_prob_xgb = xgb_clf_pipeline.predict(data=dtest)

In [14]:
# Turn the predicted scores into hard labels for the label-based metrics.
# NOTE(review): presumably a fixed probability threshold — confirm in libs.metrics.
y_pred_xgb = binarize_prediction(y_prob_xgb)

In [15]:
# Metrics from the hard labels and from the raw scores are computed
# separately, then merged so report_xgb holds the full set.
report_xgb = classification_metrics_binary(y_test, y_pred_xgb)
report2_xgb = classification_metrics_binary_prob(y_test, y_prob_xgb)
report_xgb.update(report2_xgb)  # in-place merge into report_xgb

In [17]:
# Record the timings and the merged metric report under the 'xgb' key.
results_dict['xgb'] = dict(
    train_time=t_train.interval,
    test_time=t_test.interval,
    performance=report_xgb,
)

In [18]:
# Drop the reference to the trained booster before fitting the next model
# (presumably to release its memory — the name is not used again below).
del xgb_clf_pipeline

Now let's try XGBoost with the histogram-based tree method (`tree_method: 'hist'`).

In [19]:
# XGBoost configuration for the histogram run: trees are grown loss-guided
# (leaf-wise) and capped at 2**3 = 8 leaves, with max_depth set to 0.
params = {'max_depth':0,
          'objective':'binary:logistic',
          'min_child_weight':1,
          'eta':0.1,
          'colsample_bytree':0.80,
          'scale_pos_weight':2,
          'gamma':0.1,
          # Was misspelled 'reg_lamda', which is not an XGBoost parameter name.
          # The value equals XGBoost's default L2 weight, so results are unchanged.
          'reg_lambda':1,
          'subsample':1,
          'tree_method':'hist',
          'max_leaves':2**3,
          'grow_policy':'lossguide',
         }


In [20]:
# Same timing protocol as the previous model: t_train and t_test are
# re-bound here, so the earlier intervals must already have been stored.
with Timer() as t_train:
    xgb_hist_clf_pipeline = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_rounds)

with Timer() as t_test:
    y_prob_xgb_hist = xgb_hist_clf_pipeline.predict(data=dtest)

In [21]:
# Turn the predicted scores into hard labels for the label-based metrics.
# NOTE(review): presumably a fixed probability threshold — confirm in libs.metrics.
y_pred_xgb_hist = binarize_prediction(y_prob_xgb_hist)

In [22]:
# Metrics from the hard labels and from the raw scores are computed
# separately, then merged so report_xgb_hist holds the full set.
report_xgb_hist = classification_metrics_binary(y_test, y_pred_xgb_hist)
report2_xgb_hist = classification_metrics_binary_prob(y_test, y_prob_xgb_hist)
report_xgb_hist.update(report2_xgb_hist)  # in-place merge into report_xgb_hist

In [23]:
# Record the timings and the merged metric report under the 'xgb_hist' key.
results_dict['xgb_hist'] = dict(
    train_time=t_train.interval,
    test_time=t_test.interval,
    performance=report_xgb_hist,
)

In [24]:
# Drop the reference to the trained booster before fitting the next model
# (presumably to release its memory — the name is not used again below).
del xgb_hist_clf_pipeline

### LightGBM

In [25]:
# LightGBM configuration. The leaf cap (8), learning rate, positive-class
# weight, split-gain threshold and L2 weight carry the same values as the
# XGBoost histogram cell.
# NOTE(review): no 'device' key is set here even though the notebook is
# titled "GPU version" — confirm which device LightGBM actually trains on.
params = dict(
    task='train',
    objective='binary',
    num_leaves=2**3,
    learning_rate=0.1,
    scale_pos_weight=2,
    min_split_gain=0.1,
    min_child_weight=1,
    reg_lambda=1,
    subsample=1,
)

In [30]:
# Same timing protocol as the XGBoost models. LightGBM predicts directly
# from the raw test array rather than from the lgb_test Dataset.
with Timer() as t_train:
    lgbm_clf_pipeline = lgb.train(params=params, train_set=lgb_train, num_boost_round=num_rounds)

with Timer() as t_test:
    y_prob_lgbm = lgbm_clf_pipeline.predict(data=X_test)

In [31]:
# Turn the predicted scores into hard labels for the label-based metrics.
# NOTE(review): presumably a fixed probability threshold — confirm in libs.metrics.
y_pred_lgbm = binarize_prediction(y_prob_lgbm)

In [32]:
# Metrics from the hard labels and from the raw scores are computed
# separately, then merged so report_lgbm holds the full set.
report_lgbm = classification_metrics_binary(y_test, y_pred_lgbm)
report2_lgbm = classification_metrics_binary_prob(y_test, y_prob_lgbm)
report_lgbm.update(report2_lgbm)  # in-place merge into report_lgbm

In [33]:
# Record the timings and the merged metric report under the 'lgbm' key.
results_dict['lgbm'] = dict(
    train_time=t_train.interval,
    test_time=t_test.interval,
    performance=report_lgbm,
)

In [34]:
# Drop the reference to the trained booster now that its predictions and
# metrics have been stored (presumably to release its memory).
del lgbm_clf_pipeline

Finally, we show the results for all three models:

In [35]:
# Results: dump timings and metrics for every model as pretty-printed JSON.
# sort_keys=True makes the key order (models and metrics) alphabetical, so
# the output is stable across runs.
print(json.dumps(results_dict, indent=4, sort_keys=True))

{
    "lgbm": {
        "performance": {
            "AUC": 0.7714542124542124,
            "Accuracy": 0.8813984610237537,
            "F1": 0.13851761846901578,
            "Precision": 0.6,
            "Recall": 0.0782967032967033
        },
        "test_time": 0.009907090001433971,
        "train_time": 2.7659428379993187
    },
    "xgb": {
        "performance": {
            "AUC": 0.7716584249084248,
            "Accuracy": 0.8798929407828705,
            "F1": 0.09343434343434343,
            "Precision": 0.578125,
            "Recall": 0.050824175824175824
        },
        "test_time": 0.0064387769998575095,
        "train_time": 12.934047714998997
    },
    "xgb_hist": {
        "performance": {
            "AUC": 0.7736170852956569,
            "Accuracy": 0.8805620608899297,
            "F1": 0.12068965517241378,
            "Precision": 0.5833333333333334,
            "Recall": 0.0673076923076923
        },
        "test_time": 0.00308577800024068,
        "train_time