In [1]:
import os
import numpy as np
import pickle
import h5py
import scipy.signal as sps
import scipy.interpolate as spi
import matplotlib.pyplot as plt
import pandas as pd
import time
from datetime import datetime
from feature_editing import FeatureDesigner
from dataset_parsing import DataInfo, DataProducer
import xgboost as xgb


In [2]:
features = [ 
    'chestACCMean',
    'chestACCStd',
    # 'chestACC0Mean',
    # 'chestACC0Std',
    # 'chestACC0Freq',
    # 'chestACC1Mean',
    # 'chestACC1Std',
    # 'chestACC1Freq',
    # 'chestACC2Mean',
    # 'chestACC2Std',
    # 'chestACC2Freq',
    'chestECGMeanHR',
    'chestECGStdHR',
    'chestECGMeanHRV',
    'chestECGStdHRV',
    # 'chestECGTINN',
    # 'chestECGRMS',
    'chestEMGMean',
    'chestEMGStd',
    'chestEMGFq',
    'chestEDAMean',
    # 'chestEDAMax',
    # 'chestEDAMin',
    # 'chestEDADyn',
    'chestTempMean',
    # 'chestTempMax',
    # 'chestTempMin',
    # 'chestTempDyn',
    # 'chestResp',
    'wristACCMean',
    'wristACCMax',
    'wristACCMin',
    'wristBVPMeanHR',
    'wristBVPStdHR',
    'wristBVPMeanHRV',
    'wristBVPStdHRV',
    # 'wristBVPTINN',
    # 'wristBVPRMS',
    'wristEDAMean',
    # 'wristEDAMax',
    # 'wristEDAMin',
    # 'wristEDADyn',
    'wristTEMPMean',
    # 'wristTEMPMax',
    # 'wristTEMPMin',
    # 'wristTEMPDyn'
    ]
f = h5py.File('../data/formatted_data_feat.h5', 'r')
test_subjects = ['S2']
train_subjects = [sub for sub in f.keys() if sub.startswith('S') and sub not in test_subjects]

train_arr = np.array([]).reshape(0, len(features))
test_arr = np.array([]).reshape(0, len(features))
train_labels = np.array([])
test_labels = np.array([])

for sub in train_subjects:
    sub_train_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    train_arr = np.concatenate((train_arr, sub_train_arr))
    train_labels = np.concatenate((train_labels, f[sub]['label'][:]))
for sub in test_subjects:
    sub_test_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    test_arr = np.concatenate((test_arr, sub_test_arr))
    test_labels = np.concatenate((test_labels, f[sub]['label'][:]))

print("train arr size", train_arr.shape)
print("test arr size", test_arr.shape)
print("train labels size", train_labels.shape)
print("test labels size", test_labels.shape)


train arr size (18371500, 20)
test arr size (4255300, 20)
train labels size (18371500,)
test labels size (4255300,)


In [3]:
x_train = xgb.DMatrix(train_arr, label=[1 if label==2 else 0 for label in train_labels])
x_test = xgb.DMatrix(test_arr, label=[1 if label==2 else 0 for label in test_labels])
num_round = 10
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
param['nthread'] = 4
param['eval_metric'] = 'auc'
evallist = [(x_train, 'eval'), (x_test, 'train')]
bst = xgb.train(param, x_train, num_round, evallist)

[0]	eval-auc:0.85531	train-auc:0.62182
[1]	eval-auc:0.96806	train-auc:0.63936
[2]	eval-auc:0.98377	train-auc:0.54903
[3]	eval-auc:0.98858	train-auc:0.69259
[4]	eval-auc:0.99231	train-auc:0.73375
[5]	eval-auc:0.99395	train-auc:0.73375
[6]	eval-auc:0.99651	train-auc:0.73375
[7]	eval-auc:0.99738	train-auc:0.83836
[8]	eval-auc:0.99799	train-auc:0.83472
[9]	eval-auc:0.99862	train-auc:0.81225
