In [1]:
import os
import numpy as np
import pickle
import h5py
import scipy.signal as sps
import scipy.interpolate as spi
import matplotlib.pyplot as plt
import pandas as pd
import time
from datetime import datetime
from feature_editing import FeatureDesigner
from dataset_parsing import DataInfo, DataProducer
import xgboost as xgb


In [2]:
features = [ 
    'chestACCMean',
    'chestACCStd',
    # 'chestACC0Mean',
    # 'chestACC0Std',
    # 'chestACC0Freq',
    # 'chestACC1Mean',
    # 'chestACC1Std',
    # 'chestACC1Freq',
    # 'chestACC2Mean',
    # 'chestACC2Std',
    # 'chestACC2Freq',
    'chestECGMeanHR',
    'chestECGStdHR',
    'chestECGMeanHRV',
    'chestECGStdHRV',
    # 'chestECGTINN',
    # 'chestECGRMS',
    'chestEMGMean',
    'chestEMGStd',
    'chestEMGFq',
    'chestEDAMean',
    # 'chestEDAMax',
    # 'chestEDAMin',
    # 'chestEDADyn',
    # 'chestTempMean',
    # 'chestTempMax',
    # 'chestTempMin',
    # 'chestTempDyn',
    # 'chestResp',
    'wristACCMean',
    # 'wristACCMax',
    # 'wristACCMin',
    'wristBVPMeanHR',
    # 'wristBVPStdHR',
    'wristBVPMeanHRV',
    # 'wristBVPStdHRV',
    # 'wristBVPTINN',
    # 'wristBVPRMS',
    'wristEDAMean',
    # 'wristEDAMax',
    # 'wristEDAMin',
    # 'wristEDADyn',
    'wristTEMPMean',
    # 'wristTEMPMax',
    # 'wristTEMPMin',
    # 'wristTEMPDyn'
    ]
f = h5py.File('../data/formatted_data_feat.h5', 'r')
test_subjects = ['S2']
train_subjects = [sub for sub in f.keys() if sub.startswith('S') and sub not in test_subjects]

train_arr = np.array([]).reshape(0, len(features))
test_arr = np.array([]).reshape(0, len(features))
train_labels = np.array([])
test_labels = np.array([])

for sub in test_subjects:
    sub_test_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    test_arr = np.concatenate((test_arr, sub_test_arr))
    test_labels = np.concatenate((test_labels, f[sub]['label'][:]))
for sub in train_subjects[:len(train_subjects)//2]:
    sub_train_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    train_arr = np.concatenate((train_arr, sub_train_arr))
    train_labels = np.concatenate((train_labels, f[sub]['label'][:]))

print("train arr size", train_arr.shape)
print("test arr size", test_arr.shape)
print("train labels size", train_labels.shape)
print("test labels size", test_labels.shape)


train arr size (27031900, 15)
test arr size (4255300, 15)
train labels size (27031900,)
test labels size (4255300,)


In [5]:
x_train = xgb.DMatrix(train_arr, label=[1 if label==2 else 0 for label in train_labels])
x_test = xgb.DMatrix(test_arr, label=[1 if label==2 else 0 for label in test_labels])
num_round = 25 
param = {'max_depth': 2, 
         'eta': 1, 
         'objective': 'binary:logistic', 
         'nthread': 4,
         'eval_metric': 'auc',
         'tree_method': 'gpu_hist'}

evallist = [(x_train, 'eval'), (x_test, 'train')]
bst = xgb.train(param, x_train, num_round, evallist)

[0]	eval-auc:0.83913	train-auc:0.49623
[1]	eval-auc:0.93823	train-auc:0.49010
[2]	eval-auc:0.96515	train-auc:0.66124
[3]	eval-auc:0.98245	train-auc:0.65896
[4]	eval-auc:0.98619	train-auc:0.35782
[5]	eval-auc:0.98933	train-auc:0.55758
[6]	eval-auc:0.99098	train-auc:0.57572
[7]	eval-auc:0.99317	train-auc:0.73798
[8]	eval-auc:0.99415	train-auc:0.73387
[9]	eval-auc:0.99487	train-auc:0.83868
[10]	eval-auc:0.99605	train-auc:0.80949
[11]	eval-auc:0.99677	train-auc:0.81232
[12]	eval-auc:0.99747	train-auc:0.81232
[13]	eval-auc:0.99775	train-auc:0.85635
[14]	eval-auc:0.99827	train-auc:0.81220
[15]	eval-auc:0.99854	train-auc:0.77713
[16]	eval-auc:0.99859	train-auc:0.79215
[17]	eval-auc:0.99880	train-auc:0.80219
[18]	eval-auc:0.99892	train-auc:0.80133
[19]	eval-auc:0.99905	train-auc:0.81668
[20]	eval-auc:0.99916	train-auc:0.81668
[21]	eval-auc:0.99921	train-auc:0.81918
[22]	eval-auc:0.99929	train-auc:0.85392
[23]	eval-auc:0.99933	train-auc:0.82549
[24]	eval-auc:0.99938	train-auc:0.78932


In [2]:
features = [ 
    'chestACCMean',
    'chestACCStd',
    # 'chestACC0Mean',
    # 'chestACC0Std',
    # 'chestACC0Freq',
    # 'chestACC1Mean',
    # 'chestACC1Std',
    # 'chestACC1Freq',
    # 'chestACC2Mean',
    # 'chestACC2Std',
    # 'chestACC2Freq',
    'chestECGMeanHR',
    # 'chestECGStdHR',
    'chestECGMeanHRV',
    # 'chestECGStdHRV',
    # 'chestECGTINN',
    # 'chestECGRMS',
    'chestEMGMean',
    'chestEMGStd',
    'chestEMGFq',
    'chestEDAMean',
    # 'chestEDAMax',
    # 'chestEDAMin',
    # 'chestEDADyn',
    # 'chestTempMean',
    # 'chestTempMax',
    # 'chestTempMin',
    # 'chestTempDyn',
    # 'chestResp',
    'wristACCMean',
    # 'wristACCMax',
    # 'wristACCMin',
    'wristBVPMeanHR',
    # 'wristBVPStdHR',
    'wristBVPMeanHRV',
    # 'wristBVPStdHRV',
    # 'wristBVPTINN',
    # 'wristBVPRMS',
    'wristEDAMean',
    # 'wristEDAMax',
    # 'wristEDAMin',
    # 'wristEDADyn',
    'wristTEMPMean',
    # 'wristTEMPMax',
    # 'wristTEMPMin',
    # 'wristTEMPDyn'
    ]
f = h5py.File('../data/formatted_data_feat.h5', 'r')
test_subjects = ['S2', 'S3']
train_subjects = [sub for sub in f.keys() if sub.startswith('S') and sub not in test_subjects]

train_arr = np.array([]).reshape(0, len(features))
test_arr = np.array([]).reshape(0, len(features))
train_labels = np.array([])
test_labels = np.array([])

for sub in test_subjects:
    sub_test_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    test_arr = np.concatenate((test_arr, sub_test_arr))
    test_labels = np.concatenate((test_labels, f[sub]['label'][:]))
x_test = xgb.DMatrix(test_arr, label=[1 if label==2 else 0 for label in test_labels])
for sub in train_subjects[:10]:
# for sub in train_subjects[:len(train_subjects)//2]:
    sub_train_arr = np.array([f[sub][ftr][:] for ftr in features]).T
    train_arr = np.concatenate((train_arr, sub_train_arr))
    train_labels = np.concatenate((train_labels, f[sub]['label'][:]))
del sub_train_arr
del sub_test_arr
print("train arr size", train_arr.shape)
print("test arr size", test_arr.shape)
print("train labels size", train_labels.shape)
print("test labels size", test_labels.shape)

x_train = xgb.DMatrix(train_arr, label=[1 if label==2 else 0 for label in train_labels])
num_round = 10 
param = {'max_depth': 2, 
        'eta': 1, 
        'objective': 'binary:logistic', 
        'nthread': 4,
        'eval_metric': 'auc',
        'tree_method': 'gpu_hist'}

evallist = [(x_train, 'eval'), (x_test, 'train')]
bst = xgb.train(param, x_train, num_round, evallist)



train arr size (40858300, 13)
test arr size (8800400, 13)
train labels size (40858300,)
test labels size (8800400,)
[0]	eval-auc:0.77139	train-auc:0.47664
[1]	eval-auc:0.92526	train-auc:0.75161
[2]	eval-auc:0.95159	train-auc:0.75822
[3]	eval-auc:0.96569	train-auc:0.73047
[4]	eval-auc:0.97837	train-auc:0.70257
[5]	eval-auc:0.98233	train-auc:0.77965
[6]	eval-auc:0.98623	train-auc:0.76724
[7]	eval-auc:0.99035	train-auc:0.75961
[8]	eval-auc:0.99168	train-auc:0.77757
[9]	eval-auc:0.99287	train-auc:0.80711


In [2]:
import treelite
model = treelite.Model.from_xgboost(bst)

TreeliteError: std::bad_alloc