In [85]:
import os
import pickle
import numpy as np
import pandas as pd
from glob import glob
from scipy import signal
from statistics import mode
from window_slider import Slider
import matplotlib

In [86]:
# Show the current working directory; the './data/...' paths used in this
# notebook are relative and therefore resolved against it.
os.getcwd()

'/home/taehoon/cs565/modeling'

# Save BVP, ACC, TEMP from the wrist sensors

In [87]:
# Folder that is searched for per-subject entries whose names start with 'S'.
# NOTE(review): presumably the root of the extracted WESAD dataset — confirm.
WESAD_dir = './data/WESAD/'

In [113]:
# Every entry directly under WESAD_dir whose name starts with 'S'.
subject_dir = glob(WESAD_dir + 'S*')
# makedirs(exist_ok=True) instead of mkdir: re-running this cell (or running it
# after the folder already exists) must not raise FileExistsError.
os.makedirs("./data/wrist-only", exist_ok=True)

In [None]:
# For every subject folder: load "<folder>/<name>.pkl", keep only the wrist
# signals (without EDA) under data['signal'], and save the result to
# "./data/wrist-only/<name>.pkl".
for subject_path in subject_dir:  # not named `dir`, which would shadow the builtin
    # basename/normpath instead of split('/') so the folder name is extracted
    # correctly regardless of the path separator glob returned.
    subject_name = os.path.basename(os.path.normpath(subject_path))
    pkl_file = os.path.join(subject_path, subject_name + ".pkl")

    # NOTE(review): pickle.load can execute arbitrary code — only load files
    # from a trusted source.
    with open(pkl_file, 'rb') as f:
        data = pickle.load(f, encoding='latin1')

    # Replacing data['signal'] with its 'wrist' entry discards the chest
    # signals implicitly; only EDA has to be removed explicitly.
    wrist_signals = data['signal']['wrist']
    del wrist_signals['EDA']
    data['signal'] = wrist_signals

    with open(f"./data/wrist-only/{subject_name}.pkl", 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

# Windowing & feature extraction
### The features of each sensor

In [90]:
class BVP:
    """Feature store for the BVP signal: one list of values per feature name."""

    def build_features(self):
        """Return a new dict mapping each BVP feature name to an empty list."""
        names = ("min", "max", "mean", "std",
                 "n_peak", "peak_f",
                 "weight_amp_avg", "weight_energy_avg",
                 "power_entropy")
        # A fresh list per key, so no two features (or instances) share storage.
        return {name: [] for name in names}

    def __init__(self):
        self.features = self.build_features()
        
class TEMP:
    """Feature store for the TEMP signal: one list of values per statistic."""

    def build_features(self):
        """Return a new dict mapping min/max/mean/std to empty lists."""
        # A fresh list per key, so no two statistics (or instances) share storage.
        return {stat: [] for stat in ("min", "max", "mean", "std")}

    def __init__(self):
        self.features = self.build_features()
        
class ACC:
    """Feature store for the ACC signal: per-axis statistics plus a magnitude list."""

    def build_features(self):
        """Return a new dict mapping min/max/mean/std to empty lists."""
        return {stat: [] for stat in ("min", "max", "mean", "std")}

    def __init__(self):
        # Each of the x, y and z axes gets its own independent feature dict.
        self.axis = {axis_name: self.build_features() for axis_name in ("x", "y", "z")}
        self.mag = []

### Extract features

In [108]:
# Windowing configuration.
# hz: samples-per-unit-time of each stream; multiplied by window_size below to
# get the number of samples in one window.
# NOTE(review): "GT" is presumably the rate of the label stream — confirm.
hz = dict(BVP=64, TEMP=4, ACC=32, GT=700)
window_size = 30    # window length, in the time unit the rates above refer to
overlap_rate = 0.5  # fraction of a window's samples passed to Slider as overlap

In [109]:
# Minimum spacing (in BVP samples) enforced between detected peaks, derived
# from an upper bound of 200 beats per 60 time units.
max_beat_per_sec = 200 / 60
min_sec_per_beat = 1 / max_beat_per_sec
# int() truncates: the spacing is rounded down to a whole number of samples.
dist = int(hz['BVP'] * min_sec_per_beat)

In [110]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the processing order (and thus the row order of the saved dataset) stable.
pkl_files = sorted(glob('./data/wrist-only/S*'))

In [111]:
def _iter_full_windows(values, sample_rate):
    """Yield successive overlapping windows of ``values``.

    A window holds ``window_size * sample_rate`` samples and
    ``int(that * overlap_rate)`` samples are passed to ``Slider`` as overlap.
    The window returned by the slide that reaches the end of the data is NOT
    yielded, so every stream (signals and labels) drops its final window alike.
    NOTE(review): that final window is presumably a partial one — confirm
    against the window_slider documentation.
    """
    bucket_size = window_size * sample_rate
    overlap_count = int(bucket_size * overlap_rate)
    slider = Slider(bucket_size, overlap_count)
    slider.fit(values)
    while True:
        window_data = slider.slide()
        if slider.reached_end_of_list():
            return
        yield window_data


# Feature container for each sensor that is processed. ACC is deliberately not
# listed, so it is skipped; any other stream without a container is skipped as
# well instead of silently reusing the previous sensor's container.
sensor_classes = {"BVP": BVP, "TEMP": TEMP}

# One DataFrame per subject; concatenated in a later cell.
concat_all_subject = []
for file in pkl_files:
    # NOTE(review): pickle.load can execute arbitrary code — only load files
    # from a trusted source.
    with open(file, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
    # Only these four subjects are processed; every other file is skipped.
    if data['subject'] not in ['S14', 'S15', 'S16', 'S17']: continue
    df = pd.DataFrame()
    for sensor, val in data['signal'].items():
        if sensor not in sensor_classes: continue
        obj_sensor = sensor_classes[sensor]()

        # Mean over the whole recording; used below as the minimum peak height.
        height = val.flatten().mean()
        # Single-column signals become 1-D; anything wider is transposed.
        val = val.flatten() if val.shape[1] == 1 else val.transpose()
        for window_data in _iter_full_windows(val, hz[sensor]):
            obj_sensor.features["min"].append(window_data.min())
            obj_sensor.features["max"].append(window_data.max())
            obj_sensor.features["mean"].append(window_data.mean())
            obj_sensor.features["std"].append(window_data.std())
            if sensor == "BVP":
                peaks = signal.find_peaks(window_data, height=height, distance=dist)[0]
                # Peak count normalised by the window length.
                n_peak = len(peaks) / window_size
                obj_sensor.features["n_peak"].append(n_peak)

        for stat in ("min", "max", "mean", "std"):
            df[f'{sensor}_{stat}'] = obj_sensor.features[stat]
        if sensor == "BVP":
            df[f'{sensor}_n_peak'] = obj_sensor.features["n_peak"]

    # Label of a window = most common label value inside it.
    gt_list = [mode(window_data)
               for window_data in _iter_full_windows(data['label'].flatten(), hz["GT"])]
    df['labels'] = gt_list
    # Keep only windows labelled 1 or 2. assign() adds the id column on a fresh
    # frame, which avoids the SettingWithCopyWarning that assigning a column
    # on the result of query() triggers.
    df = df.query('labels == 1 or labels == 2').assign(id=data['subject'])
    concat_all_subject.append(df)

### Save dataset

In [112]:
# Stack the per-subject frames into one table and write it to a CSV whose
# name encodes the window size and the overlap rate (with its "0." removed).
overlap_tag = str(overlap_rate).replace('0.', '')
csv_path = f"./data/processed_w{window_size}_o{overlap_tag}_comp_npeak.csv"
# Alternative file name without the "_comp_npeak" suffix:
# csv_path = f"./data/processed_w{window_size}_o{overlap_tag}.csv"
df_concat = pd.concat(concat_all_subject, ignore_index=True, axis=0)
df_concat.to_csv(csv_path, index=False)
df_concat.head()

Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_n_peak,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels,id
0,-117.07,126.23,0.054844,38.785298,1.233333,31.99,32.05,32.020417,0.016703,1,S14
1,-210.57,170.49,-0.225891,48.459218,1.133333,31.99,32.09,32.030667,0.023725,1,S14
2,-210.57,170.49,-0.789958,44.565724,1.133333,32.0,32.09,32.057417,0.027884,1,S14
3,-117.63,161.67,-0.182208,38.299312,1.133333,32.05,32.13,32.082167,0.015978,1,S14
4,-117.63,161.67,0.571495,38.217082,1.1,32.05,32.13,32.0905,0.015158,1,S14


# Check validity

In [106]:
# Rebuild the combined table from the per-subject frames and swap the numeric
# labels for readable names: 1 and 3 -> 'non-stress', 2 -> 'stress'.
label_names = {1: 'non-stress', 2: 'stress', 3: 'non-stress'}
df_concat = pd.concat(concat_all_subject, ignore_index=True, axis=0)
df_concat['labels'] = df_concat['labels'].replace(label_names)

In [107]:
# Per subject and label: average peak rate and average temperature.
summary_cols = ['BVP_n_peak', 'TEMP_mean']
df_concat.groupby(['id', 'labels'])[summary_cols].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,BVP_n_peak,TEMP_mean
id,labels,Unnamed: 2_level_1,Unnamed: 3_level_1
S10,non-stress,1.575214,33.709771
S10,stress,1.655782,33.436357
S11,non-stress,1.264979,34.266667
S11,stress,1.564444,33.085311
S13,non-stress,1.383122,34.809348
S13,stress,1.44697,33.632693
S14,non-stress,1.209705,32.175703
S14,stress,1.79037,32.89427
S15,non-stress,1.411538,30.001259
S15,stress,1.452899,30.137127
