In [1]:
import pickle

with open('WESAD/S2/S2.pkl', 'rb') as f:
    dataF = pickle.load(f, encoding='latin1')

In [2]:
len(dataF["label"])

4255300

In [3]:
len(dataF['signal']["wrist"]["TEMP"])

24316

In [4]:
dataF['signal']["wrist"]["BVP"]

array([[-59.37],
       [-53.42],
       [-44.4 ],
       ...,
       [ 18.26],
       [ 18.68],
       [ 19.71]])

In [6]:
dataF

{'signal': {'chest': {'ACC': array([[ 0.95539999, -0.222     , -0.55799997],
          [ 0.92579997, -0.2216    , -0.55379999],
          [ 0.90820003, -0.21960002, -0.53920001],
          ...,
          [ 0.87179995, -0.12379998, -0.30419999],
          [ 0.87300003, -0.12339997, -0.30260003],
          [ 0.87020004, -0.12199998, -0.30220002]]),
   'ECG': array([[ 0.02142334],
          [ 0.02032471],
          [ 0.01652527],
          ...,
          [-0.00544739],
          [ 0.00013733],
          [ 0.0040741 ]]),
   'EMG': array([[-0.00444031],
          [ 0.00434875],
          [ 0.00517273],
          ...,
          [-0.01716614],
          [-0.02897644],
          [-0.02357483]]),
   'EDA': array([[5.25054932],
          [5.26733398],
          [5.24330139],
          ...,
          [0.36048889],
          [0.36582947],
          [0.365448  ]]),
   'Temp': array([[30.120758],
          [30.129517],
          [30.138214],
          ...,
          [31.459229],
          [31.484283

In [45]:
from pandas import read_csv
import numpy as np
from scipy import stats
import pickle

data_dir = 'WESAD/'
LABEL_SF = 700
ACC_SF = 32
BVP_SF = 64
EDA_SF = 4
TEMP_SF = 4
SF_dict = {'ACC':ACC_SF, 'BVP':BVP_SF, 'EDA':EDA_SF, 'TEMP':TEMP_SF}
features = ['ACC','BVP','EDA','TEMP']
baseline_label = 1
stress_label = 2
meditation_label = 4
invalid_labels = [0, 3, 5, 6, 7]

def get_subject_data(subject):
    with open(data_dir+'S'+subject+'/S'+subject+'.pkl', 'rb') as file:
        data = pickle.load(file, encoding='latin1')

    data_labels = data['label']
    data = data['signal']['wrist']
    window_labels = create_labels(data_labels, LABEL_SF)
    mask = [x for x in range(len(window_labels)) if window_labels[x] not in invalid_labels]
    valid_labels = window_labels[mask]

    for feat in features:
        data[feat] = create_windows(data[feat],SF_dict[feat])
        data[feat] = [data[feat][x] for x in mask]

    med_mask = [x for x in range(len(valid_labels)) if valid_labels[x] == meditation_label]
    valid_labels[med_mask] = baseline_label
    valid_labels -= 1

    final_data = dict()
    final_data['data'] = {'ACC' : data['ACC'], 'BVP' : data['BVP'],
            'EDA' : data['EDA'], 'TEMP' : data['TEMP']}
    final_data['labels'] = valid_labels

    return final_data

def get_all_subjects():
    data = {'data': {'ACC':np.empty((0,ACC_SF,3)), 'BVP':np.empty((0,BVP_SF,1)),
            'EDA':np.empty((0,EDA_SF,1)), 'TEMP':np.empty((0,TEMP_SF,1))},
                'labels':[]}

    for x in range(2,18):
        if x != 12:
            temp = get_subject_data(str(x))
            for i in features:
                data['data'][i] = np.append(data['data'][i],temp['data'][i],0)
                data['labels'] = np.append(data['labels'],temp['labels'],0)
    return data

def norm(data):
    normalized_data = (data - np.mean(np.mean(data,0),0))/np.std(np.std(data,0),0)
    return normalized_data

def create_labels(all_labels, SF):
    labels = []
    for x in range(0, len(all_labels) - SF//2, SF//2):
        labels.append(all_labels[x])
    return np.array(labels)

def create_windows(data, data_SF, window_size=1):
    data_windows = []
    for x in range(0, len(data) - window_size * (data_SF//2), window_size * (data_SF//2)):
        data_windows.append(data[x : window_size * data_SF + x])

    return data_windows

def save_data(path, data):
    with open(path, 'wb') as file:
        pickle.dump(data, file)

def save_formatted_data(path):
    data = {'data': {'ACC':np.empty((0,ACC_SF,3)), 'BVP':np.empty((0,BVP_SF,1)),
            'EDA':np.empty((0,EDA_SF,1)), 'TEMP':np.empty((0,TEMP_SF,1))},
                'labels':[], 'id':[]}
    for x in range(2, 18):
        if x != 12:
            cur = get_subject_data(str(x))
            for i in features:
                cur['data'][i] = norm(cur['data'][i])

            save_data(path+'S'+str(x)+'.pkl', cur)
            for i in features:
                data['data'][i] = np.append(data['data'][i], cur['data'][i],0)
            data['labels'] = np.append(data['labels'], cur['labels'],0)
            data['id'] = np.append(data['id'], np.full(cur['labels'].size, str(x)), 0)
    save_data(path+'All_ID.pkl', data)

In [46]:
save_formatted_data("WESAD2/")

In [47]:
with open('WESAD2/All_ID.pkl', 'rb') as file:
        data = pickle.load(file, encoding='latin1')

In [48]:
data

{'data': {'ACC': array([[[  126.32624263, -1012.3755119 ,  1296.16359523],
          [  233.10128359,  -738.13483315,   636.76661552],
          [  411.0596852 ,  -738.13483315,  1024.64719182],
          ...,
          [  138.19013607,  -772.414918  ,  1024.64719182],
          [   78.87066887,  -772.414918  ,  1024.64719182],
          [   67.00677543,  -601.01449378,  1218.58747997]],
  
         [[   90.73456231,  -669.57466347,   636.76661552],
          [   78.87066887,  -635.29457863,   636.76661552],
          [  114.46234919,  -601.01449378,   753.13078841],
          ...,
          [  -16.04047865,  -635.29457863,  1179.79942234],
          [  -16.04047865,  -601.01449378,  1141.01136471],
          [  -27.90437209,  -635.29457863,  1218.58747997]],
  
         [[  -27.90437209,  -498.17423926,  1218.58747997],
          [  -63.49605241,  -498.17423926,  1179.79942234],
          [  -27.90437209,  -498.17423926,  1102.22330708],
          ...,
          [  -27.90437209,  -669

In [49]:
len(data["id"])

78766

In [50]:
len(data["data"]["ACC"])

78766

In [51]:
len(data["data"]["BVP"])

78766

In [52]:
len(data["data"]["EDA"])

78766

In [53]:
len(data["data"]["TEMP"])

78766