In [1]:
# Imports
import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Root folder of the WESAD dataset
directory = "C:/Users/Boris/Google Drive/Studie/Master/2020-2021-2A/Deep Learning/a2/data/WESAD/WESAD/"

# Walk the dataset folder and collect the full path of every pickle file to load.
# Only files ending in "S2.pkl" are kept; change the suffix to ".pkl" to get all files.
pickle_paths = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(directory)
    for filename in filenames
    if filename.endswith("S2.pkl")
]

In [3]:
# Load each pickle file into a list (one entry per path in pickle_paths).
# The loop variable is deliberately NOT called `pickle`: that name would rebind
# the imported `pickle` module to a path string for the rest of the notebook.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
all_raw_data = [pd.read_pickle(pickle_path) for pickle_path in pickle_paths]

In [4]:
# Create a list for every modality (using lists here b/c it's a lot faster than dataframes).
# Each list receives one entry per sample, so all lists end up with the same length.
subject_ID_list = []
label_list = []
ACC_list = []
ECG_list = []
EMG_list = []
EDA_list = []
Temp_list = []
Resp_list = []

for subject_data in all_raw_data:

    # Get this subject's ID
    subject_ID = subject_data["subject"]

    # Use THIS subject's labels. Reading them from all_raw_data[0] would give
    # every subject the labels of the first loaded subject.
    # NOTE(review): assumes "label" has at least as many entries as the chest ECG signal - confirm.
    subject_labels = subject_data["label"]

    # Extract the chest data
    chest_data = subject_data["signal"]["chest"]

    # Look up each modality once per subject instead of once per sample
    acc_signal = chest_data["ACC"]
    ecg_signal = chest_data["ECG"]
    emg_signal = chest_data["EMG"]
    eda_signal = chest_data["EDA"]
    temp_signal = chest_data["Temp"]
    resp_signal = chest_data["Resp"]

    # Set the total signal length for this subject (the ECG length is used as the reference)
    signal_length = len(ecg_signal)

    # Extract the signal values of each modality to its list.
    # ACC entries are stored whole; for the other modalities only element 0 of each sample is kept.
    for i in range(signal_length):
        subject_ID_list.append(subject_ID)
        label_list.append(subject_labels[i])
        ACC_list.append(acc_signal[i])
        ECG_list.append(ecg_signal[i][0])
        EMG_list.append(emg_signal[i][0])
        EDA_list.append(eda_signal[i][0])
        Temp_list.append(temp_signal[i][0])
        Resp_list.append(resp_signal[i][0])
    

In [5]:
# Map every output column name to the per-sample list that fills it
column_data = {
    "Subject_ID": subject_ID_list,
    "Label": label_list,
    "Chest_ACC": ACC_list,
    "Chest_ECG": ECG_list,
    "Chest_EMG": EMG_list,
    "Chest_EDA": EDA_list,
    "Chest_Temp": Temp_list,
    "Chest_Resp": Resp_list,
}

# Build the dataframe, keep only the baseline condition (Label == 1)
# and renumber the remaining rows from zero
result = (
    pd.DataFrame(column_data)
    .loc[lambda frame: frame["Label"] == 1]
    .reset_index(drop=True)
)

# Show the last few rows as a quick sanity check
result.tail()

Unnamed: 0,Subject_ID,Label,Chest_ACC,Chest_ECG,Chest_EMG,Chest_EDA,Chest_Temp,Chest_Resp
800795,S2,1,"[0.7148000001907349, 0.07579994201660156, -0.0...",0.308167,0.016617,1.204681,29.716492,-1.144409
800796,S2,1,"[0.714400053024292, 0.06700003147125244, -0.06...",0.33284,-0.00174,1.197052,29.762756,-1.118469
800797,S2,1,"[0.7145999670028687, 0.06420004367828369, -0.0...",0.359528,-0.005814,1.200104,29.715027,-1.078796
800798,S2,1,"[0.7244000434875488, 0.06060004234313965, -0.0...",0.38768,-0.001602,1.190948,29.717896,-1.025391
800799,S2,1,"[0.7281999588012695, 0.05060005187988281, -0.0...",0.415009,-0.028244,1.198959,29.717896,-0.996399


In [12]:
# Split by row order (no shuffling): the leading 80% of the rows become
# the training set, the remaining rows become the test set
n_rows = len(result)
first80 = int(n_rows * 0.8)
last20 = n_rows - first80

train = result.iloc[:first80]
test = result.iloc[n_rows - last20:]

800800
640640
160160
800800


In [7]:
# OPTIONAL: Select the first n rows
# NOTE: as the notebook is laid out, `train` and `test` have already been taken
# from `result` by the time this cell runs, so uncommenting the line below does not
# change what is exported. Apply it before the train/test split to subsample.
# result = result.head(100000)

# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("datasets", exist_ok=True)

# Export the training part to csv (the row index is written as the first column)
train.to_csv("datasets/subject2_baseline_train.csv", index=True)

# Export the test part to csv (the row index is written as the first column)
test.to_csv("datasets/subject2_baseline_test.csv", index=True)

KeyboardInterrupt: 