# Importing Libraries

In [1]:
import os
import pickle
import pandas as pd
import numpy as np

# Processing the data

In [3]:
data_path = 'WESAD/WESAD'
# Number of leading rows kept per subject; bounds the size of data.csv.
MAX_ROWS_PER_SUBJECT = 10000


def _signals_to_frame(signals, prefix):
    """Flatten a ``{sensor_name: values}`` mapping into one wide DataFrame.

    Each sensor contributes one column per axis, named
    ``{prefix}_{sensor_name}_{axis_index}``. One-dimensional inputs are
    treated as a single axis. Sensors are concatenated side by side on the
    row index, so shorter sensors are padded with NaN.

    Args:
        signals: Mapping from sensor name to array-like samples (1-D or 2-D).
        prefix: Device prefix used in the generated column names.

    Returns:
        A DataFrame with the columns of all sensors, in mapping order.

    Raises:
        ValueError: If ``signals`` is empty (nothing to concatenate).
    """
    frames = []
    for sensor_name, sensor_values in signals.items():
        sensor_array = np.array(sensor_values)
        if sensor_array.ndim == 1:
            # Promote a flat signal to a single-column matrix.
            sensor_array = sensor_array.reshape(-1, 1)
        col_names = [
            f'{prefix}_{sensor_name}_{i}' for i in range(sensor_array.shape[1])
        ]
        frames.append(pd.DataFrame(sensor_array, columns=col_names))
    return pd.concat(frames, axis=1)


# Subject folders start with 'S'. os.listdir() returns entries in arbitrary
# order, so sort for a reproducible row order in data.csv, and ignore stray
# non-directory entries that merely share the prefix.
subjects = sorted(
    f for f in os.listdir(data_path)
    if f.startswith('S') and os.path.isdir(os.path.join(data_path, f))
)
data_frames = []

for subject in subjects:
    subject_path = os.path.join(data_path, subject, f'{subject}.pkl')
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only run this on a dataset copy obtained from a trusted source.
    with open(subject_path, 'rb') as file:
        data = pickle.load(file, encoding='latin1')

    chest_df = _signals_to_frame(data['signal']['chest'], 'chest')
    wrist_df = _signals_to_frame(data['signal']['wrist'], 'wrist')
    stress = data['label']

    # NOTE(review): this concat aligns chest and wrist samples by row index,
    # not by time. The WESAD readme reportedly lists different sampling rates
    # for the chest device, the wrist channels and the labels -- confirm, and
    # resample to a common rate if time alignment matters downstream.
    subject_df = pd.concat([chest_df, wrist_df], axis=1)
    subject_df['stress'] = stress
    subject_df['subject'] = subject
    subject_df = subject_df.head(MAX_ROWS_PER_SUBJECT)
    data_frames.append(subject_df)

if not data_frames:
    raise ValueError(f"No subject folders starting with 'S' found in {data_path!r}")

# Stack all subjects into one long table and persist it for the later cells.
data_df = pd.concat(data_frames, ignore_index=True)
data_df.to_csv('data.csv', index=False)

# Data Pre-processing

In [7]:
# Reload the exported dataset from disk so the pre-processing cells work on
# exactly what was saved, even on a fresh kernel.
df = pd.read_csv('data.csv')
# Report the shape of the frame that was just loaded.
print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")

Rows: 150000, Columns: 16


In [9]:
# Remove the 'stress' label column. errors='ignore' keeps the cell re-runnable:
# a second execution finds no 'stress' column and is simply a no-op instead of
# raising KeyError.
df = df.drop(columns=['stress'], errors='ignore')
# Per-column count of missing values in the remaining columns.
print(df.isnull().sum())

chest_ACC_0     0
chest_ACC_1     0
chest_ACC_2     0
chest_ECG_0     0
chest_EMG_0     0
chest_EDA_0     0
chest_Temp_0    0
chest_Resp_0    0
wrist_ACC_0     0
wrist_ACC_1     0
wrist_ACC_2     0
wrist_BVP_0     0
wrist_EDA_0     0
wrist_TEMP_0    0
subject         0
dtype: int64


In [11]:
# Preview the first five rows of the frame after the 'stress' column was dropped.
df.head(n=5)

Unnamed: 0,chest_ACC_0,chest_ACC_1,chest_ACC_2,chest_ECG_0,chest_EMG_0,chest_EDA_0,chest_Temp_0,chest_Resp_0,wrist_ACC_0,wrist_ACC_1,wrist_ACC_2,wrist_BVP_0,wrist_EDA_0,wrist_TEMP_0,subject
0,1.1278,0.152,0.3416,-1.333694,-0.013687,0.716019,33.69586,0.213623,107.0,-105.0,127.0,10.17,0.349215,33.13,S10
1,1.0932,0.1888,0.2922,-1.327744,-0.021927,0.714493,33.741333,0.192261,67.0,-52.0,45.0,12.04,0.346656,33.16,S10
2,1.0354,0.2094,0.1858,-1.322067,-0.009018,0.715637,33.71707,0.205994,26.0,40.0,0.0,13.01,0.350494,33.16,S10
3,0.9666,0.2118,0.0412,-1.316345,-0.00238,0.714874,33.741333,0.193787,52.0,12.0,46.0,13.07,0.336423,33.16,S10
4,0.8916,0.204,-0.1228,-1.310257,0.001053,0.715256,33.747406,0.172424,42.0,20.0,45.0,12.33,0.338981,33.16,S10


In [13]:
# Summary statistics of the raw wrist BVP column. This has to run before the
# column is renamed to 'BVP' further down, otherwise the name no longer exists.
df['wrist_BVP_0'].describe()

count    150000.000000
mean          0.048008
std          66.849314
min        -870.180000
25%         -19.910000
50%           2.810000
75%          21.520000
max        1062.430000
Name: wrist_BVP_0, dtype: float64

In [15]:
# Shorten the column name 'wrist_BVP_0' to 'BVP' on the existing frame.
df.rename({'wrist_BVP_0': 'BVP'}, axis='columns', inplace=True)
# Replace every BVP sample by its absolute value.
# NOTE(review): the column contains negative values before this step, so this
# changes its distribution (compare the two describe() outputs) -- confirm that
# discarding the sign is intended.
df['BVP'] = abs(df['BVP'])

In [17]:
# Preview the first five rows to confirm the column now appears as 'BVP'.
df.head(n=5)

Unnamed: 0,chest_ACC_0,chest_ACC_1,chest_ACC_2,chest_ECG_0,chest_EMG_0,chest_EDA_0,chest_Temp_0,chest_Resp_0,wrist_ACC_0,wrist_ACC_1,wrist_ACC_2,BVP,wrist_EDA_0,wrist_TEMP_0,subject
0,1.1278,0.152,0.3416,-1.333694,-0.013687,0.716019,33.69586,0.213623,107.0,-105.0,127.0,10.17,0.349215,33.13,S10
1,1.0932,0.1888,0.2922,-1.327744,-0.021927,0.714493,33.741333,0.192261,67.0,-52.0,45.0,12.04,0.346656,33.16,S10
2,1.0354,0.2094,0.1858,-1.322067,-0.009018,0.715637,33.71707,0.205994,26.0,40.0,0.0,13.01,0.350494,33.16,S10
3,0.9666,0.2118,0.0412,-1.316345,-0.00238,0.714874,33.741333,0.193787,52.0,12.0,46.0,13.07,0.336423,33.16,S10
4,0.8916,0.204,-0.1228,-1.310257,0.001053,0.715256,33.747406,0.172424,42.0,20.0,45.0,12.33,0.338981,33.16,S10


In [19]:
# Summary statistics of 'BVP' after taking absolute values (minimum is now 0).
df['BVP'].describe()

count    150000.000000
mean         38.418641
std          54.706776
min           0.000000
25%           8.810000
50%          20.820000
75%          43.650000
max        1062.430000
Name: BVP, dtype: float64