In [12]:
import pathlib
import zipfile
import tempfile
import io
import flirt

This notebook extracts the Empatica (E4) data needed for the assignment from the WESAD archive. The archive is
expected to have been downloaded already and to sit in the Downloads folder:

In [13]:
# Path to the full WESAD download; the outer zip is read in place, never unpacked.
download_path = pathlib.Path("~/Downloads/WESAD.zip").expanduser()

# Collect the archive members whose names contain "E4_Data" (one nested zip
# per subject, as the printed listing shows).
with zipfile.ZipFile(download_path, mode="r") as archive:
    e4s = [name for name in archive.namelist() if "E4_Data" in name]

# The `with` block has already closed the archive; no explicit close() is needed.
print(e4s)

['WESAD/S10/S10_E4_Data.zip', 'WESAD/S11/S11_E4_Data.zip', 'WESAD/S13/S13_E4_Data.zip', 'WESAD/S14/S14_E4_Data.zip', 'WESAD/S15/S15_E4_Data.zip', 'WESAD/S16/S16_E4_Data.zip', 'WESAD/S17/S17_E4_Data.zip', 'WESAD/S2/S2_E4_Data.zip', 'WESAD/S3/S3_E4_Data.zip', 'WESAD/S4/S4_E4_Data.zip', 'WESAD/S5/S5_E4_Data.zip', 'WESAD/S6/S6_E4_Data.zip', 'WESAD/S7/S7_E4_Data.zip', 'WESAD/S8/S8_E4_Data.zip', 'WESAD/S9/S9_E4_Data.zip']


The plan here is to extract and obtain features from each of these using [`flirt`](https://flirt.readthedocs.io/en/latest/index.html). We'll use `tempfile` to extract them into a temporary directory on disk that is cleaned up automatically:

In [14]:
with zipfile.ZipFile(download_path, mode="r") as archive:

    e4s = [name for name in archive.namelist() if "E4_Data" in name]

    # Extract only the first subject's E4 zip into a throwaway directory; the
    # directory and its contents are deleted when the inner `with` exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        archive.extract(e4s[0], path=tmpdirname)

        # Show what landed in the temporary directory ('*.*' matches any path
        # whose name contains a dot).
        for extracted in pathlib.Path(tmpdirname).rglob('*.*'):
            print(extracted)

# Both context managers have cleaned up by now, so no explicit close() is needed.

/var/folders/6r/df6wlbj152d0h9kj5hy0lfzm0000gn/T/tmpplaa6qkr/WESAD/S10/S10_E4_Data.zip


Ok, now we have the Empatica zip file safely in a temp directory, and it's removed as soon as we're done with it. We can safely use `flirt` to quickly pull features from it:

In [15]:
with zipfile.ZipFile(download_path, mode="r") as archive:

    e4s = [name for name in archive.namelist() if "E4_Data" in name]

    with tempfile.TemporaryDirectory() as tmpdirname:
        # extract() returns the path of the file it created, so there is no
        # need to search the temporary directory for it afterwards.
        e4_zip = archive.extract(e4s[0], path=tmpdirname)

        # Window length 60, step size 10; HRV and EDA features only
        # (accelerometer features disabled). Keyword flags make the intent
        # explicit instead of relying on positional order.
        features = flirt.simple.get_features_for_empatica_archive(
            e4_zip,
            60,
            10,
            hrv_features=True,
            eda_features=True,
            acc_features=False,
        )

# The `with` blocks above already closed the archive and removed the temp dir.

HRV features: 100%|██████████| 673/673 [00:06<00:00, 105.23it/s]
EDA features: 100%|██████████| 683/683 [00:00<00:00, 1524.32it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 10, True, True, False)


In [16]:
# Display the feature table produced by the previous cell.
features

Unnamed: 0,num_ibis,hrv_mean_nni,hrv_median_nni,hrv_range_nni,hrv_sdsd,hrv_rmssd,hrv_nni_50,hrv_pnni_50,hrv_nni_20,hrv_pnni_20,...,eda_phasic_n_above_mean,eda_phasic_n_below_mean,eda_phasic_n_sign_changes,eda_phasic_iqr,eda_phasic_iqr_5_95,eda_phasic_pct_5,eda_phasic_pct_95,eda_phasic_entropy,eda_phasic_perm_entropy,eda_phasic_svd_entropy
2017-07-25 07:06:08+00:00,,,,,,,,,,,...,40.0,200.0,1.0,0.004708,0.112202,5.816902e-08,0.112202,3.839339,0.559633,0.669424
2017-07-25 07:06:18+00:00,,,,,,,,,,,...,69.0,171.0,1.0,0.000649,0.002060,3.445207e-07,0.002061,4.532330,0.673947,0.736799
2017-07-25 07:06:28+00:00,21.0,685.340476,709.854,316.978786,61.834959,62.044975,5.642857,20.899471,18.071429,66.931217,...,65.0,175.0,2.0,0.000659,0.002768,1.796683e-07,0.002768,-inf,0.743334,0.735023
2017-07-25 07:06:38+00:00,12.5,675.171354,691.996,294.656357,56.377509,56.555214,4.928571,18.253968,16.214286,60.052910,...,60.0,180.0,3.0,0.000660,0.003950,-5.081750e-04,0.003442,-inf,0.997829,0.838709
2017-07-25 07:06:48+00:00,10.0,665.002233,674.138,272.333929,50.920058,51.065454,4.214286,15.608466,14.357143,53.174603,...,43.0,197.0,2.0,0.002900,0.081645,1.155597e-05,0.081657,-inf,0.909625,0.542774
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-25 08:59:08+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,47.0,121.0,2.0,0.009128,0.038800,-1.752137e-03,0.037048,-inf,0.808868,0.708604
2017-07-25 08:59:18+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,42.0,86.0,3.0,0.004805,0.018692,-2.589150e-03,0.016103,-inf,0.989588,0.626050
2017-07-25 08:59:28+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,30.0,58.0,2.0,0.003783,0.020383,-4.293134e-03,0.016090,-inf,0.977418,0.641920
2017-07-25 08:59:38+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,19.0,29.0,3.0,0.008166,0.014193,-1.484335e-03,0.012709,-inf,0.970951,0.693176


Ok, it looks like that works pretty well. The next question is, what features do we want and what parameters should we use for our windows?

> In our case, we only want the HRV and EDA features

We're given that, but no indication of whether this is an instruction or a default:

> Below is the sample code to get the HRV and EDA features with a window size of 60 seconds and step size of 10 seconds, for participant S2.

Here's a hint from the paper (which used this exact dataset):

> ...we calculated all features uniformly over a 60 seconds time window, with a sliding window shift of 1/4 seconds, and assigned the corresponding emotion label to each window

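So the paper used a 60-second window with a 0.25-second step. Before going that fine-grained, let's see how efficient extraction is (the next cell still uses a 10-second step):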

In [17]:
with zipfile.ZipFile(download_path, mode="r") as archive:

    e4s = [name for name in archive.namelist() if "E4_Data" in name]
    # Index 14 is the final entry of the 15-member listing printed earlier.
    target = e4s[14]
    with tempfile.TemporaryDirectory() as tmpdirname:
        print("extracting ", target)
        # extract() returns the path of the file it created, so there is no
        # need to search the temporary directory for it afterwards.
        e4_zip = archive.extract(target, path=tmpdirname)

        # NOTE: still a 60 window with a step of 10 here, not the paper's
        # quarter-second shift.
        features = flirt.simple.get_features_for_empatica_archive(
            e4_zip,
            60,
            10,
            hrv_features=True,
            eda_features=True,
            acc_features=False,
        )

# The `with` blocks above already closed the archive and removed the temp dir.

extracting  WESAD/S9/S9_E4_Data.zip


HRV features: 100%|██████████| 603/603 [00:00<00:00, 4819.38it/s]
EDA features: 100%|██████████| 623/623 [00:00<00:00, 1637.47it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 10, hrv_features=True, eda_features=True, acc_features=False)


In [18]:
# Display the feature table produced by the previous cell.
features

Unnamed: 0,num_ibis,hrv_mean_nni,hrv_median_nni,hrv_range_nni,hrv_sdsd,hrv_rmssd,hrv_nni_50,hrv_pnni_50,hrv_nni_20,hrv_pnni_20,...,eda_phasic_n_above_mean,eda_phasic_n_below_mean,eda_phasic_n_sign_changes,eda_phasic_iqr,eda_phasic_iqr_5_95,eda_phasic_pct_5,eda_phasic_pct_95,eda_phasic_entropy,eda_phasic_perm_entropy,eda_phasic_svd_entropy
2017-07-11 11:11:41+00:00,,,,,,,,,,,...,72.0,168.0,1.0,0.088667,0.271218,0.001759,0.272977,4.690184,0.948410,0.583043
2017-07-11 11:11:51+00:00,,,,,,,,,,,...,82.0,158.0,1.0,0.157376,0.267162,0.001049,0.268212,4.643987,0.960795,0.435477
2017-07-11 11:12:01+00:00,9.7,,,,,,,,,,...,87.0,153.0,2.0,0.130156,0.281183,-0.001260,0.279923,-inf,0.990617,0.449231
2017-07-11 11:12:11+00:00,5.6,,,,,,,,,,...,94.0,146.0,3.0,0.159137,0.326278,-0.003468,0.322810,-inf,0.999371,0.494555
2017-07-11 11:12:21+00:00,4.3,,,,,,,,,,...,98.0,142.0,2.0,0.137329,0.307732,0.000000,0.307732,-inf,0.971442,0.485285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-11 12:54:41+00:00,22.0,811.826864,828.163,281.263,56.295101,56.33934,8.0,36.363636,16.0,72.727273,...,51.0,141.0,1.0,0.044629,0.209084,0.001453,0.210537,4.446341,0.791858,0.556641
2017-07-11 12:54:51+00:00,22.0,811.826864,828.163,281.263,56.295101,56.33934,8.0,36.363636,16.0,72.727273,...,62.0,90.0,2.0,0.014448,0.047253,-0.003118,0.044135,-inf,0.829368,0.690081
2017-07-11 12:55:01+00:00,22.0,811.826864,828.163,281.263,56.295101,56.33934,8.0,36.363636,16.0,72.727273,...,43.0,69.0,2.0,0.013512,0.050809,0.000496,0.051305,-inf,0.792591,0.691078
2017-07-11 12:55:11+00:00,22.0,811.826864,828.163,281.263,56.295101,56.33934,8.0,36.363636,16.0,72.727273,...,29.0,43.0,2.0,0.013757,0.023660,-0.005288,0.018372,-inf,0.965636,0.639546


In [19]:
import pathlib
import zipfile
import tempfile

import flirt

def extract_features_e4(WESAD_zip_path, subject_index=0, hrv=True, eda=True):
    '''
    Extract flirt features for one subject's Empatica E4 recording.

    The subject's nested "E4_Data" zip is pulled out of the WESAD archive into
    a temporary directory (deleted again before returning) and handed to
    flirt.simple.get_features_for_empatica_archive with a window length of 60
    and a step size of 10. Accelerometer features are always disabled.

    args:
        WESAD_zip_path: str or path to your WESAD zip file
        subject_index: int which subject to extract, indexing the archive's
            "E4_Data" members in listing order (between 0 and 14 for the
            full dataset)
        hrv: bool passed to get_features_for_empatica_archive -> hrv_features
        eda: bool passed to get_features_for_empatica_archive -> eda_features

    returns:
        the feature table returned by flirt, with an extra 'participant_id'
        column holding the second component of the member path (e.g. "S10")

    raises:
        IndexError: if subject_index is outside the list of E4 members
    '''
    with zipfile.ZipFile(WESAD_zip_path, mode="r") as archive:

        e4s = [name for name in archive.namelist() if "E4_Data" in name]

        target = e4s[subject_index]

        print(target)
        with tempfile.TemporaryDirectory() as tmpdirname:
            print("extracting ", target)
            # extract() returns the path of the file it created, so there is
            # no need to search the temporary directory for it afterwards.
            e4_zip = archive.extract(target, path=tmpdirname)

            # eda_features must follow the `eda` argument; it was previously
            # wired to `hrv`, so the `eda` flag was silently ignored.
            features = flirt.simple.get_features_for_empatica_archive(
                e4_zip,
                60,
                10,
                hrv_features=hrv,
                eda_features=eda,
                acc_features=False,
            )
            # Member names look like "WESAD/S10/S10_E4_Data.zip".
            features['participant_id'] = target.split("/")[1]

    # The `with` block has already closed the archive.
    return features

# Point this at your full WESAD zip download; it does not need to be unpacked.
download_path = pathlib.Path.home() / "Downloads" / "WESAD.zip"

# Run flirt on the first E4 member of the archive and display the result.
extract_features_e4(download_path, subject_index=0, hrv=True)

WESAD/S10/S10_E4_Data.zip
extracting  WESAD/S10/S10_E4_Data.zip


HRV features: 100%|██████████| 673/673 [00:00<00:00, 7969.86it/s]
EDA features: 100%|██████████| 683/683 [00:00<00:00, 1809.99it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 10, hrv_features=hrv, eda_features=hrv, acc_features=False)


Unnamed: 0,num_ibis,hrv_mean_nni,hrv_median_nni,hrv_range_nni,hrv_sdsd,hrv_rmssd,hrv_nni_50,hrv_pnni_50,hrv_nni_20,hrv_pnni_20,...,eda_phasic_n_below_mean,eda_phasic_n_sign_changes,eda_phasic_iqr,eda_phasic_iqr_5_95,eda_phasic_pct_5,eda_phasic_pct_95,eda_phasic_entropy,eda_phasic_perm_entropy,eda_phasic_svd_entropy,participant_id
2017-07-25 07:06:08+00:00,,,,,,,,,,,...,200.0,1.0,0.004708,0.112202,5.816902e-08,0.112202,3.839339,0.559633,0.669424,S10
2017-07-25 07:06:18+00:00,,,,,,,,,,,...,171.0,1.0,0.000649,0.002060,3.445207e-07,0.002061,4.532330,0.673947,0.736799,S10
2017-07-25 07:06:28+00:00,21.0,685.340476,709.854,316.978786,61.834959,62.044975,5.642857,20.899471,18.071429,66.931217,...,175.0,2.0,0.000659,0.002768,1.796683e-07,0.002768,-inf,0.743334,0.735023,S10
2017-07-25 07:06:38+00:00,12.5,675.171354,691.996,294.656357,56.377509,56.555214,4.928571,18.253968,16.214286,60.052910,...,180.0,3.0,0.000660,0.003950,-5.081750e-04,0.003442,-inf,0.997829,0.838709,S10
2017-07-25 07:06:48+00:00,10.0,665.002233,674.138,272.333929,50.920058,51.065454,4.214286,15.608466,14.357143,53.174603,...,197.0,2.0,0.002900,0.081645,1.155597e-05,0.081657,-inf,0.909625,0.542774,S10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-25 08:59:08+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,121.0,2.0,0.009128,0.038800,-1.752137e-03,0.037048,-inf,0.808868,0.708604,S10
2017-07-25 08:59:18+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,86.0,3.0,0.004805,0.018692,-2.589150e-03,0.016103,-inf,0.989588,0.626050,S10
2017-07-25 08:59:28+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,58.0,2.0,0.003783,0.020383,-4.293134e-03,0.016090,-inf,0.977418,0.641920,S10
2017-07-25 08:59:38+00:00,24.0,685.578125,656.280,328.140000,75.756019,75.783431,4.000000,16.666667,10.000000,41.666667,...,29.0,3.0,0.008166,0.014193,-1.484335e-03,0.012709,-inf,0.970951,0.693176,S10


Now we know we can extract all of these data and write them to file:

In [20]:
import pathlib
import zipfile
import tempfile

import flirt

def extract_features_e4(WESAD_zip_path, output_dir, subject_index=0, hrv=True, eda=True):
    '''
    Extract flirt features for one subject's Empatica E4 recording and write
    them to "<output_dir>/<participant_id>.csv".

    The subject's nested "E4_Data" zip is pulled out of the WESAD archive into
    a temporary directory (deleted again before returning) and handed to
    flirt.simple.get_features_for_empatica_archive with a window length of 60
    and a step size of 1. Accelerometer features are always disabled.

    args:
        WESAD_zip_path: str or path to your WESAD zip file
        output_dir: str or path of the directory that receives the CSV; it is
            created if it does not exist yet
        subject_index: int which subject to extract, indexing the archive's
            "E4_Data" members in listing order (between 0 and 14 for the
            full dataset)
        hrv: bool passed to get_features_for_empatica_archive -> hrv_features
        eda: bool passed to get_features_for_empatica_archive -> eda_features

    returns:
        None; the result is the CSV file written to output_dir

    raises:
        IndexError: if subject_index is outside the list of E4 members
    '''
    with zipfile.ZipFile(WESAD_zip_path, mode="r") as archive:

        e4s = [name for name in archive.namelist() if "E4_Data" in name]

        target = e4s[subject_index]

        # Member names look like "WESAD/S10/S10_E4_Data.zip".
        participant_id = target.split("/")[1]
        with tempfile.TemporaryDirectory() as tmpdirname:
            print("extracting ", target)
            # extract() returns the path of the file it created, so there is
            # no need to search the temporary directory for it afterwards.
            e4_zip = archive.extract(target, path=tmpdirname)

            # eda_features must follow the `eda` argument; it was previously
            # wired to `hrv`, so the `eda` flag was silently ignored.
            features = flirt.simple.get_features_for_empatica_archive(
                e4_zip,
                60,
                1,
                hrv_features=hrv,
                eda_features=eda,
                acc_features=False,
            )
            features['participant_id'] = participant_id

    # Build the destination with pathlib so output_dir may be a str or a Path,
    # and make sure the directory exists before writing.
    destination_dir = pathlib.Path(output_dir)
    destination_dir.mkdir(parents=True, exist_ok=True)

    # reset_index() turns the index into a regular column so it is kept in the
    # CSV even though index=False is passed.
    features.reset_index().to_csv(destination_dir / (participant_id + ".csv"), index=False)

    return

# Point this at your full WESAD zip download; it does not need to be unpacked.
download_path = pathlib.Path.home() / "Downloads" / "WESAD.zip"

# Run flirt on the first E4 member of the archive and write its CSV.
extract_features_e4(download_path, "../extdata", subject_index=0, hrv=True)

extracting  WESAD/S10/S10_E4_Data.zip


HRV features: 100%|██████████| 6729/6729 [00:01<00:00, 5435.83it/s]
EDA features: 100%|██████████| 6822/6822 [00:06<00:00, 1114.01it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


Cool, now we run the whole set:

In [21]:
# Process every E4 member found in the archive, in listing order. A plain loop
# is used because the function is called only for its side effect (one CSV per
# subject); a comprehension would build a throwaway list of None values and
# rebind `_`, which IPython uses for the last output.
for subject_index in range(len(e4s)):
    extract_features_e4(download_path, "../extdata", subject_index=subject_index, hrv=True, eda=True)

extracting  WESAD/S10/S10_E4_Data.zip


HRV features: 100%|██████████| 6729/6729 [00:01<00:00, 6632.71it/s]
EDA features: 100%|██████████| 6822/6822 [00:06<00:00, 1072.73it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S11/S11_E4_Data.zip


HRV features: 100%|██████████| 6282/6282 [00:00<00:00, 10567.33it/s]
EDA features: 100%|██████████| 6461/6461 [00:07<00:00, 883.36it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S13/S13_E4_Data.zip


HRV features: 100%|██████████| 6629/6629 [00:00<00:00, 10134.46it/s]
EDA features: 100%|██████████| 6863/6863 [00:07<00:00, 953.37it/s] 
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S14/S14_E4_Data.zip


HRV features: 100%|██████████| 6902/6902 [00:00<00:00, 7702.03it/s]
EDA features: 100%|██████████| 6986/6986 [00:08<00:00, 818.74it/s] 
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S15/S15_E4_Data.zip


HRV features: 100%|██████████| 6557/6557 [00:00<00:00, 7061.62it/s]
EDA features: 100%|██████████| 6642/6642 [00:06<00:00, 1015.20it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S16/S16_E4_Data.zip


HRV features: 100%|██████████| 7021/7021 [00:00<00:00, 10729.84it/s]
EDA features: 100%|██████████| 7107/7107 [00:06<00:00, 1113.99it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S17/S17_E4_Data.zip


HRV features: 100%|██████████| 7098/7098 [00:00<00:00, 15459.90it/s]
EDA features: 100%|██████████| 7232/7232 [00:06<00:00, 1052.05it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S2/S2_E4_Data.zip


HRV features: 100%|██████████| 7782/7782 [00:00<00:00, 12383.99it/s]
EDA features: 100%|██████████| 7874/7874 [00:08<00:00, 951.26it/s] 
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S3/S3_E4_Data.zip


HRV features: 100%|██████████| 7473/7473 [00:00<00:00, 14283.56it/s]
EDA features: 100%|██████████| 7724/7724 [00:07<00:00, 1016.89it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S4/S4_E4_Data.zip


HRV features: 100%|██████████| 7771/7771 [00:00<00:00, 11623.49it/s]
EDA features: 100%|██████████| 8000/8000 [00:07<00:00, 1030.18it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S5/S5_E4_Data.zip


HRV features: 100%|██████████| 6988/6988 [00:00<00:00, 17603.03it/s]
EDA features: 100%|██████████| 7551/7551 [00:08<00:00, 905.72it/s] 
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S6/S6_E4_Data.zip


HRV features: 100%|██████████| 8085/8085 [00:00<00:00, 14709.46it/s]
EDA features: 100%|██████████| 8324/8324 [00:07<00:00, 1044.79it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S7/S7_E4_Data.zip


HRV features: 100%|██████████| 6403/6403 [00:00<00:00, 7661.70it/s]
EDA features: 100%|██████████| 6494/6494 [00:05<00:00, 1083.06it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S8/S8_E4_Data.zip


HRV features: 100%|██████████| 6541/6541 [00:00<00:00, 14079.77it/s]
EDA features: 100%|██████████| 6638/6638 [00:06<00:00, 1086.10it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)


extracting  WESAD/S9/S9_E4_Data.zip


HRV features: 100%|██████████| 6025/6025 [00:00<00:00, 11194.60it/s]
EDA features: 100%|██████████| 6228/6228 [00:05<00:00, 1069.43it/s]
  features = flirt.simple.get_features_for_empatica_archive(e4_zip, 60, 1, hrv_features=hrv, eda_features=hrv, acc_features=False)
