In [None]:
# import libraries
import os
import csv
import copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive (Colab only) so the files under '/content/drive/My Drive' can be read and written
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make Participant List
# Subject numbers run from 1 to 17; numbers 1 and 12 are left out of the list.
excluded_numbers = (1, 12)
participants = ["S" + str(number) for number in range(1, 18) if number not in excluded_numbers]

print(participants)

# Same list without the leading "S" (the ids stay strings, e.g. '2')
participants_id = [name[1:] for name in participants]
print(participants_id)

['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S13', 'S14', 'S15', 'S16', 'S17']
['2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '13', '14', '15', '16', '17']


# **Extracting Labels**
The ground truth of this dataset is in each participant's quest file (named `S#_quest.csv`). This file contains a lot of information, including time stamps that mark the start and end of the different stages of the participant's experiment: baseline, stress (TSST), amusement, meditation, etc.

The time stamps are formatted as `minutes.seconds` (e.g. `7.08` means 7 minutes and 8 seconds) and are measured from the beginning of the chest data measurements.

First, the labels need to be extracted from the quest files. We will label our data as either "stressed" (1) or "not stressed" (0). The labels are then saved to a tidy CSV file for each participant.

In [None]:
# Function to access labels
def getLabel(participant):
  """Load the raw quest file (S#_quest.csv) of one participant.

  participant: number that identifies the participant; it is inserted into
  both the folder name (S#) and the file name (S#_quest.csv).

  Returns the file content as a pandas dataframe, read with the default
  pd.read_csv settings.
  """
  quest_template = '/content/drive/My Drive/stress_data_unzipped/WESAD/S{}/S{}_quest.csv'
  quest_file = quest_template.format(participant, participant)
  return pd.read_csv(quest_file)

In [None]:
# Test: load the raw quest file of participant 2 and look at it.
# The file is ';'-separated, so with the default comma parsing every line
# ends up as one long string in a single column.
label_s2 = getLabel(2)
display(label_s2)

Unnamed: 0,# Subj;S2;;;;;;;;;;;;;;;;;;;;;;;;;
0,# ORDER;Base;TSST;Medi 1;Fun;Medi 2;sRead;fRea...
1,# START;7.08;39.55;70.19;81.25;93.38;54.42;89....
2,# END;26.32;50.3;77.1;87.47;100.15;56.07;91.15...
3,;;;;;;;;;;;;;;;;;;;;;;;;;;
4,# PANAS;1;1;3;2;1;3;1;1;1;2;2;2;2;1;4;3;4;4;2;...
5,# PANAS;3;2;4;1;3;3;1;2;1;4;2;4;3;1;5;4;4;4;2;...
6,# PANAS;1;1;2;3;1;2;1;1;1;1;1;1;3;1;2;1;2;3;1;...
7,# PANAS;1;1;2;3;1;1;1;1;1;1;1;1;2;1;4;1;1;3;1;...
8,# PANAS;1;1;1;2;1;1;1;1;1;1;1;1;2;1;2;1;1;2;1;...
9,;;;;;;;;;;;;;;;;;;;;;;;;;;


In [None]:
def getTimeStamps(participant):
  """Return the start and end time stamp of each experiment stage of a participant.

  The quest file is loaded with getLabel. Its first three data rows
  (# ORDER, # START, # END) are ';'-separated strings that list the stage
  names, their start times and their end times in matching positions.

  participant: number that identifies the participant (the # in S#_quest.csv).

  Returns a dictionary whose keys are the stage names exactly as written in
  the file (e.g. 'Base', 'TSST', 'Medi 1', 'Fun', 'Medi 2') and whose values
  are (start, end) tuples of "minutes.seconds" strings. Every stage listed
  after 'Medi 2' in the ORDER row is dropped.

  Raises ValueError if the ORDER row has no 'Medi 2' entry.
  """

  label = getLabel(participant)

  # Each row was read as one long string stored in the first (only) column.
  # A single positional lookup iloc[row, column] is used on purpose:
  # iloc[row][0] depends on Series.__getitem__ treating the integer key as a
  # position on a label-indexed Series, which is deprecated in pandas.
  order = label.iloc[0, 0].split(";")   # stage names
  start = label.iloc[1, 0].split(";")   # start times
  end = label.iloc[2, 0].split(";")     # end times

  # "Medi 2" is the last time segment to keep
  last_index = order.index("Medi 2")

  # drop the leading row tag ("# ORDER", "# START", "# END") and everything after "Medi 2"
  order = order[1:last_index+1]
  start = start[1:last_index+1]
  end = end[1:last_index+1]

  # pair every stage name with its (start, end) time stamps
  label_dict = {}
  for position, stage in enumerate(order):
    label_dict[stage] = (start[position], end[position])

  return label_dict

In [None]:
# Test: time stamps of participant 2, still as raw "minutes.seconds" strings
label_dict2 = getTimeStamps(2)
print(label_dict2)

{'Base': ('7.08', '26.32'), 'TSST': ('39.55', '50.3'), 'Medi 1': ('70.19', '77.1'), 'Fun': ('81.25', '87.47'), 'Medi 2': ('93.38', '100.15')}


In [None]:
# Scratch example of the target label table layout: one row per labelled time period
df = pd.DataFrame(columns = ["label", "name", "start_time", "end_time", "label_note"])
df.loc[1] = [0,'S2', 7.08, 26.32, 'base']

df

Unnamed: 0,label,name,start_time,end_time,label_note
1,0,S2,7.08,26.32,base


In [None]:
# helper function to convert minute.second format to second

def getSeconds(timestamp):
  """Convert a "minutes.seconds" time stamp into a total number of seconds.

  timestamp: string such as '7.08' (7 min 8 s), '50.3' (50 min 30 s) or
  '7' (7 min, no seconds).

  The quest files do not keep trailing zeros: a full minute is written
  without any "." and, in the same way, 50 min 30 s is written as '50.3'
  (3 seconds would be written '50.03', like '7.08'). A one-digit seconds part
  is therefore the tens digit and is padded on the right with a zero.

  Returns the total number of seconds as an int.
  Raises ValueError if the minutes or seconds part is not an integer.
  """
  # if there is no "." in the time stamp then assume no seconds
  if ('.' not in timestamp):
    return int(timestamp)*60

  parts = timestamp.split('.')
  minutes_text = parts[0]
  seconds_text = parts[1]

  # restore the trailing zero that was dropped from the file: '3' -> '30'
  if len(seconds_text) == 1:
    seconds_text = seconds_text + '0'

  return int(minutes_text)*60 + int(seconds_text)

In [None]:
# function that creates pandas dataframe of labeled time stamps
# labels will only either be stressed/not stressed

def makeLabelsDF(participant):
  """Build the table of labelled time periods of one participant.

  The time stamps come from getTimeStamps and are converted to seconds with
  getSeconds.

  participant: number that identifies the participant (the # in S#).

  Returns a pandas dataframe with one row per stage, indexed from 1, with columns
    label      - 1 if the stage is "TSST" (stressed), 0 otherwise (not stressed)
    name       - participant name, 'S#'
    start_time - start of the stage in seconds
    end_time   - end of the stage in seconds
    label_note - stage name as found in the quest file
  """
  labels_dict = getTimeStamps(participant)

  columns = ["label","name","start_time","end_time","label_note"]
  name = 'S{}'.format(participant)

  # collect all rows first and build the dataframe once, instead of growing
  # it one row at a time
  rows = []
  for key, (start_stamp, end_stamp) in labels_dict.items():
    # label either 0 or 1. 0 = not stressed, 1 = stressed
    label = 1 if key == "TSST" else 0
    rows.append([label, name, getSeconds(start_stamp), getSeconds(end_stamp), key])

  # keep the 1-based row index used when the rows were added one by one
  return pd.DataFrame(rows, columns=columns, index=range(1, len(rows) + 1))

In [None]:
# test for S2: one row per stage, times in seconds
df_s2 = makeLabelsDF(2)
display(df_s2)

# test for s4: the stages appear in the order listed in that participant's quest file
df_s4 = makeLabelsDF(4)
display(df_s4)

Unnamed: 0,label,name,start_time,end_time,label_note
1,0,S2,428,1592,Base
2,1,S2,2395,3003,TSST
3,0,S2,4219,4621,Medi 1
4,0,S2,4885,5267,Fun
5,0,S2,5618,6015,Medi 2


Unnamed: 0,label,name,start_time,end_time,label_note
1,0,S4,352,1503,Base
2,0,S4,1899,2291,Fun
3,0,S4,2752,3180,Medi 1
4,1,S4,3662,4335,TSST
5,0,S4,5743,6124,Medi 2


In [None]:
# make labels for all participants and save
# each table is written as labels.csv inside the participant's own folder
path = '/content/drive/My Drive/stress_data_unzipped/WESAD/S{}/'

for participant in participants_id:
  df = makeLabelsDF(participant)
  # index=False: the row index is not written to the file
  df.to_csv(path.format(participant) + "labels.csv", index=False)

# **Splitting Labels**

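Each labelled time period is split into consecutive 60-second segments; any remainder shorter than 60 seconds at the end of a period is discarded.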

In [None]:
def getNewLabels(participant):
  """Load the labels.csv file of one participant.

  participant: number that identifies the participant folder (S#).

  Returns the file content as a pandas dataframe.
  """
  labels_file = '/content/drive/My Drive/stress_data_unzipped/WESAD/S{}/labels.csv'.format(participant)
  return pd.read_csv(labels_file)



In [None]:
# Test: read the saved labels of participant 2 back from disk
df_test = getNewLabels(2)
display(df_test)

Unnamed: 0,label,name,start_time,end_time,label_note
0,0,S2,428,1592,Base
1,1,S2,2395,3003,TSST
2,0,S2,4219,4621,Medi 1
3,0,S2,4885,5267,Fun
4,0,S2,5618,6015,Medi 2


In [None]:
def splitLabels(participant, segment_length=60):
  """Split every labelled time period of a participant into fixed-length slices.

  The labelled periods are read with getNewLabels. Each period is cut into
  consecutive slices of segment_length seconds starting at the period's
  start time; a remainder shorter than segment_length at the end of a
  period is dropped.

  participant: number that identifies the participant folder (S#).
  segment_length: slice length in seconds, a positive integer (default 60).

  Returns a pandas dataframe with columns label, name, start_time, end_time
  and label_note, one row per slice. Each slice keeps the label, name and
  label_note of the period it was cut from.

  Raises ValueError if segment_length is not positive.
  """
  if segment_length <= 0:
    raise ValueError("segment_length must be a positive number of seconds")

  # get labels of participant
  labels = getNewLabels(participant)

  # list of rows, one per slice
  slices = []

  # iterate through each labelled time period
  periods = zip(labels['label'], labels['name'], labels['start_time'],
                labels['end_time'], labels['label_note'])
  for period_label, period_name, period_start, period_end, period_note in periods:
    first_start = int(period_start)
    # last start value that still leaves room for a full slice before the end
    last_start = int(period_end) - segment_length

    for slice_start in range(first_start, last_start + 1, segment_length):
      slices.append([period_label, period_name, slice_start,
                     slice_start + segment_length, period_note])

  # make df
  header = ['label', 'name', 'start_time', 'end_time', 'label_note']
  return pd.DataFrame(slices, columns = header)

In [None]:
# test: show the labelled periods of participant 2 next to their slices
print('original labels:')
print("")
display(getNewLabels(2))

print("")
print("Split labels: ")
print("")
sliced_df = splitLabels(2)
display(sliced_df)



original labels:



Unnamed: 0,label,name,start_time,end_time,label_note
0,0,S2,428,1592,Base
1,1,S2,2395,3003,TSST
2,0,S2,4219,4621,Medi 1
3,0,S2,4885,5267,Fun
4,0,S2,5618,6015,Medi 2



Split labels: 



Unnamed: 0,label,name,start_time,end_time,label_note
0,0,S2,428,488,Base
1,0,S2,488,548,Base
2,0,S2,548,608,Base
3,0,S2,608,668,Base
4,0,S2,668,728,Base
5,0,S2,728,788,Base
6,0,S2,788,848,Base
7,0,S2,848,908,Base
8,0,S2,908,968,Base
9,0,S2,968,1028,Base


In [None]:
# slice labels for all participants
# each sliced table is written as labels_split.csv inside the participant's own folder
path = '/content/drive/My Drive/stress_data_unzipped/WESAD/S{}/labels_split.csv'

for participant in participants_id:
  split_df = splitLabels(participant)
  # index=False: the row index is not written to the file
  split_df.to_csv(path.format(participant), index=False)

In [None]:
def getSplitLabels(participant):
  """Load the labels_split.csv file of one participant.

  participant: number that identifies the participant folder (S#).

  Returns the file content as a pandas dataframe.
  """
  split_file = '/content/drive/My Drive/stress_data_unzipped/WESAD/S{}/labels_split.csv'.format(participant)
  split_labels = pd.read_csv(split_file)
  return split_labels

In [None]:
# Test: read the saved split labels of participants 2 and 6 back from disk
display(getSplitLabels(2))
display(getSplitLabels(6))

Unnamed: 0,label,name,start_time,end_time,label_note
0,0,S2,428,488,Base
1,0,S2,488,548,Base
2,0,S2,548,608,Base
3,0,S2,608,668,Base
4,0,S2,668,728,Base
5,0,S2,728,788,Base
6,0,S2,788,848,Base
7,0,S2,848,908,Base
8,0,S2,908,968,Base
9,0,S2,968,1028,Base


Unnamed: 0,label,name,start_time,end_time,label_note
0,0,S6,677,737,Base
1,0,S6,737,797,Base
2,0,S6,797,857,Base
3,0,S6,857,917,Base
4,0,S6,917,977,Base
5,0,S6,977,1037,Base
6,0,S6,1037,1097,Base
7,0,S6,1097,1157,Base
8,0,S6,1157,1217,Base
9,0,S6,1217,1277,Base
