In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
import neurokit2 as nk
from scipy.signal import butter, lfilter, savgol_filter, find_peaks
from numpy.lib.stride_tricks import as_strided
import pickle
import os
import random
random.seed(9700)
import warnings 
warnings.filterwarnings('ignore')
from itertools import product


In [2]:
# Root folder of the dataset. Set the UNIVERSE_BASE_PATH environment variable to point
# the notebook at another copy of the data without editing this cell; when it is unset,
# the original location below is used.
# os.path.join(..., '') guarantees a trailing path separator, which the loading cells
# rely on because they build paths with f'{base_path}{participant}/...'.
base_path = os.path.join(
    os.environ.get(
        'UNIVERSE_BASE_PATH',
        '/dhc/cold/groups/idyll/FINAL DATA/UNIVERSE_ANONYMIZED_ZENODO_FINAL/',
    ),
    '',
)

# Loading the raw data

In [2]:

# Collect every sub-directory that sits directly under the dataset root.
participant_id = []
for entry in os.listdir(base_path):
    if os.path.isdir(os.path.join(base_path, entry)):
        participant_id.append(entry)

# Order the folders numerically by their second '_'-separated field, so that
# e.g. a folder ending in "_10" comes after one ending in "_2".
participant_id.sort(key=lambda folder: int(folder.split('_')[1]))

# Session folders visited by the loop over controlled sessions.
lab_sessions = ["Lab1", "Lab2"]

    
#### Raw data from the controlled sessions

# For every (participant, lab session) pair, read the raw Muse CSV and the four raw
# Empatica CSVs (ACC, BVP, TEMP, EDA) and preview the Muse and ACC tables.
# The df_* names are rebound on every iteration.
for participant, session in product(participant_id, lab_sessions):
    print(f"Participant: {participant}, Session: {session}")
    path = f'{base_path}{participant}/{session}/Raw/'
    print(path)
    try:
        df_muse = pd.read_csv(path + 'Muse/Muse.csv')  # raw Muse data
        print(df_muse.head())
        df_empatica_acc = pd.read_csv(path + 'Empatica/ACC.csv')  # raw accelerometer data
        df_empatica_bvp = pd.read_csv(path + 'Empatica/BVP.csv')  # raw blood volume pulse data
        df_empatica_temp = pd.read_csv(path + 'Empatica/TEMP.csv')  # raw temperature data
        df_empatica_eda = pd.read_csv(path + 'Empatica/EDA.csv')  # raw electrodermal activity data
        print(df_empatica_acc.head())
    except (FileNotFoundError, pd.errors.EmptyDataError) as err:
        # Only an absent or empty file counts as "missing data". Anything else
        # (KeyboardInterrupt, a typo, a malformed CSV, ...) is allowed to surface
        # instead of being silently mislabelled, as the former bare `except:` did.
        print(f"Missing data for this session: {err}")
    
#### Raw data from the uncontrolled sessions

# For every participant, visit each recording folder under Wild/Raw/ and read the raw
# Muse CSV and the four raw Empatica CSVs (ACC, BVP, TEMP, EDA) found inside it.
# The df_* names are rebound on every iteration.
for participant in participant_id:
    print(f"Participant: {participant}, Session: Wild")
    path = f'{base_path}{participant}/Wild/Raw/'
    try:
        # Keep only real sub-folders (stray files would otherwise be treated as
        # recordings) and sort them so the visiting order is reproducible.
        wild_folders = sorted(
            item for item in os.listdir(path)
            if os.path.isdir(os.path.join(path, item))
        )
    except (FileNotFoundError, NotADirectoryError) as err:
        print(f"Missing data for this session: {err}")
        continue

    for wild in wild_folders:
        print(f'{path}{wild}')
        # The try/except lives inside the loop so that one incomplete recording
        # folder no longer hides all the remaining folders of the participant.
        try:
            df_muse = pd.read_csv(f'{path}{wild}/Muse/Muse.csv')  # raw Muse data
            print(df_muse.head())
            df_empatica_acc = pd.read_csv(f'{path}{wild}/Empatica/ACC.csv')  # raw accelerometer data
            df_empatica_bvp = pd.read_csv(f'{path}{wild}/Empatica/BVP.csv')  # raw blood volume pulse data
            df_empatica_temp = pd.read_csv(f'{path}{wild}/Empatica/TEMP.csv')  # raw temperature data
            df_empatica_eda = pd.read_csv(f'{path}{wild}/Empatica/EDA.csv')  # raw electrodermal activity data
            print(df_empatica_acc.head())
        except (FileNotFoundError, pd.errors.EmptyDataError) as err:
            # Only an absent or empty file counts as "missing data"; other errors
            # surface instead of being swallowed by a bare `except:`.
            print(f"Missing data for this session: {err}")


# Loading Synchronized Data from the Controlled session

So that the two devices can be used jointly, the data from both has been synchronized and stretched to the maximum sampling frequency among the modalities of each individual device: 256 Hz for Muse and 64 Hz for Empatica. This is only performed for the controlled sessions. For the uncontrolled sessions, the data was synchronized based on timestamps; therefore, no stretching of the dataset was performed.

In [1]:
# Collect every sub-directory that sits directly under the dataset root.
participant_id = []
for entry in os.listdir(base_path):
    if os.path.isdir(os.path.join(base_path, entry)):
        participant_id.append(entry)

# Order the folders numerically by their second '_'-separated field, so that
# e.g. a folder ending in "_10" comes after one ending in "_2".
participant_id.sort(key=lambda folder: int(folder.split('_')[1]))

# Session folders visited by the loop over controlled sessions.
lab_sessions = ["Lab1", "Lab2"]

    
#### Synchronized data from the controlled sessions

# For every (participant, lab session) pair, read the stretched (synchronized) CSVs
# from Raw/Stretched/ and preview them.
# The df_* names are rebound on every iteration.
for participant, session in product(participant_id, lab_sessions):
    print(f"Participant: {participant}, Session: {session}")
    path = f'{base_path}{participant}/{session}/Raw/Stretched/'
    try:
        df_muse = pd.read_csv(path + 'stretched_muse.csv')  # synchronized (stretched) Muse data
        print(df_muse.head())
        # NOTE(review): this line used to read 'stretched_muse.csv' a second time, so
        # df_empatica was just a copy of the Muse table. 'stretched_empatica.csv' is the
        # presumed name of the Empatica counterpart -- confirm against the dataset layout.
        df_empatica = pd.read_csv(path + 'stretched_empatica.csv')  # synchronized (stretched) Empatica data
        print(df_empatica.head())
    except (FileNotFoundError, pd.errors.EmptyDataError) as err:
        # Only an absent or empty file counts as "missing data"; the message names
        # the offending file. Other errors surface instead of being swallowed.
        print(f"Missing data for this session: {err}")
    

# Loading Labeled Data

In [1]:
# Collect every sub-directory that sits directly under the dataset root.
participant_id = []
for entry in os.listdir(base_path):
    if os.path.isdir(os.path.join(base_path, entry)):
        participant_id.append(entry)

# Order the folders numerically by their second '_'-separated field, so that
# e.g. a folder ending in "_10" comes after one ending in "_2".
participant_id.sort(key=lambda folder: int(folder.split('_')[1]))

# Session folders visited by the labeled-data loop: both lab sessions plus "Wild".
lab_sessions = ["Lab1", "Lab2", "Wild"]

    
#### Labeled data from the controlled and uncontrolled sessions

# For every (participant, session) pair, walk <session>/Labeled/, treat each
# sub-folder whose name does not start with "questionnaire" as a task, and unpickle
# every file found inside that task folder.
for participant, session in product(participant_id, lab_sessions):
    print(f"Participant: {participant}, Session: {session}")
    path = f'{base_path}{participant}/{session}/Labeled/'
    try:
        labeled_entries = sorted(os.listdir(path))
    except (FileNotFoundError, NotADirectoryError) as err:
        print(f"Missing data for this session: {err}")
        continue

    # The directory test must be applied to each entry. The previous check,
    # os.path.isdir(path), tested the session folder itself, so plain files slipped
    # through as "tasks" and then aborted the rest of the session.
    performed_tasks = [
        item for item in labeled_entries
        if os.path.isdir(os.path.join(path, item)) and not item.startswith("questionnaire")
    ]
    for task in performed_tasks:
        task_folders_path = f'{path}{task}'
        print(task_folders_path)
        for filename in sorted(os.listdir(task_folders_path)):
            file_path = os.path.join(task_folders_path, filename)
            if not os.path.isfile(file_path):
                continue  # nested folders cannot be opened as pickle files
            try:
                with open(file_path, 'rb') as file:
                    # SECURITY: pickle.load can execute arbitrary code while
                    # deserializing -- only run this on files from a trusted source.
                    data = pickle.load(file)
            except (pickle.UnpicklingError, EOFError) as err:
                # Report unreadable files explicitly and keep going, rather than
                # mislabelling them as missing data.
                print(f"Could not unpickle {file_path}: {err}")
                continue
            print("Data from each the task", task, "for individual modality", data)
