In [1]:
import os
import pandas as pd
import re

# Load The Data

## Eye Tracking Data

In [2]:
# Set the directory path where the CSV files are located
directory_path = '../data/raw/eyetracking'

# Create an empty list to store dataframes
dataframes = []

# Define a regular expression to extract name, date, category, and random number from filenames
filename_pattern = re.compile(r'(?P<name>[\w\-]+)-(?P<date>\d{4}-\d{2}-\d{2})_(?P<category>\w+)_(?P<random_number>\d+)\.csv')

# Iterate through each file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        # Match the pattern to extract metadata from the filename
        match = filename_pattern.match(filename)
        if match:
            # Extract the components from the filename
            name = match.group('name')
            date = match.group('date')
            category = match.group('category')
            random_number = match.group('random_number')
            
            # Load the CSV file into a dataframe
            file_path = os.path.join(directory_path, filename)
            df = pd.read_csv(file_path)
            
            # Add the extracted components as constant columns
            df['name'] = name
            df['date'] = date
            df['category'] = category
            df['id'] = filename.replace('.csv', '')
            df['path_name'] = filename.replace('.csv', '')
            
            # Append the dataframe to the list
            dataframes.append(df)

# Concatenate all dataframes into one
df_eyetracking = pd.concat(dataframes, ignore_index=True)


In [3]:
df_eyetracking.head()

Unnamed: 0,Timestamp,MovingTarget_X,MovingTarget_Y,EyeTracker_X,EyeTracker_Y,name,date,category,id,path_name
0,172922994431109,983.844759,307.220648,972.815616,314.788023,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853
1,172922994435202,983.76364,307.070849,969.08616,329.65092,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853
2,172922994464003,983.159359,305.961283,974.521814,367.21962,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853
3,172922994468201,983.059361,305.778737,977.196672,363.0078,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853
4,172922994472270,982.953858,305.586467,978.562224,337.5027,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853


Split SP and SEM data

In [4]:
df_eyetracking_sp = df_eyetracking[df_eyetracking['category']=='SP']

df_eyetracking_sem = df_eyetracking[df_eyetracking['category']=='SEM']

## Heartrate Data

In [5]:
df_heartrate = pd.read_csv('../data/raw/heartrate/00_HR-data.csv')

df_heartrate.head()

Unnamed: 0,row_id,SP_result,SEM_result,HR_before,HR_after,age
0,1,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72,120,21
1,2,han-2024-10-09_SP_1728433863,han-2024-10-09_SEM_1728433863,83,140,21
2,3,ar-2024-10-10_SP_1728533147,ar-2024-10-10_SEM_1728533147,84,134,21
3,4,han-2024-10-10_SP_1728534050,han-2024-10-10_SEM_1728534050,86,141,21
4,5,ar-2024-10-12_SP_1728718590,ar-2024-10-12_SEM_1728718590,94,146,21


Extract the data from id

In [6]:
# # Define a regular expression to extract name, date, category, and random number
# pattern = re.compile(r'(?P<name>[\w\-]+)-(?P<date>\d{4}-\d{2}-\d{2})_(?P<category>\w+)_(?P<random_number>\d+)')

# # Use str.extract with the regex pattern to create new columns
# df_heartrate[['name', 'date', 'category', 'random_number']] = df['SP_result'].str.extract(pattern)

# Merge both data into one dataframe

In [7]:
df_eyetracking_sp = pd.merge(df_eyetracking_sp,df_heartrate, left_on='path_name', right_on='SP_result', how='left')

df_eyetracking_sem = pd.merge(df_eyetracking_sem,df_heartrate, left_on='path_name', right_on='SEM_result', how='left')

In [8]:
df_eyetracking_sp.head()

Unnamed: 0,Timestamp,MovingTarget_X,MovingTarget_Y,EyeTracker_X,EyeTracker_Y,name,date,category,id,path_name,row_id,SP_result,SEM_result,HR_before,HR_after,age
0,172843326579763,1016.090081,401.723052,1021.80528,395.415,ar,2024-10-09,SP,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SP_1728433242,1.0,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72.0,120.0,21.0
1,172843326671260,1019.829718,439.893571,1024.466342,406.894995,ar,2024-10-09,SP,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SP_1728433242,1.0,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72.0,120.0,21.0
2,172843326675478,1019.882231,441.594809,1025.091014,410.885316,ar,2024-10-09,SP,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SP_1728433242,1.0,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72.0,120.0,21.0
3,172843326679578,1019.924879,443.286799,1025.208144,412.747227,ar,2024-10-09,SP,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SP_1728433242,1.0,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72.0,120.0,21.0
4,172843326683727,1019.957807,444.968713,1025.414424,414.661788,ar,2024-10-09,SP,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SP_1728433242,1.0,ar-2024-10-09_SP_1728433242,ar-2024-10-09_SEM_1728433242,72.0,120.0,21.0


In [9]:
df_eyetracking_sem.head()

Unnamed: 0,Timestamp,MovingTarget_X,MovingTarget_Y,EyeTracker_X,EyeTracker_Y,name,date,category,id,path_name,row_id,SP_result,SEM_result,HR_before,HR_after,age
0,172922994431109,983.844759,307.220648,972.815616,314.788023,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853,18.0,mar-2024-10-18_SP_1729229853,mar-2024-10-18_SEM_1729229853,104.0,164.0,21.0
1,172922994435202,983.76364,307.070849,969.08616,329.65092,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853,18.0,mar-2024-10-18_SP_1729229853,mar-2024-10-18_SEM_1729229853,104.0,164.0,21.0
2,172922994464003,983.159359,305.961283,974.521814,367.21962,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853,18.0,mar-2024-10-18_SP_1729229853,mar-2024-10-18_SEM_1729229853,104.0,164.0,21.0
3,172922994468201,983.059361,305.778737,977.196672,363.0078,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853,18.0,mar-2024-10-18_SP_1729229853,mar-2024-10-18_SEM_1729229853,104.0,164.0,21.0
4,172922994472270,982.953858,305.586467,978.562224,337.5027,mar,2024-10-18,SEM,mar-2024-10-18_SEM_1729229853,mar-2024-10-18_SEM_1729229853,18.0,mar-2024-10-18_SP_1729229853,mar-2024-10-18_SEM_1729229853,104.0,164.0,21.0


## Drop Unnecesarry Feature

In [10]:
df_eyetracking_sp = df_eyetracking_sp.drop(['path_name','SP_result','SEM_result'], axis =1)
df_eyetracking_sem = df_eyetracking_sem.drop(['path_name','SP_result','SEM_result'], axis =1)

## Save dataframe into csv

In [11]:
df_eyetracking_sp.to_csv('../data/interim/1_0_data_sp.csv', index=False)
df_eyetracking_sem.to_csv('../data/interim/1_1_data_sem.csv', index=False)