# Cognitive Stress Detection from Wearable Physiological Signals

**Goal**: Build a classifier to detect cognitive stress vs rest states using physiological data from wearable devices.

## Dataset Overview:
- **22 subjects** (10 males V1 protocol, 12 females V2 protocol)
- **Signals**: EDA, BVP, HR, IBI (for HRV), Temperature, Accelerometer
- **Cognitive Tasks**: Stroop test, Trier Mental Challenge Test (TMCT), Opinion speeches, Subtraction test
- **Rest Periods**: Baseline, rest between tasks

## Stress Induction Protocol

### First version (S01 to S18)

![Stress induction protocol, first version](/Users/camilletran/Documents/Capstone/behavior-cap/Capstone-Behavorial/stress_v1.png)

### Second version (f01 to f18)

![Stress induction protocol, second version](/Users/camilletran/Documents/Capstone/behavior-cap/Capstone-Behavorial/stress_v2.png)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import datetime
from scipy import stats

In [None]:
# Input locations, given as relative paths.

# Root folder that holds one sub-folder of recordings per subject.
dataset_path = '22subjects/STRESS'

# Tabular metadata: subject information and the self-reported stress
# levels for each protocol version.
subject_info_path = 'subject-info.csv'
stress_level_v1_path = 'Stress_Level_v1.csv'
stress_level_v2_path = 'Stress_Level_v2.csv'

## Define Helper Functions from Wearable_Dataset.ipynb

These helper functions read and process the physiological data files.

In [None]:
def create_df_array(dataframe):
    """Return every cell of ``dataframe`` as one 1-D numpy array.

    The cells are laid out row by row (the default order of
    ``ndarray.flatten``), and the returned array is a copy.
    """
    cell_grid = dataframe.values
    flat_cells = cell_grid.flatten()
    return flat_cells


def time_abs_(UTC_array):
    """Converts UTC timestamps to seconds from the start of recording.

    Args:
        UTC_array: Sequence (list or numpy array) of timestamp strings in
            ``'%Y-%m-%d %H:%M:%S'`` format. The first element is taken as
            the start of the recording.

    Returns:
        A list with one int per input timestamp: the whole number of
        seconds elapsed since the first timestamp (negative if a timestamp
        precedes it). An empty input yields an empty list.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    # len() rather than truthiness: UTC_array may be a numpy array, whose
    # truth value is ambiguous.
    if len(UTC_array) == 0:
        return []

    timestamp_format = '%Y-%m-%d %H:%M:%S'
    parsed_times = [
        datetime.datetime.strptime(utc, timestamp_format) for utc in UTC_array
    ]
    start_time = parsed_times[0]

    return [
        int((current_time - start_time).total_seconds())
        for current_time in parsed_times
    ]


def moving_average(acc_data, window_size=32):
    """
    Applies a moving average filter to accelerometer data to measure movement.
    Higher values = more movement, Lower values = less movement

    For every window of ``window_size`` consecutive samples, the largest
    per-axis absolute change between successive samples is summed, divided by
    ``window_size`` and blended into a running value with weights 0.9 (previous
    value) and 0.1 (current window).

    Args:
        acc_data: 2-D array-like whose first three columns are the X, Y and Z
            accelerometer axes (one row per sample). Nested lists are accepted.
        window_size: Number of samples per window. The default of 32 is one
            second of data at a 32 Hz sampling rate.

    Returns:
        A list with one smoothed movement value per window (empty for empty
        input). A trailing partial window is still divided by ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Allow plain nested lists as well as numpy arrays.
    acc_data = np.asarray(acc_data)

    avg = 0
    # The previous sample carries over between windows; it starts at the origin,
    # so the very first sample is compared against (0, 0, 0).
    prev_x, prev_y, prev_z = 0, 0, 0
    results = []

    for start in range(0, len(acc_data), window_size):
        window = acc_data[start:start + window_size]

        sum_ = 0
        for x, y, z in zip(window[:, 0], window[:, 1], window[:, 2]):
            sum_ += max(abs(x - prev_x), abs(y - prev_y), abs(z - prev_z))
            prev_x, prev_y, prev_z = x, y, z

        # Exponential smoothing: 90% previous value, 10% current window.
        avg = avg * 0.9 + (sum_ / window_size) * 0.1
        results.append(avg)

    return results

# Visible confirmation in the cell output that the helper definitions ran.
print("Helper functions defined")

## Load Physiological Signals
The function below reads the physiological signals from every subject folder; it is adapted from Wearable_Dataset.ipynb.

In [None]:
def read_signals(main_folder):
    """
    Read the recordings of every subject folder found directly under
    ``main_folder``.

    Each subject folder contains: EDA, BVP, HR, IBI, TEMP, ACC, tags

    Args:
        main_folder: Directory whose immediate sub-directories are the subject
            folders. Every subject folder must contain an ``EDA.csv``; its
            header row is used as the recording start time.

    Returns:
        A tuple ``(signal_dict, time_dict, fs_dict)``; each is a dict keyed by
        subject folder name whose value is a dict keyed by signal name (the
        file name without ``.csv``):

        - ``signal_dict``: for ``tags``, a list of integer seconds since the
          recording start (with the start itself as first entry), or ``[]``
          when ``tags.csv`` is empty; for ``IBI``, the raw values of the file;
          for every other signal, the sample rows (everything after the
          sampling-rate row).
        - ``time_dict``: per sampled signal, the time in seconds of each
          sample, i.e. ``sample_index / fs``. No entry for ``tags`` or ``IBI``.
        - ``fs_dict``: per sampled signal, the integer sampling rate read from
          the first data row; ``'variable'`` for ``IBI``. No entry for ``tags``.
    """
    desired_files = {
        'EDA.csv', 'BVP.csv', 'HR.csv', 'TEMP.csv', 'tags.csv', 'ACC.csv', 'IBI.csv',
    }

    signal_dict = {}
    time_dict = {}
    fs_dict = {}

    subfolders = next(os.walk(main_folder))[1]

    for folder_name in subfolders:
        folder_path = os.path.join(main_folder, folder_name)

        # The recording start time is the header row of EDA.csv; nrows=0
        # parses only that header instead of loading the whole signal.
        eda_header = pd.read_csv(os.path.join(folder_path, 'EDA.csv'), nrows=0)
        utc_start = eda_header.columns.tolist()

        signals = {}
        time_line = {}
        fs_signal = {}

        for file_name in os.listdir(folder_path):
            if file_name not in desired_files:
                continue

            file_path = os.path.join(folder_path, file_name)
            signal_name = os.path.splitext(file_name)[0]

            if file_name == 'tags.csv':
                try:
                    df = pd.read_csv(file_path, header=None)
                    tags_vector = create_df_array(df)
                    # Prepend the recording start so the tags are expressed
                    # relative to it.
                    tags_UTC_vector = np.insert(tags_vector, 0, utc_start)
                    signal_array = time_abs_(tags_UTC_vector)
                except pd.errors.EmptyDataError:
                    # An empty tags file means no events were marked.
                    signal_array = []

            elif file_name == 'IBI.csv':
                df = pd.read_csv(file_path)
                signal_array = df.values
                # Inter-beat intervals are event based: no fixed sampling rate.
                fs_signal['IBI'] = 'variable'

            else:
                df = pd.read_csv(file_path)
                # First data row holds the sampling rate; the rest are samples.
                fs = int(df.iloc[0, 0])
                signal_array = df.iloc[1:].values
                n_samples = len(signal_array)
                # endpoint=False keeps the spacing at exactly 1/fs (sample i is
                # at i/fs); including the endpoint would stretch the time axis.
                time_array = np.linspace(0, n_samples / fs, n_samples, endpoint=False)

                time_line[signal_name] = time_array
                fs_signal[signal_name] = fs

            signals[signal_name] = signal_array

        signal_dict[folder_name] = signals
        time_dict[folder_name] = time_line
        fs_dict[folder_name] = fs_signal

    return signal_dict, time_dict, fs_dict

# Visible confirmation in the cell output that the loader definition ran.
print(" Data loading function defined")

In [None]:
# Load all physiological signals
print("Loading physiological signals...")
signal_data, time_data, fs_dict = read_signals(dataset_path)

# Subject folders are split by protocol version using their name prefix:
# 'S' for the first version (V1), 'f' for the second version (V2).
subjects = list(signal_data)
v1_subjects = sorted(s for s in subjects if s.startswith('S'))
v2_subjects = sorted(s for s in subjects if s.startswith('f'))

# The check mark was previously stored as mojibake (UTF-8 bytes decoded as
# cp1252); it is restored to the intended character here.
print(f"\n✓ Loaded {len(subjects)} subjects:")
print(f"   V1: {v1_subjects}")
print(f"   V2: {v2_subjects}")

## Load Self-Reported Stress Levels

In [None]:
# Load the three metadata tables in one pass; in each of them the first CSV
# column becomes the row index.
stress_level_v1, stress_level_v2, subject_info = (
    pd.read_csv(table_path, index_col=0)
    for table_path in (stress_level_v1_path, stress_level_v2_path, subject_info_path)
)

# Report the column names of each stress-level table.
print("Self-reported stress levels loaded")
print(f"\nV1 Phases: {list(stress_level_v1.columns)}")
print(f"V2 Phases: {list(stress_level_v2.columns)}")
print("\nExample stress levels:")
# Bare expression on the last line so the notebook renders the first rows.
stress_level_v1.head()