In [11]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt
import os 
import pickle

In [12]:
# Sampling rates in Hz of the three streams used in this notebook.
# NOTE(review): `frequency_lable` is not referenced in the visible code; `ranges` hard-codes 700 instead.
frequency_lable = 700  # label stream (one label per sample)
frequency_PPG = 64  # wrist PPG (BVP) signal
frequency_ACC = 32  # wrist accelerometer signal

In [13]:
def ranges(indices, sample_rate=700):
    """Collapse ascending sample indices into per-run (start_sec, end_sec) pairs.

    A "run" is a maximal stretch in which every index equals the previous
    one plus 1. For each run the first and last index are divided by
    ``sample_rate`` and truncated to whole seconds.

    Parameters
    ----------
    indices : sequence of int (list, numpy array or pandas Index)
        Sample positions, expected in ascending order.
    sample_rate : int, optional
        Samples per second of the stream the indices refer to.
        Defaults to 700, the value the original implementation hard-coded.

    Returns
    -------
    list of tuple(int, int)
        One ``(start_sec, end_sec)`` tuple per run; an empty list when
        ``indices`` is empty (previously this raised IndexError).
    """
    idx = np.asarray(indices)
    if idx.size == 0:
        return []

    # A step different from +1 means the next element opens a new run.
    run_starts = np.flatnonzero(np.diff(idx) != 1) + 1

    # First element of every run, and the element right before each new run
    # (plus the very last element) as the run ends.
    first = idx[np.concatenate(([0], run_starts))]
    last = idx[np.concatenate((run_starts - 1, [idx.size - 1]))]

    # int(x / rate) keeps the original truncating conversion to whole seconds.
    return [(int(s / sample_rate), int(e / sample_rate)) for s, e in zip(first, last)]

In [14]:
def find_interval(Sx_lable):
    """Locate the time intervals of the meditation and stress conditions.

    Parameters
    ----------
    Sx_lable : array-like
        Per-sample condition labels; code 4 marks meditation and
        code 2 marks stress.

    Returns
    -------
    tuple
        ``(meditation_intervals, stress_intervals)``, each being whatever
        ``ranges`` returns for the positions carrying that label code.
    """
    labels = pd.DataFrame(Sx_lable, columns=['val'])['val']

    def _intervals_for(code):
        # Row positions holding this label code, grouped into contiguous runs.
        return ranges(labels[labels == code].index)

    meditation_intervals = _intervals_for(4)
    stress_intervals = _intervals_for(2)
    return meditation_intervals, stress_intervals

In [15]:
def remove_motion_artifacts(acc_data, ppg_data, threshold, window_size, upsample_factor=None):
    """Drop PPG and accelerometer samples recorded while the wrist was moving.

    Movement is estimated from the absolute sample-to-sample change of each
    accelerometer axis, smoothed with a moving average of ``window_size``
    samples. A sample is kept only when the smoothed change of all three
    axes is below ``threshold``.

    The original code computed the smoothed signal but then built the mask
    from the raw differences, so ``window_size`` had no effect; the mask now
    uses the smoothed values.

    Parameters
    ----------
    acc_data : ndarray, shape (n, 3)
        Accelerometer samples.
    ppg_data : ndarray
        PPG samples, sampled ``upsample_factor`` times faster than
        ``acc_data``.
    threshold : float
        Upper limit for the smoothed per-axis change.
    window_size : int
        Length of the moving-average window, in accelerometer samples.
    upsample_factor : int, optional
        Number of PPG samples per accelerometer sample. When omitted it is
        taken from the module constants as ``frequency_PPG // frequency_ACC``.

    Returns
    -------
    tuple of ndarray
        ``(filtered_ppg, filtered_acc)``.
    """
    if upsample_factor is None:
        upsample_factor = frequency_PPG // frequency_ACC

    # 1. Absolute change between neighbouring accelerometer samples; a zero
    #    row is prepended so the result has one row per input sample.
    acc_diff = np.vstack((np.zeros((1, 3)), np.abs(np.diff(acc_data, axis=0))))

    # 2. Moving average of the change, channel by channel.
    kernel = np.ones(window_size) / window_size
    smoothed_diff = np.zeros_like(acc_diff)
    for axis in range(3):
        smoothed_diff[:, axis] = np.convolve(acc_diff[:, axis], kernel, mode='same')

    # 3. Keep a sample only if every axis stays below the threshold.
    mask_acc = np.all(smoothed_diff < threshold, axis=1)

    # 4. Stretch the accelerometer mask to the PPG sampling rate.
    mask_ppg = np.repeat(mask_acc, upsample_factor)

    # 5. Trim each signal/mask pair to their common length.
    n_ppg = min(len(ppg_data), len(mask_ppg))
    ppg_data, mask_ppg = ppg_data[:n_ppg], mask_ppg[:n_ppg]
    n_acc = min(len(acc_data), len(mask_acc))
    acc_data, mask_acc = acc_data[:n_acc], mask_acc[:n_acc]

    # 6. Apply the masks.
    filtered_ppg = ppg_data[mask_ppg]
    filtered_acc = acc_data[mask_acc]

    return filtered_ppg, filtered_acc


In [16]:
def filt_PGG_amplitude_anomaly(acc_data, ppg_data, threshold, min_valid_len, acc_ratio=2):
    """Keep only sufficiently long stretches of PPG whose amplitude is in range.

    A PPG sample is "in range" when its absolute value is below
    ``threshold``. Contiguous in-range runs of at least ``min_valid_len``
    samples are concatenated; the matching accelerometer slices are taken
    at ``index // acc_ratio``.

    Parameters
    ----------
    acc_data : ndarray
        Accelerometer samples, ``acc_ratio`` times slower than the PPG.
    ppg_data : ndarray
        PPG samples.
    threshold : float
        Exclusive upper bound for ``abs(ppg)``.
    min_valid_len : int
        Minimum run length, in PPG samples, for a run to be kept.
    acc_ratio : int, optional
        PPG samples per accelerometer sample. Defaults to 2, the value the
        original implementation hard-coded.

    Returns
    -------
    tuple of ndarray
        ``(filtered_ppg, filtered_acc)``. Both are empty (with the trailing
        dimensions of the inputs preserved) when no run qualifies;
        previously that case raised ValueError from ``np.concatenate``.
    """
    # atleast_1d guards against a single-sample input being squeezed to 0-d.
    in_range = np.atleast_1d(np.squeeze(np.abs(ppg_data) < threshold)).astype(int)

    empty_result = (np.asarray(ppg_data)[:0], np.asarray(acc_data)[:0])
    if in_range.size == 0:
        return empty_result

    edges = np.diff(in_range)
    starts = np.where(edges == 1)[0] + 1   # first index of each in-range run
    ends = np.where(edges == -1)[0] + 1    # one past the last index of each run

    # Runs touching either border of the signal have no edge of their own.
    if in_range[0]:
        starts = np.insert(starts, 0, 0)
    if in_range[-1]:
        ends = np.append(ends, len(in_range))

    # Discard runs that are too short.
    valid_groups = [(start, end) for start, end in zip(starts, ends) if (end - start) >= min_valid_len]
    if not valid_groups:
        return empty_result

    filtered_ppg = np.concatenate([ppg_data[start:end] for start, end in valid_groups])
    # Integer division can shift the accelerometer slice by a few samples
    # relative to the PPG slice.
    filtered_acc = np.concatenate(
        [acc_data[start // acc_ratio : end // acc_ratio] for start, end in valid_groups]
    )

    return filtered_ppg, filtered_acc



In [17]:
def get_stress_lvl(stai):
    """Return the third ';'-separated field of a questionnaire line.

    Parameters
    ----------
    stai : str
        A semicolon-delimited line.

    Returns
    -------
    str
        The field at position 2 (zero-based), unchanged.

    Raises
    ------
    IndexError
        If the line has fewer than three fields.
    """
    fields = stai.split(";")
    return fields[2]

In [18]:
def get_PGG_and_ACC(Sx):
    """Cut the first stress and the first meditation segment out of the wrist signals.

    Parameters
    ----------
    Sx : mapping
        One subject's recording. Must provide ``Sx['label']`` and
        ``Sx['signal']['wrist']['BVP']`` / ``['ACC']``.

    Returns
    -------
    tuple
        ``(PPG_stress, ACC_stress, PPG_medit, ACC_medit)``. Only the first
        interval of each condition is used, and the segments are returned
        unfiltered.
    """
    medit_spans, stress_spans = find_interval(Sx['label'])

    wrist = Sx['signal']['wrist']
    ppg_full = np.squeeze(wrist['BVP'])  # photoplethysmogram
    acc_full = np.squeeze(wrist['ACC'])  # accelerometer

    def _segment(signal, rate, span):
        # span is (start_sec, end_sec); scale it to sample positions.
        begin_sec, end_sec = span
        return signal[rate * begin_sec : rate * end_sec]

    PPG_medit = _segment(ppg_full, frequency_PPG, medit_spans[0])
    ACC_medit = _segment(acc_full, frequency_ACC, medit_spans[0])

    PGG_stress = _segment(ppg_full, frequency_PPG, stress_spans[0])
    ACC_stress = _segment(acc_full, frequency_ACC, stress_spans[0])

    # Artifact filtering is currently disabled; the calls are kept for reference:
    # filtred_PPG_stress, filtred_ACC_stress = filt_PGG_amplitude_anomaly(ACC_stress, PGG_stress, 50, 64 * 3)
    # filtred_PPG_medit, filtred_ACC_medit = filt_PGG_amplitude_anomaly(ACC_medit, PPG_medit, 50, 64 * 3)
    # filtred_PPG_stress, filtred_ACC_stress = remove_motion_artifacts(filtred_ACC_stress, filtred_PPG_stress, 5, 3)
    # filtred_PPG_medit, filtred_ACC_medit = remove_motion_artifacts(filtred_ACC_medit, filtred_PPG_medit, 5, 3)

    return PGG_stress, ACC_stress, PPG_medit, ACC_medit

In [19]:
def drow_data(data_dict, Sx):
    """Save PPG/accelerometer overview plots for one subject.

    Two PNG files are written into ``./init_data``: one for the stress
    segment (``PPG_ACC_st_<Sx>.png``) and one for the meditation segment
    (``PPG_ACC_md_<Sx>.png``). Each figure has the PPG trace on top and the
    accelerometer trace below.

    Parameters
    ----------
    data_dict : dict
        Maps subject id to a dict holding the keys ``'PPG_stress'``,
        ``'ACC_stress'``, ``'PPG_medit'`` and ``'ACC_medit'``.
    Sx : str
        Subject id; used as key into ``data_dict`` and in the file names.

    Returns
    -------
    None
    """
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs('./init_data', exist_ok=True)

    record = data_dict[Sx]
    plots = (
        ('PPG_stress', 'ACC_stress', f'./init_data/PPG_ACC_st_{Sx}.png'),
        ('PPG_medit', 'ACC_medit', f'./init_data/PPG_ACC_md_{Sx}.png'),
    )

    for ppg_key, acc_key, png_path in plots:
        fig, (ax_ppg, ax_acc) = plt.subplots(2, 1, figsize=(200, 6))
        try:
            ax_ppg.plot(record[ppg_key], color='red')
            ax_ppg.set_ylim(-50, 50)

            ax_acc.plot(record[acc_key])
            ax_acc.set_ylim(-128, 127)

            for ax in (ax_ppg, ax_acc):
                ax.grid(True)
                # The x axis is a bare sample counter; hide its ticks and labels.
                ax.tick_params(axis='x', which='both', bottom=False, labelbottom=False)

            fig.savefig(png_path, dpi=300, bbox_inches='tight')
        finally:
            # Close this figure even if plotting or saving failed, so figures
            # do not pile up when the function is called in a loop.
            plt.close(fig)

In [20]:
# Per-subject results, keyed by subject id; filled by the loading loop.
data_dict = {}

In [21]:
def get_folders(path):
    """List the names of the immediate sub-directories of ``path``.

    Parameters
    ----------
    path : str
        Directory to scan.

    Returns
    -------
    list of str
        Entry names (not full paths) that are directories, in the order
        ``os.listdir`` reports them.
    """
    subfolders = []
    for entry in os.listdir(path):
        if os.path.isdir(os.path.join(path, entry)):
            subfolders.append(entry)
    return subfolders

# One sub-folder per subject; the folder name doubles as the subject id.
# NOTE(review): absolute, machine-specific path — the loading loop further down repeats the same prefix.
list_user = get_folders('/home/ilya/Downloads/WESAD_old/')

In [22]:
# For every subject folder: load the pickled recording, cut out the stress and
# meditation segments, read one questionnaire value per condition, store
# everything in data_dict and save the overview plots.
for person_id in list_user:
    # NOTE(review): unpickling can execute arbitrary code — only load trusted files.
    data_of_person = pd.read_pickle(f'/home/ilya/Downloads/WESAD_old/{person_id}/{person_id}.pkl')
    # NOTE(review): despite the "filtred_" names, get_PGG_and_ACC currently returns
    # unfiltered segments (its filtering calls are commented out).
    filtred_PPG_stress, filtred_ACC_stress, filtred_PPG_medit, filtred_ACC_medit = get_PGG_and_ACC(data_of_person)

    with open(f'/home/ilya/Downloads/WESAD_old/{person_id}/{person_id}_quest.csv', 'r', encoding='utf-8') as file:
        lines = file.readlines()

    lines = [line.strip() for line in lines]
    # The second line of the questionnaire file lists the conditions;
    # fields 1..7 are taken as their order.
    order = lines[1].split(";")[1:8]
    # print(order)

    # Position of each condition within `order`; reused below to pick the
    # matching "# STAI;" line.
    # NOTE(review): assumes the n-th "# STAI;" line belongs to the n-th entry of `order` — confirm.
    index_stress = order.index('TSST')
    index_medit = order.index('Medi 1')

    stai_lines = [line for line in lines if line.startswith("# STAI;")]
    # print(stai_lines[index_stress], get_stress_lvl(stai_lines[index_stress]))
    # print(stai_lines[index_medit], get_stress_lvl(stai_lines[index_medit]))

    # get_stress_lvl returns the third ';'-separated field of the line, as a string.
    lvl_stress_for_stress = get_stress_lvl(stai_lines[index_stress])
    lvl_stress_for_medit = get_stress_lvl(stai_lines[index_medit])

    data_dict[person_id] = dict(
        {    
        'PPG_stress': filtred_PPG_stress,
        'ACC_stress': filtred_ACC_stress,
        'stress_lvl_for_stress': lvl_stress_for_stress,
        'PPG_medit': filtred_PPG_medit,
        'ACC_medit': filtred_ACC_medit,
        'stress_lvl_for_medit': lvl_stress_for_medit
        }
    )

    # Writes two PNG files for this subject under ./init_data.
    drow_data(data_dict, person_id)


In [23]:
# Persist the collected per-subject dictionary to ./data.pkl
# (the path is relative to the current working directory).
with open('./data.pkl', 'wb') as file:
    pickle.dump(data_dict, file)