# 001 Preprocess and analyse ECG

### Purpose
The purpose of this notebook is to preprocess and analyse the raw ECG data for the 'We Love Reading' study.

- Neurokit2 package with a custom pipeline will be used for preprocessing
- Segmentation is performed afterwards
- QA figures will be exported for every participant
- The processed signal data will be exported
- HRV metrics are computed and exported


### Input / Output
- Input: `~/data/raw`
- Outputs:
  - QA visualizations: `~/reports/QA`
  - Processed signal data and HRV metrics: `~/data/processed`
  
**Note**:
- Output folders are created if they do not exist
- `~` refers to the project root, i.e. the parent of the folder this notebook is run from


### Imports

In [1]:

# fmt: off
from pathlib import Path
import numpy as np
import importlib
import nk_pipeline
import neurokit2 as nk
import parameters as params
import common
import data_utils
importlib.reload(nk)
importlib.reload(nk_pipeline)
importlib.reload(params)
importlib.reload(data_utils)
importlib.reload(common)
import warnings
import matplotlib.pyplot as plt
plt.ioff()  # Turn off interactive mode
import gc
# fmt:on


## Parameters

#### IO

In [2]:
# All locations are resolved relative to the project root, i.e. the parent of
# the directory the notebook is executed from.
WORKING_DIR = Path.cwd()
ROOT_DIR = WORKING_DIR.parent

# Input locations
DATA_DIR = ROOT_DIR / 'data'
RAW_DATA_DIR = DATA_DIR / 'raw'

# Output locations
PROCESSED_DATA_DIR = DATA_DIR / 'processed'
REPORTS_DIR = ROOT_DIR / 'reports'
QA_REPORTS_DIR = REPORTS_DIR / 'QA'

# Create the output folders if they are missing. Each call only creates the
# leaf folder (parents=False), so `reports` is listed before `reports/QA`
# and the `data` folder itself must already exist.
for output_dir in (PROCESSED_DATA_DIR, REPORTS_DIR, QA_REPORTS_DIR):
    output_dir.mkdir(parents=False, exist_ok=True)

## Support Functions

# Prepare Data

## Filepaths

ECG

In [3]:
# Collect the raw ECG recordings (file names ending in "mc.txt") in sorted order.
ecg_glob_matches = list(RAW_DATA_DIR.glob('*mc.txt'))
ecg_filepaths = np.sort(ecg_glob_matches)
# Optional sanity checks: list the files / the identifiers parsed from each name.
# [print(f) for f in ecg_filepaths];
# [print(data_utils.extract_subject_id_condition_from_filepath(f)) for f in ecg_filepaths];

Events

In [4]:
# Collect the raw event files (file names ending in "event.txt") in sorted order.
event_glob_matches = list(RAW_DATA_DIR.glob('*event.txt'))
event_filepaths = np.sort(event_glob_matches)
# Optional sanity checks: list the files / the identifiers parsed from each name.
# [print(f) for f in event_filepaths];
# [print(data_utils.extract_subject_id_condition_from_filepath(f)) for f in event_filepaths];

# Apply the pipeline

- Make sure that the order of the ECG and event files matches.

**ToDo**
- Improve reading/checking ECG and Event files to make sure they match

In [5]:
def _normalise_recording_id(identifiers):
    """Return a case-insensitive tuple key built from a recording's identifiers.

    `identifiers` is the (dyad_number, condition, wave) triple returned by
    `data_utils.extract_subject_id_condition_from_filepath`. Every part is
    converted to an upper-case string so that e.g. "w1" and "W1" compare equal.
    """
    return tuple(str(part).upper() for part in identifiers)


# Index the event files by recording identity so that each ECG file is paired
# with *its* event file instead of whatever file happens to sit at the same
# list position (a single missing/extra file would otherwise shift every
# subsequent pair).
# NOTE(review): assumes the filename parser also handles event file names,
# as the commented-out sanity check in the event-filepaths cell suggests.
event_filepath_by_id = {}
for event_filepath in event_filepaths:
    try:
        event_id = _normalise_recording_id(
            data_utils.extract_subject_id_condition_from_filepath(event_filepath)
        )
    except Exception as e:
        print(f"Error processing {event_filepath}: {e}")
        continue
    if event_id in event_filepath_by_id:
        print(f"Warning: multiple event files for {event_id}; using {event_filepath}")
    event_filepath_by_id[event_id] = event_filepath

# Run the full pipeline for every ECG recording. A failure in one recording is
# reported and the loop continues with the next one (best effort).
for index, ecg_filepath in enumerate(ecg_filepaths):
    try:
        dyad_number, condition, wave = data_utils.extract_subject_id_condition_from_filepath(ecg_filepath)
        print(f"Processing recording {index+1}/{len(ecg_filepaths)}. Dyad number: {dyad_number}. Condition: {condition}. Wave: {wave}")

        # Look up the matching event file; skip the recording if there is none.
        matching_event_filepath = event_filepath_by_id.get(
            _normalise_recording_id((dyad_number, condition, wave))
        )
        if matching_event_filepath is None:
            raise FileNotFoundError(
                f"No event file found for dyad {dyad_number}, condition {condition}, wave {wave}"
            )

        nk_pipeline.process_dyad(
            ecg_filepath=ecg_filepath,
            event_filepath=matching_event_filepath,
            parameters=params.base_params,
            data_output_dir=PROCESSED_DATA_DIR,
            figure_output_dir=QA_REPORTS_DIR,
        )
    except Exception as e:
        print(f"Error processing {ecg_filepath}: {e}")
    finally:
        # pyplot keeps a reference to every figure it created until it is
        # closed, so garbage collection alone cannot release them.
        plt.close("all")
        gc.collect()


Processing recording 1/177. Dyad number: 1. Condition: B. Wave: W2
Processing recording 2/177. Dyad number: 1. Condition: B. Wave: W3
Processing recording 3/177. Dyad number: 2. Condition: B. Wave: W2
Processing recording 4/177. Dyad number: 2. Condition: B. Wave: W3
Processing recording 5/177. Dyad number: 3. Condition: B. Wave: W2
Processing recording 6/177. Dyad number: 3. Condition: B. Wave: W3
Processing recording 7/177. Dyad number: 4. Condition: B. Wave: w1
Error processing /Users/lukasspiess/Library/CloudStorage/OneDrive-SpiessSolution/Documents - We Love Reading/Welcome/data/raw/B04_w1_mc.txt: Found 3 rows in df for event: Book start
Processing recording 8/177. Dyad number: 5. Condition: B. Wave: W2
Processing recording 9/177. Dyad number: 5. Condition: B. Wave: W3
Processing recording 10/177. Dyad number: 5. Condition: B. Wave: w1
Processing recording 11/177. Dyad number: 8. Condition: B. Wave: W3
Processing recording 12/177. Dyad number: 8. Condition: B. Wave: w1
Processing 

KeyboardInterrupt: 

<Figure size 640x480 with 0 Axes>

# DEBUG STUFF
|    
V    

In [None]:
# Debug: display the first event file path (compare with the first ECG file path)
event_filepaths[0]

In [None]:
# Debug: display the first ECG file path (compare with the first event file path)
ecg_filepaths[0]

# Preprocess first

In [None]:
# Load and prepare
signal_event_df = data_utils.load_dyad_ecg_events(ecg_filepaths[0], event_filepaths[0])
dyad_parameters = params.configure_segmentation_params(1, params.base_params)
child_params, mother_params = params.configure_ecg_params(1, dyad_parameters)
child_series, mother_series = data_utils.split_in_child_mother_series(signal_event_df)

# Preprocess ECG
child_signals_df = nk_pipeline.ecg_preprocess(child_series, child_params)
mother_signals_df = nk_pipeline.ecg_preprocess(mother_series, mother_params)

# Join the preprocessed signals with the events
child_signal_event_df = child_signals_df.merge(signal_event_df[["event", "event_description"]], left_index=True, right_index=True, how = "left")
mother_signal_event_df = mother_signals_df.merge(signal_event_df[["event", "event_description"]], left_index=True, right_index=True, how = "left")

In [None]:
# Debug: preview the first rows of the child's preprocessed signals joined with the events
child_signal_event_df.head()

## Then segment and analyze

In [None]:
# segment the dataframe
child_segments_df_list = data_utils.segment_df(child_signal_event_df, dyad_parameters)
mother_segments_df_list = data_utils.segment_df(mother_signal_event_df, dyad_parameters)

In [None]:
# Debug: take the first child segment and look at the spacing between
# consecutive R-peaks, expressed as differences of the index values.
test_df = child_segments_df_list[0]
r_peak_rows = test_df.loc[test_df["ECG_R_Peaks"] == 1]
# The first difference is undefined (NaN) and is dropped.
r_peak_rows.index.diff().dropna()