In [None]:
# pyarrow provides pyarrow.csv.read_csv, which this notebook uses to load the experiment CSV files.
%pip install pyarrow

In [11]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from scipy import signal, interpolate, fftpack  # signal processing lib
import glob
from tqdm import tqdm  # pretty loading bar
import pyarrow.csv  # faster .csv I/O than in pandas
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.fft import rfft, rfftfreq, irfft
# Input data files are available in the read-only "../input/" directory


In [12]:
# Folder that is searched for the PPG_EXP_*.csv recordings.
# Alternative relative location of the same folder (presumably for runs outside Kaggle):
# ROOT = "./watch_data_test/watch_data_test/"
ROOT = "../input/neymark-signal-processing/watch_data_test/watch_data_test/"
# Number of output samples generated per unit of timestamp when predictions are
# resampled for submission (presumably Hz, i.e. timestamps in seconds — TODO confirm).
SUBMISSION_SR = 30 # sampling rate for the submission file

In [13]:
def format_peaks_to_intervals(ppg_peaks, ts_start, ts_finish):
    """
    Convert detected heartbeat timestamps into an evenly sampled interbeat-interval series.

    The test ECG peaks are formatted in the same way, and the start and finish of each
    experiment are taken to be the start and finish of the PPG data.

    Parameters
    ----------
    ppg_peaks : array-like
        Timestamps of the detected beats (must be acceptable to ``interp1d`` as ``x``).
    ts_start, ts_finish : float
        First and last timestamp of the recording.

    Returns
    -------
    numpy.ndarray
        ``int((ts_finish - ts_start) * SUBMISSION_SR)`` values. Each output sample carries
        the interval that ends at the nearest detected beat; samples lying before the
        first beat or after the last beat are 0.
    """
    # Evenly spaced output grid covering the whole recording.
    n_samples = int((ts_finish - ts_start) * SUBMISSION_SR)
    grid = np.linspace(ts_start, ts_finish, n_samples)

    # Interval that ends at each beat; the first one is measured from ts_start.
    intervals = np.diff(ppg_peaks, prepend=ts_start)

    # Nearest-neighbour lookup; outside the span of the beats the value is 0 instead of an error.
    nearest_interval = interpolate.interp1d(
        ppg_peaks,
        intervals,
        kind="nearest",
        bounds_error=False,
        fill_value=0,
    )
    return nearest_interval(grid)

In [14]:
def process_exp(ppg_df, sampling_rate=None, low_hz=1.5, high_hz=5.0):
    """
    Detect heartbeats in one PPG recording and return the formatted interbeat intervals.

    The signal is band-pass filtered in the frequency domain (every FFT bin outside
    the open interval ``(low_hz, high_hz)`` is zeroed), the local maxima of the
    filtered signal are taken as beats, and their timestamps are passed to
    ``format_peaks_to_intervals``.

    Parameters
    ----------
    ppg_df : pandas.DataFrame
        Must contain the columns ``'PPG_signal'`` and ``'Timestamp'``.
    sampling_rate : float, optional
        Sampling rate of ``PPG_signal`` used to label the FFT bins. When omitted,
        ``2 * SUBMISSION_SR`` is used (see the review note in the body).
    low_hz, high_hz : float, optional
        Exclusive lower / upper edge of the pass band, in the units implied by
        ``sampling_rate``.

    Returns
    -------
    numpy.ndarray
        The output of ``format_peaks_to_intervals`` for the detected beats, spanning
        the first to the last timestamp of the recording.
    """
    ppg_signal = ppg_df['PPG_signal'].values
    timestamps = ppg_df['Timestamp'].values
    n_samples = len(ppg_signal)

    if sampling_rate is None:
        # NOTE(review): the previous implementation labelled the spectrum with
        # np.linspace(0, SUBMISSION_SR, ...), i.e. it treated SUBMISSION_SR as the
        # Nyquist frequency. That is only right if the PPG is sampled at
        # 2 * SUBMISSION_SR. The default keeps that assumption so existing results
        # stay comparable; confirm it against the real recording rate and pass
        # `sampling_rate` explicitly if it differs.
        sampling_rate = 2 * SUBMISSION_SR

    # Keep the full one-sided spectrum and let rfftfreq label every bin.
    spectrum = rfft(ppg_signal)
    freqs = rfftfreq(n_samples, d=1.0 / sampling_rate)
    spectrum[(freqs <= low_hz) | (freqs >= high_hz)] = 0

    # irfft cannot tell an odd original length from an even one, so the length is
    # passed explicitly. This keeps filtered[i] aligned with timestamps[i]; a shorter
    # reconstruction would place the detected peaks on a compressed time axis.
    filtered = irfft(spectrum, n=n_samples)

    peak_idxs = signal.find_peaks(filtered)[0]
    ppg_peaks = timestamps[peak_idxs]
    return format_peaks_to_intervals(ppg_peaks, timestamps[0], timestamps[-1])

In [15]:
# PPG recordings under ROOT in lexicographic path order; this order is also the
# order in which the per-file predictions are later concatenated.
train_ppgs_pathes = sorted(glob.glob(f"{ROOT}/PPG_EXP_*.csv"))

In [16]:
# Holds one array of interpolated interbeat intervals per processed experiment file.
all_predictions = []

In [17]:
# Start from an empty list inside this cell so that re-running it neither appends a
# second copy of every prediction nor fails when `all_predictions` has been rebound
# to something without `.append`.
all_predictions = []
for path in tqdm(train_ppgs_pathes):
    ppg_df = pyarrow.csv.read_csv(path).to_pandas()
    # Discard the first column by position (presumably a saved row index — TODO confirm);
    # process_exp only reads 'PPG_signal' and 'Timestamp'.
    ppg_df = ppg_df.drop(columns=ppg_df.columns[0])
    ppg_rr_intervals_interp = process_exp(ppg_df)
    all_predictions.append(ppg_rr_intervals_interp)

100%|██████████| 9/9 [00:00<00:00, 16.95it/s]


In [18]:
# Flatten under a new name instead of rebinding `all_predictions`: if the list is
# replaced by a 1-D array, running this cell a second time makes np.concatenate
# fail on zero-dimensional inputs, and the loop that appends to the list breaks.
predictions_flat = np.concatenate(all_predictions)
sample_submission = pd.DataFrame({"PPG_interbeat_interval": predictions_flat})
# The submission id is simply the row position in the concatenated predictions.
sample_submission['id'] = sample_submission.index
sample_submission.to_csv("fourier_submission.csv", index=False)