In [1]:
import pandas as pd
import numpy as np
from scipy.signal import resample
import scipy.signal as signal

In [2]:
# read xls
df = pd.read_excel(r'/home/bahey/bio_data/dataset/additional_info/data_info.xlsx')

# Forward fill NaN values in the first column
df["subject"].fillna(method="ffill", inplace=True)
df.head(50)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["subject"].fillna(method="ffill", inplace=True)
  df["subject"].fillna(method="ffill", inplace=True)


Unnamed: 0,subject,date,BCG,RR,resp
0,s01,2023-11-03 00:00:00,0.0,0.0,0.0
1,s01,2023-11-04 00:00:00,1.0,1.0,0.0
2,s01,2023-11-05 00:00:00,1.0,1.0,0.0
3,s01,2023-11-06 00:00:00,1.0,0.0,0.0
4,s01,2023-11-07 00:00:00,1.0,0.0,0.0
5,s01,2023-11-08 00:00:00,1.0,0.0,0.0
6,s01,2023-11-09 00:00:00,1.0,0.0,0.0
7,s01,sum,6.0,2.0,0.0
8,s01,,,,
9,s02,2023-11-03 00:00:00,1.0,1.0,0.0


In [3]:

# Filter rows where both BCG and RR are 1
filtered_df = df[(df["BCG"] == 1) & (df["RR"] == 1)  ]
filtered_df.head(50)

Unnamed: 0,subject,date,BCG,RR,resp
1,s01,2023-11-04 00:00:00,1.0,1.0,0.0
2,s01,2023-11-05 00:00:00,1.0,1.0,0.0
9,s02,2023-11-03 00:00:00,1.0,1.0,0.0
10,s02,2023-11-04 00:00:00,1.0,1.0,0.0
18,s03,2023-11-03 00:00:00,1.0,1.0,0.0
20,s03,2023-11-05 00:00:00,1.0,1.0,0.0
27,s04,2023-11-03 00:00:00,1.0,1.0,0.0
28,s04,2023-11-04 00:00:00,1.0,1.0,0.0
36,s05,2023-11-03 00:00:00,1.0,1.0,0.0
37,s05,2023-11-04 00:00:00,1.0,1.0,0.0


In [4]:
#resample sample 1
bcg=pd.read_csv('dataset/data/01/BCG/01_20231104_BCG.csv')
rr=pd.read_csv('dataset/data/01/Reference/RR/01_20231104_RR.csv')

#count how many recoreded hr samples in rr
hr_samples=rr['Heart Rate'].count()
print(hr_samples)    #32686

bcg_samples=bcg['BCG'].count()
print(bcg_samples)   #3643008

32686
3643008


In [5]:
bcg_signal = bcg['BCG']
fs = 140  # Sampling rate

# Apply bandpass filter (0.5–10 Hz)
lowcut = 0.5
highcut = 10.0
nyquist = 0.5 * fs
b, a = signal.butter(2, [lowcut / nyquist, highcut / nyquist], btype="band")
filtered_bcg = signal.filtfilt(b, a, bcg_signal)

In [6]:
peaks, _ = signal.find_peaks(filtered_bcg, height=np.std(filtered_bcg), distance=fs*0.6)  # At least 0.5s apart

In [7]:
peak_intervals = np.diff(peaks) / fs  # Convert samples to seconds
heart_rates = 60 / peak_intervals  # Convert intervals to BPM

# Convert to a DataFrame for easier analysis
hr_df = pd.DataFrame({ "Heart Rate": heart_rates})
print(hr_df.head(50))


    Heart Rate
0    76.363636
1    59.154930
2    29.166667
3    91.304348
4    86.597938
5     1.744911
6    10.370370
7    60.869565
8    40.975610
9    20.638821
10   82.352941
11   96.551724
12   91.304348
13   59.574468
14   45.161290
15   60.000000
16   68.292683
17   38.356164
18   43.076923
19   85.714286
20   39.810427
21   82.352941
22   53.164557
23   75.675676
24   52.500000
25   45.405405
26   66.141732
27   55.629139
28   92.307692
29   78.504673
30   12.407681
31   38.009050
32   87.500000
33   75.675676
34   87.500000
35   34.285714
36   95.454545
37    2.457578
38   53.164557
39   97.674419
40   48.554913
41   34.285714
42   31.226766
43   95.454545
44   95.454545
45   64.122137
46   51.533742
47   78.504673
48   94.382022
49   96.551724


In [None]:
#add Timestamp in 