#### From the train folder, check all the segments for each patient and verify that each subsegment has both ABP and Pleth signals and is at least two minutes long.

For example, the train folder contains patient folders such as p10019003 and p10020306, each of which contains segments such as 87033314. Segment 87033314 has 10 subsegments, 87033314_0000 to 87033314_0009. A subsegment may contain ABP and Pleth signals. We need to read the data from the subsegments.

In [1]:
import os
import wfdb

def display_segment_info(record):
    """Print a human-readable summary of a segment header record.

    Args:
        record: Header object exposing ``record_name``, ``n_sig``,
            ``sig_len``, ``fs``, ``counter_freq``, ``base_time``,
            ``base_date`` and ``comments``.

    The total duration is reported as ``sig_len / fs``. Nothing is returned;
    the summary goes to standard output, followed by a separator line and one
    blank line.
    """
    name = record.record_name
    print(f"--- Segment Header Information for {name} ---")
    print(f"Record name and segment number: {name}")
    print(f"Number of signals: {record.n_sig}")

    # Duration in seconds is the sample count divided by the sampling rate.
    total_seconds = record.sig_len / record.fs
    print(f"Total duration: {total_seconds} seconds")

    print(f"Sampling frequency: {record.fs} Hz")
    print(f"Counter frequency: {record.counter_freq} Hz")
    print(f"Number of samples per signal: {record.sig_len}")
    print(f"Start time of the recording: {record.base_time}")
    print(f"Start date of the recording: {record.base_date}")
    print(f"Meta data: {record.comments}")

    # Separator plus one trailing blank line.
    print("--------------------------------------------------", end="\n\n")

def display_subsegment_info(record):
    """Print the signal names and duration of a subsegment header record.

    Args:
        record: Header object exposing ``record_name``, ``n_sig``,
            ``sig_name``, ``sig_len`` and ``fs``.

    One name line and one duration line are printed per signal. Every signal
    reports the same duration because it is derived from the record-level
    ``sig_len / fs``. Nothing is returned.
    """
    print(f"--- Subsegment Signal Information for {record.record_name} ---")
    signal_count = record.n_sig
    print(f"Number of signals: {signal_count}")

    # Signals are numbered from 1 in the output.
    for ordinal in range(1, signal_count + 1):
        print(f"Signal {ordinal} name: {record.sig_name[ordinal - 1]}")
        # Computed per signal so a record with zero signals never divides.
        print(f"Signal {ordinal} duration: {record.sig_len / record.fs} seconds")

    print("--------------------------------------------------", end="\n\n")

def process_patient_folder(patient_folder_path, min_duration_seconds=120):
    """Scan one patient folder and collect its subsegment records.

    Every entry of ``patient_folder_path`` whose name starts with ``'8'`` is
    treated as a segment folder. If ``<segment>/<segment>.hea`` exists, the
    segment header is read and displayed, then every ``<segment>_*.hea`` file
    in that folder is read as a subsegment header and displayed.

    Args:
        patient_folder_path: Path of a single patient directory.
        min_duration_seconds: Minimum duration (``sig_len / fs``, in seconds)
            a subsegment needs in order to be kept in the ABP+Pleth list.
            Defaults to 120, i.e. two minutes.

    Returns:
        A tuple ``(subsegment_list, subsegment_abp_pleth_list)``:

        * ``subsegment_list`` - one entry per subsegment header found.
        * ``subsegment_abp_pleth_list`` - the subset whose header lists both
          an ``'ABP'`` and a ``'Pleth'`` signal and whose duration is at
          least ``min_duration_seconds``.

        Each entry is the header path with ``.hea`` replaced by ``.dat``.
        The ``.dat`` path is derived from the header name only; it is not
        checked for existence.

    Entries are visited in sorted name order so that the printed output and
    the returned lists are reproducible (``os.listdir`` order is arbitrary).
    A summary of counts and min/max durations is printed before returning.
    """
    subsegment_list = []
    subsegment_abp_pleth_list = []
    # Both signals share the record-level duration, so the two entries always
    # receive the same values; the structure is kept for the summary below.
    min_max_durations = {
        'ABP': {'min': float('inf'), 'max': 0},
        'Pleth': {'min': float('inf'), 'max': 0},
    }

    for segment_folder in sorted(os.listdir(patient_folder_path)):
        # Only names starting with '8' are treated as segment folders.
        if not segment_folder.startswith('8'):
            continue

        segment_folder_path = os.path.join(patient_folder_path, segment_folder)
        header_file_path = os.path.join(segment_folder_path, segment_folder + '.hea')
        if not os.path.exists(header_file_path):
            print(f"Header file not found: {header_file_path}")
            continue

        # rdheader expects the record path without the '.hea' extension.
        segment_record = wfdb.rdheader(header_file_path[:-4])
        display_segment_info(segment_record)

        # Process subsegment header files
        subsegment_prefix = segment_folder + '_'
        for subsegment_file in sorted(os.listdir(segment_folder_path)):
            if not subsegment_file.startswith(subsegment_prefix):
                continue
            if not subsegment_file.endswith('.hea'):
                continue

            record_path = os.path.join(segment_folder_path, subsegment_file[:-4])
            subsegment_record = wfdb.rdheader(record_path)
            display_subsegment_info(subsegment_record)

            data_path = record_path + '.dat'
            subsegment_list.append(data_path)

            # sig_name may be empty/None for a header without signals.
            signal_names = subsegment_record.sig_name or []
            if 'ABP' not in signal_names or 'Pleth' not in signal_names:
                continue

            duration = subsegment_record.sig_len / subsegment_record.fs
            if duration < min_duration_seconds:
                continue

            subsegment_abp_pleth_list.append(data_path)
            for bounds in min_max_durations.values():
                bounds['min'] = min(bounds['min'], duration)
                bounds['max'] = max(bounds['max'], duration)

    print(f"Total number of subsegments: {len(subsegment_list)}")
    print(f"Total number of subsegments with ABP and Pleth: {len(subsegment_abp_pleth_list)}")
    print(f"Minimum and Maximum durations for ABP: {min_max_durations['ABP']['min']} seconds, {min_max_durations['ABP']['max']} seconds")
    print(f"Minimum and Maximum durations for Pleth: {min_max_durations['Pleth']['min']} seconds, {min_max_durations['Pleth']['max']} seconds")
    return subsegment_list, subsegment_abp_pleth_list

def process_all_patients(train_folder_path):
    """Run ``process_patient_folder`` on every patient directory under a root.

    Args:
        train_folder_path: Directory whose immediate subdirectories are
            treated as patient folders. Non-directory entries are skipped.

    Returns:
        A tuple ``(all_subsegments, all_subsegments_abp_pleth)`` holding the
        concatenation, across patients, of the two lists returned by
        ``process_patient_folder``.

    Patients are visited in sorted name order so the returned lists are
    reproducible (``os.listdir`` order is arbitrary). The number of patients
    with a non-empty ABP+Pleth list is printed before returning.
    """
    all_subsegments = []
    all_subsegments_abp_pleth = []
    patients_with_abp_pleth = 0

    for patient_folder in sorted(os.listdir(train_folder_path)):
        patient_folder_path = os.path.join(train_folder_path, patient_folder)
        if not os.path.isdir(patient_folder_path):
            continue

        subsegments, subsegments_abp_pleth = process_patient_folder(patient_folder_path)
        all_subsegments.extend(subsegments)
        all_subsegments_abp_pleth.extend(subsegments_abp_pleth)
        # A patient counts once if any of its subsegments qualified.
        if subsegments_abp_pleth:
            patients_with_abp_pleth += 1

    print(f"Number of patients with subsegments having both ABP and Pleth: {patients_with_abp_pleth}")
    print(f"Total number of patients that satisfied the condition: {patients_with_abp_pleth}")
    return all_subsegments, all_subsegments_abp_pleth

# Example usage
# Root of the "train" split. The MIMIC4WDB_TRAIN_DIR environment variable
# overrides the default below, so the notebook can run on a machine where the
# dataset is stored somewhere else without editing this cell.
train_folder_path = os.environ.get(
    "MIMIC4WDB_TRAIN_DIR",
    r"E:\Research\Project Tiny\physionet.org\files\mimic4wdb\0.1.0\waves\train",
)
all_subsegments, all_subsegments_abp_pleth = process_all_patients(train_folder_path)
print()
# print("All subsegments:", all_subsegments)
print()
# print("Subsegments with ABP and pleth:", all_subsegments_abp_pleth)


--- Segment Header Information for 87033314 ---
Record name and segment number: 87033314
Number of signals: 6
Total duration: 244454.4399535796 seconds
Sampling frequency: 62.4725 Hz
Counter frequency: 999.56 Hz
Number of samples per signal: 15271680
Start time of the recording: 19:05:46.807000
Start date of the recording: 2155-07-10
Meta data: ['wfdb 10.7', 'subject_id 10019003', 'hadm_id 21457723']
--------------------------------------------------

--- Subsegment Signal Information for 87033314_0000 ---
Number of signals: 6
Signal 1 name: II
Signal 1 duration: 0.0 seconds
Signal 2 name: III
Signal 2 duration: 0.0 seconds
Signal 3 name: V
Signal 3 duration: 0.0 seconds
Signal 4 name: aVR
Signal 4 duration: 0.0 seconds
Signal 5 name: Pleth
Signal 5 duration: 0.0 seconds
Signal 6 name: Resp
Signal 6 duration: 0.0 seconds
--------------------------------------------------

--- Subsegment Signal Information for 87033314_0001 ---
Number of signals: 5
Signal 1 name: II
Signal 1 duration: 3

In [2]:
all_subsegments_abp_pleth

['E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0005.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0006.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0009.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0011.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0013.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0016.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0018.dat',
 'E:\\Research\\Project Tiny\\physionet.org\\files\\mimic4wdb\\0.1.0\\waves\\train\\p10020306\\83404654\\83404654_0019.dat',


#### We now create DataFrames holding at most 3 minutes of signal per subsegment, with one row per subsegment and one column per sample.

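Since the header metadata reports a sampling frequency of 62.4725 Hz, we round it down to 62 Hz and use that value to calculate the number of samples per minute. (With this rounding, the 11,160-sample limit covers about 178.6 seconds of signal rather than a full 180 seconds.)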

In [3]:
import pandas as pd
import wfdb
import os

# Row accumulators, one per signal type. Each row has the form
# [combined_id, sample_1, sample_2, ...].
abp_data = []
pleth_data = []

# Per-row sample budget. The rate is the 62.47 Hz header value rounded to 62.
sampling_rate = 62
samples_per_minute = 60 * sampling_rate
max_samples = 3 * samples_per_minute  # 3 minutes of data

# Visit every subsegment that was found to contain both ABP and Pleth
for subsegment_path in all_subsegments_abp_pleth:
    # Path layout: .../<patient>/<segment>/<subsegment>.dat
    parts = subsegment_path.split(os.sep)
    patient_no = parts[-3]
    segment_no = parts[-2]
    subsegment_no = parts[-1].partition('.')[0]
    combined_id = f"{patient_no}_{subsegment_no}"

    # rdrecord takes the record path without the '.dat' extension
    record = wfdb.rdrecord(subsegment_path[:-4])

    # Select each signal's column by name, keeping at most max_samples rows
    abp_signal = record.p_signal[:max_samples, record.sig_name.index('ABP')]
    pleth_signal = record.p_signal[:max_samples, record.sig_name.index('Pleth')]

    # Store the id followed by the samples as one row per subsegment
    abp_data.append([combined_id, *abp_signal.tolist()])
    pleth_data.append([combined_id, *pleth_signal.tolist()])

# Build the DataFrames and label the id column (position 0) in one step
abp_df = pd.DataFrame(abp_data).rename(columns={0: 'combined_unique_id'})
pleth_df = pd.DataFrame(pleth_data).rename(columns={0: 'combined_unique_id'})

# Print the DataFrames
# print(abp_df)
# print(pleth_df)

In [66]:
abp_df.head()

Unnamed: 0,combined_unique_id,1,2,3,4,5,6,7,8,9,...,11151,11152,11153,11154,11155,11156,11157,11158,11159,11160
0,p10020306_83404654_0005,,,,,,,,,,...,29.0,29.0,29.0,29.0,29.0625,29.0,28.8125,28.625,28.5625,28.625
1,p10020306_83404654_0006,72.5,72.4375,72.1875,72.25,73.875,78.3125,85.0625,91.6875,96.1875,...,109.375,107.5625,104.8125,101.375,97.5625,94.3125,92.875,92.9375,93.0625,92.4375
2,p10020306_83404654_0009,80.3125,75.625,70.9375,68.4375,68.9375,69.75,69.0,67.6875,67.25,...,57.5,56.9375,56.3125,55.75,55.125,54.75,54.5,54.25,53.875,53.375
3,p10020306_83404654_0011,,,,,,,,,,...,74.125,73.6875,72.9375,71.8125,71.625,71.625,70.75,70.3125,70.25,69.4375
4,p10020306_83404654_0013,61.625,61.1875,60.75,60.25,59.8125,59.375,58.875,58.5,58.1875,...,58.875,58.625,58.4375,58.1875,58.0625,57.9375,57.75,57.8125,58.5,61.6875


In [65]:
abp_df

Unnamed: 0,combined_unique_id,1,2,3,4,5,6,7,8,9,...,11151,11152,11153,11154,11155,11156,11157,11158,11159,11160
0,p10020306_83404654_0005,,,,,,,,,,...,29.0,29.0,29.0,29.0,29.0625,29.0,28.8125,28.625,28.5625,28.625
1,p10020306_83404654_0006,72.5,72.4375,72.1875,72.25,73.875,78.3125,85.0625,91.6875,96.1875,...,109.375,107.5625,104.8125,101.375,97.5625,94.3125,92.875,92.9375,93.0625,92.4375
2,p10020306_83404654_0009,80.3125,75.625,70.9375,68.4375,68.9375,69.75,69.0,67.6875,67.25,...,57.5,56.9375,56.3125,55.75,55.125,54.75,54.5,54.25,53.875,53.375
3,p10020306_83404654_0011,,,,,,,,,,...,74.125,73.6875,72.9375,71.8125,71.625,71.625,70.75,70.3125,70.25,69.4375
4,p10020306_83404654_0013,61.625,61.1875,60.75,60.25,59.8125,59.375,58.875,58.5,58.1875,...,58.875,58.625,58.4375,58.1875,58.0625,57.9375,57.75,57.8125,58.5,61.6875
5,p10020306_83404654_0016,65.625,65.1875,64.75,64.25,63.875,63.4375,63.1875,63.125,63.3125,...,71.375,70.875,70.5,70.0625,69.5625,69.0625,68.5625,68.0625,67.5625,67.125
6,p10020306_83404654_0018,57.4375,57.0625,56.6875,56.4375,56.25,55.9375,55.4375,55.0,54.5,...,71.875,71.3125,68.125,63.625,59.0,56.4375,56.3125,56.625,56.625,56.5
7,p10020306_83404654_0019,,,,,,,,,,...,61.375,60.875,60.5625,60.25,60.0,59.625,59.0,58.5625,58.25,57.9375
8,p10020306_83404654_0020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
9,p10020306_83404654_0022,,,,,,,,,,...,96.125,95.75,95.5,96.0,97.4375,99.5,101.5625,103.375,104.625,104.875


In [5]:
pleth_df

Unnamed: 0,combined_unique_id,1,2,3,4,5,6,7,8,9,...,11151,11152,11153,11154,11155,11156,11157,11158,11159,11160
0,p10020306_83404654_0005,0.50293,0.50293,0.50293,0.5,0.498535,0.494141,0.489502,0.483643,0.474854,...,0.583252,0.583252,0.583252,0.574951,0.574951,0.574951,0.56665,0.562256,0.558105,0.549805
1,p10020306_83404654_0006,0.558105,0.553955,0.549805,0.545654,0.541504,0.537354,0.533203,0.529053,0.524902,...,0.541504,0.541504,0.541504,0.541504,0.549805,0.553955,0.562256,0.574951,0.587402,0.604004
2,p10020306_83404654_0009,0.395752,0.404053,0.412354,0.428955,0.454102,0.483154,0.508301,0.545654,0.587402,...,0.757324,0.755127,0.751953,0.747803,0.74292,0.738525,0.999756,0.999756,0.999756,0.999756
3,p10020306_83404654_0011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.624756,0.617676,0.614258,0.606934,0.603271,0.592773,0.585693,0.574707,0.567627,0.557129
4,p10020306_83404654_0013,0.440918,0.42749,0.414062,0.400879,0.387451,0.374023,0.360596,0.347168,0.336426,...,0.621094,0.605469,0.595215,0.584961,0.574463,0.564209,0.559082,0.548828,0.533203,0.522949
5,p10020306_83404654_0016,0.520752,0.516602,0.512451,0.508301,0.5,0.5,0.491455,0.487305,0.483154,...,0.625,0.620605,0.616455,0.612305,0.608154,0.608154,0.604004,0.608154,0.608154,0.608154
6,p10020306_83404654_0018,0.416748,0.42749,0.438232,0.443604,0.446289,0.446289,0.440918,0.435547,0.42749,...,0.496338,0.489014,0.478516,0.471436,0.464111,0.457031,0.449951,0.446289,0.439209,0.431885
7,p10020306_83404654_0019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.491455,0.491455,0.491455,0.491455,0.491455,0.5,0.5,0.512451,0.520752,0.524902
8,p10020306_83404654_0020,0.280518,0.297852,0.317139,0.338379,0.359375,0.378662,0.395996,0.411377,0.424805,...,,,,,,,,,,
9,p10020306_83404654_0022,0.5,0.5,0.487305,0.479004,0.466553,0.454102,0.445801,0.4375,0.428955,...,0.554443,0.548096,0.535156,0.522217,0.512695,0.503174,0.5,0.493408,0.483887,0.477295


In [6]:
# import matplotlib.pyplot as plt

# # Plot the Pleth and ABP signals for each patient
# for combined_id in abp_df['combined_unique_id']:
#     # Extract the signals for the current patient
#     abp_signal = abp_df[abp_df['combined_unique_id'] == combined_id].iloc[0, 1:].dropna().values
#     pleth_signal = pleth_df[pleth_df['combined_unique_id'] == combined_id].iloc[0, 1:].dropna().values
    
#     # Create a new figure
#     plt.figure(figsize=(12, 6))
    
#     # Plot ABP signal
#     plt.subplot(2, 1, 1)
#     plt.plot(abp_signal, label='ABP')
#     plt.title(f'ABP Signal for {combined_id}')
#     plt.xlabel('Sample')
#     plt.ylabel('ABP')
#     plt.legend()
    
#     # Plot Pleth signal
#     plt.subplot(2, 1, 2)
#     plt.plot(pleth_signal, label='Pleth', color='orange')
#     plt.title(f'Pleth Signal for {combined_id}')
#     plt.xlabel('Sample')
#     plt.ylabel('Pleth')
#     plt.legend()
    
#     # Show the plot
#     plt.tight_layout()
#     plt.show()

