#### Take a patients folder, display the segment details in the patient and also all the subsegments in the segment.

#### We also filter the subsegments in such a way that they should have both ABP and Pleth signals and be a minumum of two minutes of duration.

For example for patient p10019003 folder, we have a segment 87033314. In segment 87033314, we have 10 subsegment 87033314_0000 to 87033314_0009. In subsegment, we have ABP and Pleth signals. We need to read the data from the subsegment.

In [7]:
import os
import wfdb

def display_segment_info(record):
    print(f"--- Segment Header Information for {record.record_name} ---")
    print(f"Record name and segment number: {record.record_name}")
    print(f"Number of signals: {record.n_sig}")
    print(f"Total duration: {record.sig_len / record.fs} seconds")
    print(f"Sampling frequency: {record.fs} Hz")
    print(f"Counter frequency: {record.counter_freq} Hz")
    print(f"Number of samples per signal: {record.sig_len}")
    print(f"Start time of the recording: {record.base_time}")
    print(f"Start date of the recording: {record.base_date}")
    print(f"Meta data: {record.comments}")
    print("--------------------------------------------------\n")

def display_subsegment_info(record):
    print(f"--- Subsegment Signal Information for {record.record_name} ---")
    print(f"Number of signals: {record.n_sig}")
    for i in range(record.n_sig):
        print(f"Signal {i+1} name: {record.sig_name[i]}")
        print(f"Signal {i+1} duration: {record.sig_len / record.fs} seconds")
    print("--------------------------------------------------\n")

def process_patient_folder(patient_folder_path):
    subsegment_list = []
    subsegment_abp_pleth_list = []
    min_max_durations = {'ABP': {'min': float('inf'), 'max': 0}, 'Pleth': {'min': float('inf'), 'max': 0}}
    for segment_folder in os.listdir(patient_folder_path):
        if segment_folder.startswith('8'):
            segment_folder_path = os.path.join(patient_folder_path, segment_folder)
            header_file_path = os.path.join(segment_folder_path, segment_folder + '.hea')
            if os.path.exists(header_file_path):
                segment_record = wfdb.rdheader(header_file_path[:-4])
                display_segment_info(segment_record)
                # Process subsegment header files
                subsegment_files = [f for f in os.listdir(segment_folder_path) if f.startswith(segment_folder + '_') and f.endswith('.hea')]
                for subsegment_file in subsegment_files:
                    subsegment_file_path = os.path.join(segment_folder_path, subsegment_file)
                    if os.path.exists(subsegment_file_path):
                        subsegment_record = wfdb.rdheader(subsegment_file_path[:-4])
                        display_subsegment_info(subsegment_record)
                        subsegment_list.append(subsegment_file_path[:-4]+'.dat')
                        # Check if both ABP and pleth signals are present and duration is at least 2 minutes
                        if subsegment_record.sig_name and 'ABP' in subsegment_record.sig_name and 'Pleth' in subsegment_record.sig_name:
                            abp_duration = subsegment_record.sig_len / subsegment_record.fs
                            pleth_duration = subsegment_record.sig_len / subsegment_record.fs
                            if abp_duration >= 120 and pleth_duration >= 120:
                                subsegment_abp_pleth_list.append(subsegment_file_path[:-4]+'.dat')
                                abp_index = subsegment_record.sig_name.index('ABP')
                                pleth_index = subsegment_record.sig_name.index('Pleth')
                                if subsegment_file_path[:-4]+'.dat' in subsegment_abp_pleth_list:
                                    min_max_durations['ABP']['min'] = min(min_max_durations['ABP']['min'], abp_duration)
                                    min_max_durations['ABP']['max'] = max(min_max_durations['ABP']['max'], abp_duration)
                                    min_max_durations['Pleth']['min'] = min(min_max_durations['Pleth']['min'], pleth_duration)
                                    min_max_durations['Pleth']['max'] = max(min_max_durations['Pleth']['max'], pleth_duration)
            else:
                print(f"Header file not found: {header_file_path}")
    print(f"Total number of subsegments: {len(subsegment_list)}")
    print(f"Total number of subsegments with ABP and Pleth: {len(subsegment_abp_pleth_list)}")
    print(f"Minimum and Maximum durations for ABP: {min_max_durations['ABP']['min']} seconds, {min_max_durations['ABP']['max']} seconds")
    print(f"Minimum and Maximum durations for Pleth: {min_max_durations['Pleth']['min']} seconds, {min_max_durations['Pleth']['max']} seconds")
    return subsegment_list, subsegment_abp_pleth_list

# Example usage
patient_folder_path = r"E:\Research\Project Tiny\physionet.org\files\mimic4wdb\0.1.0\waves\train\p10020306"
subsegments, subsegments_abp_pleth = process_patient_folder(patient_folder_path)
print()
print("All subsegments:", subsegments)
print()
print("Subsegments with ABP and pleth:", subsegments_abp_pleth)


--- Segment Header Information for 83404654 ---
Record name and segment number: 83404654
Number of signals: 7
Total duration: 274696.22633958946 seconds
Sampling frequency: 62.4725 Hz
Counter frequency: 999.56 Hz
Number of samples per signal: 17160960
Start time of the recording: 17:02:28.554000
Start date of the recording: 2135-01-21
Meta data: ['wfdb 10.7', 'subject_id 10020306', 'hadm_id 23052851']
--------------------------------------------------

--- Subsegment Signal Information for 83404654_0000 ---
Number of signals: 7
Signal 1 name: II
Signal 1 duration: 0.0 seconds
Signal 2 name: III
Signal 2 duration: 0.0 seconds
Signal 3 name: V
Signal 3 duration: 0.0 seconds
Signal 4 name: aVR
Signal 4 duration: 0.0 seconds
Signal 5 name: ABP
Signal 5 duration: 0.0 seconds
Signal 6 name: Pleth
Signal 6 duration: 0.0 seconds
Signal 7 name: Resp
Signal 7 duration: 0.0 seconds
--------------------------------------------------

--- Subsegment Signal Information for 83404654_0001 ---
Number o

#### Approach 1: We try to get the whole abp and pleth reading as a list in their respective column for all the duration. We have also combined the patient id and subsegment id to create a combined unique id which we will use for referencing the record.

In [3]:
import pandas as pd

# Initialize two separate lists to store ABP and Pleth data
abp_data = []
pleth_data = []

# Iterate through each subsegment that contains both ABP and Pleth signals
for subsegment_path in subsegments_abp_pleth:
    # Extract the combined unique id from the file path
    parts = subsegment_path.split(os.sep)
    patient_no = parts[-3]
    segment_no = parts[-2]
    subsegment_no = parts[-1].split('.')[0]
    combined_id = patient_no +'_' +subsegment_no
    
    # Read the signal data using the rdrecord function
    record = wfdb.rdrecord(subsegment_path[:-4])
    
    # Extract ABP and Pleth signals
    abp_signal = record.p_signal[:, record.sig_name.index('ABP')]
    pleth_signal = record.p_signal[:, record.sig_name.index('Pleth')]
    
    # Append the data to the respective lists
    abp_data.append({
        'combined_unique_id': combined_id,
        'abp': abp_signal.tolist()
    })
    pleth_data.append({
        'combined_unique_id': combined_id,
        'pleth': pleth_signal.tolist()
    })

# Create DataFrames from the lists
abp_df = pd.DataFrame(abp_data)
pleth_df = pd.DataFrame(pleth_data)

# Print the DataFrames
print(abp_df)
print(pleth_df)

         combined_unique_id                                                abp
0   p10020306_83404654_0005  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
1   p10020306_83404654_0006  [72.5, 72.4375, 72.1875, 72.25, 73.875, 78.312...
2   p10020306_83404654_0009  [80.3125, 75.625, 70.9375, 68.4375, 68.9375, 6...
3   p10020306_83404654_0011  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
4   p10020306_83404654_0013  [61.625, 61.1875, 60.75, 60.25, 59.8125, 59.37...
5   p10020306_83404654_0016  [65.625, 65.1875, 64.75, 64.25, 63.875, 63.437...
6   p10020306_83404654_0018  [57.4375, 57.0625, 56.6875, 56.4375, 56.25, 55...
7   p10020306_83404654_0019  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
8   p10020306_83404654_0020  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
9   p10020306_83404654_0022  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
10  p10020306_83404654_0023  [124.25, 124.25, 124.1875, 124.125, 124.125, 1...
         combined_unique_id                         

In [4]:
print(abp_df.head(10))
print()
print(f'The shape of ABP is {abp_df.shape}')

        combined_unique_id                                                abp
0  p10020306_83404654_0005  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
1  p10020306_83404654_0006  [72.5, 72.4375, 72.1875, 72.25, 73.875, 78.312...
2  p10020306_83404654_0009  [80.3125, 75.625, 70.9375, 68.4375, 68.9375, 6...
3  p10020306_83404654_0011  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
4  p10020306_83404654_0013  [61.625, 61.1875, 60.75, 60.25, 59.8125, 59.37...
5  p10020306_83404654_0016  [65.625, 65.1875, 64.75, 64.25, 63.875, 63.437...
6  p10020306_83404654_0018  [57.4375, 57.0625, 56.6875, 56.4375, 56.25, 55...
7  p10020306_83404654_0019  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
8  p10020306_83404654_0020  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
9  p10020306_83404654_0022  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...

The shape of ABP is (11, 2)


In [5]:
print(pleth_df.head(10))
print()
print(f'The shape of Pleth is {pleth_df.shape}')

        combined_unique_id                                              pleth
0  p10020306_83404654_0005  [0.5029296875, 0.5029296875, 0.5029296875, 0.5...
1  p10020306_83404654_0006  [0.55810546875, 0.553955078125, 0.5498046875, ...
2  p10020306_83404654_0009  [0.395751953125, 0.404052734375, 0.41235351562...
3  p10020306_83404654_0011  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
4  p10020306_83404654_0013  [0.44091796875, 0.427490234375, 0.4140625, 0.4...
5  p10020306_83404654_0016  [0.520751953125, 0.5166015625, 0.512451171875,...
6  p10020306_83404654_0018  [0.416748046875, 0.427490234375, 0.43823242187...
7  p10020306_83404654_0019  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
8  p10020306_83404654_0020  [0.280517578125, 0.2978515625, 0.317138671875,...
9  p10020306_83404654_0022  [0.5, 0.5, 0.4873046875, 0.47900390625, 0.4665...

The shape of Pleth is (11, 2)


In [6]:
# Find the number of readings in each ABP and Pleth reading without adding to the DataFrames
num_abp_readings = [len(abp) for abp in abp_df['abp']]
num_pleth_readings = [len(pleth) for pleth in pleth_df['pleth']]

# Print the number of readings for each ABP and Pleth
print("Number of readings in each ABP:", num_abp_readings)
print("Number of readings in each Pleth:", num_pleth_readings)



Number of readings in each ABP: [196480, 76800, 764160, 4012160, 443840, 3096960, 687680, 5083200, 8640, 32320, 61120]
Number of readings in each Pleth: [196480, 76800, 764160, 4012160, 443840, 3096960, 687680, 5083200, 8640, 32320, 61120]


#### Approach 2: We have filtered the subsegments to have maximum of  3 minutes duration.

In [7]:
import pandas as pd

# Initialize two separate lists to store ABP and Pleth data
abp_data = []
pleth_data = []

# Define the number of samples per minute assuming a sampling rate (e.g., 125 Hz)
sampling_rate = 125  # Example sampling rate
samples_per_minute = 60 * sampling_rate
max_samples = 3 * samples_per_minute  # 3 minutes of data

# Iterate through each subsegment that contains both ABP and Pleth signals
for subsegment_path in subsegments_abp_pleth:
    # Extract the combined unique id from the file path
    parts = subsegment_path.split(os.sep)
    patient_no = parts[-3]
    segment_no = parts[-2]
    subsegment_no = parts[-1].split('.')[0]
    combined_id = patient_no + '_' + subsegment_no
    
    # Read the signal data using the rdrecord function
    record = wfdb.rdrecord(subsegment_path[:-4])
    
    # Extract ABP and Pleth signals
    abp_signal = record.p_signal[:, record.sig_name.index('ABP')]
    pleth_signal = record.p_signal[:, record.sig_name.index('Pleth')]
    
    # Limit the signal to the first 3 minutes
    abp_signal = abp_signal[:max_samples]
    pleth_signal = pleth_signal[:max_samples]
    
    # Append the data to the respective lists
    abp_data.append({
        'combined_unique_id': combined_id,
        'abp': abp_signal.tolist()
    })
    pleth_data.append({
        'combined_unique_id': combined_id,
        'pleth': pleth_signal.tolist()
    })

# Create DataFrames from the lists
abp_df = pd.DataFrame(abp_data)
pleth_df = pd.DataFrame(pleth_data)

# Print the DataFrames
print(abp_df)
print(pleth_df)

         combined_unique_id                                                abp
0   p10020306_83404654_0005  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
1   p10020306_83404654_0006  [72.5, 72.4375, 72.1875, 72.25, 73.875, 78.312...
2   p10020306_83404654_0009  [80.3125, 75.625, 70.9375, 68.4375, 68.9375, 6...
3   p10020306_83404654_0011  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
4   p10020306_83404654_0013  [61.625, 61.1875, 60.75, 60.25, 59.8125, 59.37...
5   p10020306_83404654_0016  [65.625, 65.1875, 64.75, 64.25, 63.875, 63.437...
6   p10020306_83404654_0018  [57.4375, 57.0625, 56.6875, 56.4375, 56.25, 55...
7   p10020306_83404654_0019  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
8   p10020306_83404654_0020  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
9   p10020306_83404654_0022  [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
10  p10020306_83404654_0023  [124.25, 124.25, 124.1875, 124.125, 124.125, 1...
         combined_unique_id                         

In [8]:
# Find the number of readings in each ABP and Pleth reading without adding to the DataFrames
num_abp_readings = [len(abp) for abp in abp_df['abp']]
num_pleth_readings = [len(pleth) for pleth in pleth_df['pleth']]

# Print the number of readings for each ABP and Pleth
print("Number of readings in each ABP:", num_abp_readings)
print("Number of readings in each Pleth:", num_pleth_readings)



Number of readings in each ABP: [22500, 22500, 22500, 22500, 22500, 22500, 22500, 22500, 8640, 22500, 22500]
Number of readings in each Pleth: [22500, 22500, 22500, 22500, 22500, 22500, 22500, 22500, 8640, 22500, 22500]


In [9]:
abp_df

Unnamed: 0,combined_unique_id,abp
0,p10020306_83404654_0005,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
1,p10020306_83404654_0006,"[72.5, 72.4375, 72.1875, 72.25, 73.875, 78.312..."
2,p10020306_83404654_0009,"[80.3125, 75.625, 70.9375, 68.4375, 68.9375, 6..."
3,p10020306_83404654_0011,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
4,p10020306_83404654_0013,"[61.625, 61.1875, 60.75, 60.25, 59.8125, 59.37..."
5,p10020306_83404654_0016,"[65.625, 65.1875, 64.75, 64.25, 63.875, 63.437..."
6,p10020306_83404654_0018,"[57.4375, 57.0625, 56.6875, 56.4375, 56.25, 55..."
7,p10020306_83404654_0019,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
8,p10020306_83404654_0020,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9,p10020306_83404654_0022,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


#### Approach 3: We now create a dataframe with maximum duration of 3 minutes and each reading each column.

In [8]:
import pandas as pd
import wfdb
import os

# Initialize two separate lists to store ABP and Pleth data
abp_data = []
pleth_data = []

# Define the number of samples per minute assuming a sampling rate (e.g., 125 Hz)
sampling_rate = 125  # Example sampling rate
samples_per_minute = 60 * sampling_rate
max_samples = 3 * samples_per_minute  # 3 minutes of data

# Iterate through each subsegment that contains both ABP and Pleth signals
for subsegment_path in subsegments_abp_pleth:
    # Extract the combined unique id from the file path
    parts = subsegment_path.split(os.sep)
    patient_no = parts[-3]
    segment_no = parts[-2]
    subsegment_no = parts[-1].split('.')[0]
    combined_id = patient_no + '_' + subsegment_no
    
    # Read the signal data using the rdrecord function
    record = wfdb.rdrecord(subsegment_path[:-4])
    
    # Extract ABP and Pleth signals
    abp_signal = record.p_signal[:, record.sig_name.index('ABP')]
    pleth_signal = record.p_signal[:, record.sig_name.index('Pleth')]
    
    # Limit the signal to the first 3 minutes
    abp_signal = abp_signal[:max_samples]
    pleth_signal = pleth_signal[:max_samples]
    
    # Append the data to the respective lists
    abp_data.append([combined_id] + abp_signal.tolist())
    pleth_data.append([combined_id] + pleth_signal.tolist())

# Create DataFrames from the lists
abp_df = pd.DataFrame(abp_data)
pleth_df = pd.DataFrame(pleth_data)

# Rename the first column of abp_df and pleth_df to 'combined_unique_id'
abp_df.rename(columns={0: 'combined_unique_id'}, inplace=True)
pleth_df.rename(columns={0: 'combined_unique_id'}, inplace=True)

# Print the DataFrames
print(abp_df)
print(pleth_df)

         combined_unique_id         1         2         3         4         5  \
0   p10020306_83404654_0005       NaN       NaN       NaN       NaN       NaN   
1   p10020306_83404654_0006   72.5000   72.4375   72.1875   72.2500   73.8750   
2   p10020306_83404654_0009   80.3125   75.6250   70.9375   68.4375   68.9375   
3   p10020306_83404654_0011       NaN       NaN       NaN       NaN       NaN   
4   p10020306_83404654_0013   61.6250   61.1875   60.7500   60.2500   59.8125   
5   p10020306_83404654_0016   65.6250   65.1875   64.7500   64.2500   63.8750   
6   p10020306_83404654_0018   57.4375   57.0625   56.6875   56.4375   56.2500   
7   p10020306_83404654_0019       NaN       NaN       NaN       NaN       NaN   
8   p10020306_83404654_0020    0.0000    0.0000    0.0000    0.0000    0.0000   
9   p10020306_83404654_0022       NaN       NaN       NaN       NaN       NaN   
10  p10020306_83404654_0023  124.2500  124.2500  124.1875  124.1250  124.1250   

           6         7     

In [9]:
abp_df.isnull().sum()

combined_unique_id    0
1                     4
2                     4
3                     4
4                     4
                     ..
22496                 1
22497                 1
22498                 1
22499                 1
22500                 1
Length: 22501, dtype: int64

In [4]:
abp_df.shape

(11, 22501)

In [10]:
pleth_df.isnull().sum()

combined_unique_id    0
1                     0
2                     0
3                     0
4                     0
                     ..
22496                 1
22497                 1
22498                 1
22499                 1
22500                 1
Length: 22501, dtype: int64

In [6]:
pleth_df.shape

(11, 22501)