In [None]:
import sys
from pathlib import Path

In [None]:
!pip install wfdb==4.0.0
import wfdb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wfdb==4.0.0
  Downloading wfdb-4.0.0-py3-none-any.whl (161 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 KB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
Collecting SoundFile<0.12.0,>=0.10.0
  Downloading soundfile-0.11.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: SoundFile, wfdb
  Attempting uninstall: SoundFile
    Found existing installation: soundfile 0.12.1
    Uninstalling soundfile-0.12.1:
      Successfully uninstalled soundfile-0.12.1
Successfully installed SoundFile-0.11.0 wfdb-4.0.0


Specify the name of the MIMIC Waveform Database

In [None]:
# Database path handed to wfdb.get_record_list and used as the prefix of
# every remote directory (pn_dir) built in the cells below.
database_name = 'mimic4wdb/0.1.0'


Identify the records in the database

In [None]:
# Upper bound on the number of records collected by the following cell.
max_records_to_load = 200

# Top-level listing of the database: one entry per subject, and a single
# subject can own several records.
subjects = wfdb.get_record_list(database_name)
print(f"The '{database_name}' database contains data from {len(subjects)} subjects")

The 'mimic4wdb/0.1.0' database contains data from 198 subjects


In [None]:
# Walk the subjects and collect record paths until the cap is reached.
records = []
for subject in subjects:
    studies = wfdb.get_record_list(f'{database_name}/{subject}')
    for study in studies:
        records.append(Path(f'{subject}{study}'))
        # stop if we've loaded enough records
        if len(records) >= max_records_to_load:
            break
    else:
        # The inner loop ran to completion, so the cap has not been reached
        # yet: move on to the next subject.
        continue
    # The inner loop was cut short by `break`: leave the outer loop as well.
    # Without this, every remaining subject would still be listed remotely
    # and contribute one extra record past the cap.
    print("Reached maximum required number of records.")
    break

print(f"Loaded {len(records)} records from the '{database_name}' database.")

Reached maximum required number of records.
Loaded 200 records from the 'mimic4wdb/0.1.0' database.


We now look at the records

In [None]:
# Format the first five record paths as a bulleted list and print them
first_five_records = "\n - ".join(str(x) for x in records[0:5])
print(f"First five records: \n - {first_five_records}")

# Explain the path components visible in the list above
# (the closing parenthesis on the last bullet was missing).
print("""
Note the formatting of these records:
 - intermediate directory ('p100' in this case)
 - subject identifier (e.g. 'p10014354')
 - record identifier (e.g. '81739927')
 """)

First five records: 
 - waves/p100/p10014354/81739927/81739927
 - waves/p100/p10019003/87033314/87033314
 - waves/p100/p10020306/83404654/83404654
 - waves/p100/p10039708/83411188/83411188
 - waves/p100/p10039708/85583557/85583557

Note the formatting of these records:
 - intermediate directory ('p100' in this case)
 - subject identifier (e.g. 'p10014354')
 - record identifier (e.g. '81739927'
 



Extract metadata for a record

Each record contains metadata stored in a header file, named "<record name>.hea"

In [None]:
# Specify the 4th record (note, in Python indexing begins at 0)
idx = 3
record = records[idx]
# Remote PhysioNet directories are always '/'-separated, so build the path
# from the POSIX form of the parent directory instead of the OS-native form
# (identical on Linux/macOS, avoids backslashes on Windows).
record_dir = f'{database_name}/{record.parent.as_posix()}'
print("PhysioNet directory specified for record: {}".format(record_dir))

PhysioNet directory specified for record: mimic4wdb/0.1.0/waves/p100/p10039708/83411188


Specify the record name

In [None]:

# The last path component is the record name; the header file read in the
# next cell is "<record_name>.hea" inside record_dir.
record_name = record.name
print("Record name: {}".format(record_name))

Record name: 83411188



Load the metadata for this record



In [None]:
# Fetch the record header from PhysioNet; rd_segments=True asks wfdb to read
# the headers of the record's segments as well.
record_data = wfdb.rdheader(record_name=record_name,
                            rd_segments=True,
                            pn_dir=record_dir)

# Browser-viewable location of the header file that was just read
remote_url = f"https://physionet.org/content/{record_dir}/{record_name}.hea"
print(f"Done: metadata loaded for record '{record_name}' from the header file at:\n{remote_url}")

Done: metadata loaded for record '83411188' from the header file at:
https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188.hea


**Inspect details of physiological signals recorded in this record**

In [None]:

# Summarise the record header. The f-strings are already fully formatted,
# so no extra .format() call is applied to them.
print(f"- Number of signals: {record_data.n_sig}")
# sig_len / fs gives seconds; dividing by 60*60 converts to hours
print(f"- Duration: {record_data.sig_len/(record_data.fs*60*60):.1f} hours")
print(f"- Base sampling frequency: {record_data.fs} Hz")

- Number of signals: 6
- Duration: 14.2 hours
- Base sampling frequency: 62.4725 Hz


**Inspect the segments making up a record**

In [None]:

# Names of the segments listed in the record header loaded above
segments = record_data.seg_name
print(f"The {len(segments)} segments from record {record_name} are:\n{segments}")

The 6 segments from record 83411188 are:
['83411188_0000', '83411188_0001', '83411188_0002', '83411188_0003', '83411188_0004', '83411188_0005']


**Inspect an individual segment**

In [None]:

# Pick the third segment (index 2) once, so that the header that is loaded
# and the name that is reported can never disagree.
segment_name = segments[2]
segment_metadata = wfdb.rdheader(record_name=segment_name, pn_dir=record_dir)

# record_dir ends in ".../<subject>/<record>", so the name of its parent
# directory is the subject identifier.
subject_id = Path(record_dir).parent.name

print(f"""Header metadata loaded for: 
  - the segment '{segment_name}'
  - in record '{record_name}'
  - for subject '{subject_id}'""")

Header metadata loaded for: 
  - the segment '83411188_0001'
  - in record '83411188'
  - for subject 'p10039708


**Find out what signals are present**

In [None]:

# List the signal names in the segment header and the unit reported for each
print(f"This segment contains the following signals: {segment_metadata.sig_name}")
print(f"The signals are measured in units of: {segment_metadata.units}")

This segment contains the following signals: ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp']
The signals are measured in units of: ['mV', 'mV', 'mV', 'mmHg', 'NU', 'Ohm']


**Find out how long each signal lasts**

In [None]:

# Report the segment's sampling frequency and its duration:
# sig_len / fs gives seconds, and the extra factor of 60 converts to minutes
print(f"The signals have a base sampling frequency of {segment_metadata.fs:.1f} Hz")
print(f"and they last for {segment_metadata.sig_len/(segment_metadata.fs*60):.1f} minutes")

The signals have a base sampling frequency of 62.5 Hz
and they last for 0.9 minutes


**Identify records suitable for analysis**

In [None]:
import pandas as pd
from pprint import pprint

In [None]:

# Recap: `records` is the list of record paths collected near the top of the notebook
print(f"Earlier, we loaded {len(records)} records from the '{database_name}' database.")

Earlier, we loaded 200 records from the 'mimic4wdb/0.1.0' database.


**Specify requirements**

In [None]:

# Signal names that must all be present for a record/segment to be selected
required_sigs = ['ABP', 'Pleth']

In [None]:
# Minimum segment duration, in seconds (60 s = 1 minute); it is compared
# against sig_len / fs in the selection loop below
req_seg_duration = 60

**Find out how many records meet the requirements**

In [None]:
# Qualifying segments, stored column-wise (directory, segment name, duration
# in seconds) so the dict can be passed straight to pandas.DataFrame.
matching_recs = {'dir':[], 'seg_name':[], 'length':[]}

for record in records:
    print('Record: {}'.format(record), end="", flush=True)
    # Remote PhysioNet directories are always '/'-separated, so use the POSIX
    # form of the path rather than the OS-native one.
    record_dir = f'{database_name}/{record.parent.as_posix()}'
    record_name = record.name
    print(' (reading data)')
    record_data = wfdb.rdheader(record_name,
                                pn_dir=record_dir,
                                rd_segments=True)

    # Check whether the required signals are present in the record at all;
    # if not, there is no point in reading its segment headers.
    sigs_present = record_data.sig_name
    if not all(x in sigs_present for x in required_sigs):
        print('   (missing signals)')
        continue

    # Get the segments for the record
    segments = record_data.seg_name

    # Keep the first segment that lasts at least req_seg_duration seconds and
    # contains every required signal. Entries named '~' are skipped
    # (presumably placeholders rather than readable segments - confirm
    # against the WFDB multi-segment header format).
    gen = (segment for segment in segments if segment != '~')
    for segment in gen:
        print(' - Segment: {}'.format(segment), end="", flush=True)
        segment_metadata = wfdb.rdheader(record_name=segment,
                                         pn_dir=record_dir)
        # Duration in seconds: signal length divided by sampling frequency
        seg_length = segment_metadata.sig_len/(segment_metadata.fs)

        if seg_length < req_seg_duration:
            print(f' (too short at {seg_length/60:.1f} mins)')
            continue

        # Long enough: now check that all required signals are in the segment
        sigs_present = segment_metadata.sig_name
        if not all(x in sigs_present for x in required_sigs):
            print(' (long enough, but missing signal(s))')
            continue

        matching_recs['dir'].append(record_dir)
        matching_recs['seg_name'].append(segment)
        matching_recs['length'].append(seg_length)
        print(' (met requirements)')
        # Since we only need one segment per record break out of loop
        break

print(f"A total of {len(matching_recs['dir'])} records met the requirements:")

Record: waves/p100/p10014354/81739927/81739927 (reading data)
   (missing signals)
Record: waves/p100/p10019003/87033314/87033314 (reading data)
   (missing signals)
Record: waves/p100/p10020306/83404654/83404654 (reading data)
 - Segment: 83404654_0000 (too short at 0.0 mins)
 - Segment: 83404654_0001 (long enough, but missing signal(s))
 - Segment: 83404654_0002 (too short at 0.1 mins)
 - Segment: 83404654_0003 (too short at 0.3 mins)
 - Segment: 83404654_0004 (long enough, but missing signal(s))
 - Segment: 83404654_0005 (met requirements)
Record: waves/p100/p10039708/83411188/83411188 (reading data)
 - Segment: 83411188_0000 (too short at 0.0 mins)
 - Segment: 83411188_0001 (too short at 0.1 mins)
 - Segment: 83411188_0002 (too short at 0.9 mins)
 - Segment: 83411188_0003 (too short at 0.3 mins)
 - Segment: 83411188_0004 (too short at 0.3 mins)
 - Segment: 83411188_0005 (long enough, but missing signal(s))
Record: waves/p100/p10039708/85583557/85583557 (reading data)
   (missing si

In [None]:
# Tabulate the matches: one row per qualifying segment, with the columns
# 'dir', 'seg_name' and 'length' taken from the dict's keys
df_matching_recs = pd.DataFrame(data=matching_recs)

In [None]:
# Preview the first rows of the table of matching segments
df_matching_recs.head()


Unnamed: 0,dir,seg_name,length
0,mimic4wdb/0.1.0/waves/p100/p10020306/83404654,83404654_0005,3145.063828
1,mimic4wdb/0.1.0/waves/p101/p10126957/82924339,82924339_0007,2530.393373
2,mimic4wdb/0.1.0/waves/p102/p10209410/84248019,84248019_0004,61.467046
3,mimic4wdb/0.1.0/waves/p109/p10952189/82439920,82439920_0004,25462.723598
4,mimic4wdb/0.1.0/waves/p111/p11109975/82800131,82800131_0002,865.660891


In [None]:
# Number of non-missing entries in each column of the table
df_matching_recs.count()

dir         52
seg_name    52
length      52
dtype: int64

In [None]:
# Turn each column of the results into a bulleted block of text first ...
relevant_segments_names = "\n - ".join(matching_recs['seg_name'])
relevant_dirs = "\n - ".join(matching_recs['dir'])

# ... then print the headline count followed by the two lists.
print(f"A total of {len(matching_recs['dir'])} out of {len(records)} records met the requirements.")
print(f"\nThe relevant segment names are:\n - {relevant_segments_names}")
print(f"\nThe corresponding directories are: \n - {relevant_dirs}")

A total of 52 out of 200 records met the requirements.

The relevant segment names are:
 - 83404654_0005
 - 82924339_0007
 - 84248019_0005
 - 82439920_0004
 - 82800131_0002
 - 84304393_0001
 - 89464742_0001
 - 88958796_0002
 - 88995377_0001
 - 85230771_0004
 - 86643930_0004
 - 81250824_0005
 - 87706224_0002
 - 83058614_0005
 - 82803505_0017
 - 88574629_0001
 - 87867111_0012
 - 84560969_0001
 - 87562386_0001
 - 88685937_0001
 - 86120311_0001
 - 89866183_0014
 - 89068160_0002
 - 86380383_0001
 - 85078610_0008
 - 87702634_0005
 - 84686667_0002
 - 84802706_0002
 - 81811182_0004
 - 84421559_0001
 - 88221516_0006
 - 80057524_0005
 - 84209926_0018
 - 83959636_0003
 - 89989722_0009
 - 89225487_0007
 - 84391267_0001
 - 80889556_0002
 - 85250558_0001
 - 84567505_0005
 - 85814172_0007
 - 88884866_0005
 - 80497954_0010
 - 80666640_0014
 - 84939605_0004
 - 82141753_0018
 - 86874920_0014
 - 84505262_0010
 - 86288257_0001
 - 89699401_0001
 - 88537698_0013
 - 83958172_0001

The corresponding directori