From 9b902c640d0735e3155960ac8ce751c5a653c6ed Mon Sep 17 00:00:00 2001 From: Lucas McCullum Date: Tue, 23 Jun 2020 16:48:20 -0400 Subject: [PATCH] Produces MAT file from WFDB format --- wfdb/__init__.py | 2 +- wfdb/io/__init__.py | 2 +- wfdb/io/record.py | 272 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 2 deletions(-) diff --git a/wfdb/__init__.py b/wfdb/__init__.py index 1da6f736..e2ed7d5f 100644 --- a/wfdb/__init__.py +++ b/wfdb/__init__.py @@ -1,5 +1,5 @@ from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp, - wrsamp, dl_database, edf2mit, wav2mit, sampfreq, signame) + wrsamp, dl_database, edf2mit, wav2mit, wfdb2mat, sampfreq, signame) from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels, show_ann_classes, ann2rr) from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url diff --git a/wfdb/io/__init__.py b/wfdb/io/__init__.py index cddb6626..963f3f01 100644 --- a/wfdb/io/__init__.py +++ b/wfdb/io/__init__.py @@ -1,5 +1,5 @@ from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp, wrsamp, - dl_database, edf2mit, wav2mit, sampfreq, signame, SIGNAL_CLASSES) + dl_database, edf2mit, wav2mit, wfdb2mat, sampfreq, signame, SIGNAL_CLASSES) from wfdb.io._signal import est_res, wr_dat_file from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels, show_ann_classes, ann2rr) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index e60bf025..ea7db80a 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1706,6 +1706,278 @@ def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False): pass +def wfdb2mat(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None): + """ + This program converts the signals of any PhysioNet record (or one in any + compatible format) into a .mat file that can be read directly using any version + of Matlab, and a short text file containing information about the signals + (names, gains, 
baselines, units, sampling frequency, and start time/date if + known). If the input record name is REC, the output files are RECm.mat and + RECm.hea. The output files can also be read by any WFDB application as record + RECm. + + This program does not convert annotation files; for that task, 'rdann' is + recommended. + + The output .mat file contains a single matrix named `val` containing raw + (unshifted, unscaled) samples from the selected record. Using various options, + you can select any time interval within a record, or any subset of the signals, + which can be rearranged as desired within the rows of the matrix. Since .mat + files are written in column-major order (i.e., all of column n precedes all of + column n+1), each vector of samples is written as a column rather than as a + row, so that the column number in the .mat file equals the sample number in the + input record (minus however many samples were skipped at the beginning of the + record, as specified using the `sampfrom` option). If this seems odd, transpose + your matrix after reading it! + + This program writes version 5 MAT-file format output files, as documented in + http://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf + The samples are written as 32-bit signed integers (mattype=20 below) in + little-endian format if the record contains any format 24 or format 32 signals, + as 8-bit unsigned integers (mattype=50) if the record contains only format 80 + signals, or as 16-bit signed integers in little-endian format (mattype=30) + otherwise. + + The maximum size of the output variable is 2^31 bytes. `wfdb2mat` from versions + 10.5.24 and earlier of the original WFDB software package writes version 4 MAT- + files which have the additional constraint of 100,000,000 elements per variable. + + The output files (recordm.mat + recordm.hea) are still WFDB-compatible, given + the .hea file constructed by this program. 
+ + Parameters + ---------- + record_name : str + The name of the input WFDB record to be read. Can also work with both + EDF and WAV files. + pn_dir : str, optional + Option used to stream data from Physionet. The Physionet + database directory from which to find the required record files. + eg. For record '100' in 'http://physionet.org/content/mitdb' + pn_dir='mitdb'. + sampfrom : int, optional + The starting sample number to read for all channels. + sampto : int, 'end', optional + The sample number at which to stop reading for all channels. + Reads the entire duration by default. + channels : list, optional + List of integer indices specifying the channels to be read. + Reads all channels by default. + + Returns + ------- + N/A + + Notes + ----- + The entire file is composed of: + + Bytes 0 - 127: descriptive text + Bytes 128 - 131: master tag (data type = matrix) + Bytes 132 - 135: master tag (data size) + Bytes 136 - 151: array flags (4 byte tag with data type, 4 byte + tag with subelement size, 8 bytes of content) + Bytes 152 - 167: array dimension (4 byte tag with data type, 4 + byte tag with subelement size, 8 bytes of content) + Bytes 168 - 183: array name (4 byte tag with data type, 4 byte + tag with subelement size, 8 bytes of content) + Bytes 184 - ...: array content (4 byte tag with data type, 4 byte + tag with subelement size, ... 
bytes of content) + + Examples + -------- + >>> wfdb.wfdb2mat('100', pn_dir='pwave') + + The output file name is 100m.mat and 100m.hea + + """ + record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom, sampto=sampto) + record_name_out = record_name.split(os.sep)[-1].replace('-','_') + 'm' + + # Some variables describing the format of the .mat file + field_version = 256 # 0x0100 or 256 + endian_indicator = b'IM' # little endian + master_type = 14 # matrix + sub1_type = 6 # UINT32 + sub2_type = 5 # INT32 + sub3_type = 1 # INT8 + sub1_class = 6 # double precision array + + # Determine if we can write 8-bit unsigned samples, or if 16 or 32 bits + # are needed per sample + bytes_per_element = 1 + for i in range(record.n_sig): + if (record.adc_res[i] > 0): + if (record.adc_res[i] > 16): + bytes_per_element = 4 + elif (record.adc_res[i] > 8) and (bytes_per_element < 2): + bytes_per_element = 2 + else: + # adc_res not specified.. try to guess from format + if (record.fmt[i] == '24') or (record.fmt[i] == '32'): + bytes_per_element = 4 + elif (record.fmt[i] != '80') and (bytes_per_element < 2): + bytes_per_element = 2 + + if (bytes_per_element == 1): + sub4_type = 2 # MAT8 + out_type = ' max_length: + raise Exception("Can't write .mat file: data size exceeds 2GB limit") + + # Bytes of actual data + bytes_of_data = bytes_per_element * record.n_sig * desired_length + # This is the remaining number of bytes that don't fit into integer + # multiple of 8: i.e. 
if 18 bytes, bytes_remain = 2, from 17 to 18 + bytes_remain = bytes_of_data % 8 + + # master_bytes = (8 + 8) + (8 + 8) + (8 + 8) + (8 + bytes_of_data) + padding + # Must be integer multiple 8 + if bytes_remain == 0: + master_bytes = bytes_of_data + 56 + else: + master_bytes = bytes_of_data + 64 - (bytes_remain) + + # Start writing the file + output_file = record_name_out + '.mat' + with open(output_file, 'wb') as f: + # Descriptive text (124 bytes) + f.write(struct.pack('<124s', b'MATLAB 5.0')) + # Version (2 bytes) + f.write(struct.pack('