In [1]:
# preprocess DB2

In [2]:
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import shutil
import datetime

import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(''))+'/wfdb-python')
import wfdb
from wfdb import processing

In [3]:
# --- Source database locations -------------------------------------------
PATH_MIMIC2 = '/home/nsc/seers/physionet/database/mimic2db'
PATH_MIMIC2_LIFE_THREATENING = '/home/nsc/seers/physionet/database/mimic2db_life-threatening'
PATH_CHALLENGE2015 = '/home/nsc/seers/physionet/database/challenge-2015/training'
PATH_MC100 = '/home/nsc/seers/cardio/database/mc100/'

# Database processed by this notebook
PATH_TARGET = PATH_MIMIC2

# --- Output location -----------------------------------------------------
# Alternative output root, currently disabled: '/home/nsc/seers/dataset'
PATH_DATASET = '/home/nsc/seers/temp'
MODEL_NAME = 'life-threatening_arrhythmias'

# --- Extraction parameters -----------------------------------------------
FILE_RECORD_LIST = 'RECORDS.alM'  # file listing the record names to process
TARGET_FS = 250                   # sampling frequency of the written samples
TARGET_TIME = 10                  # length of each extracted window, in seconds

# Canonical arrhythmia names (used as keys of the bookkeeping dicts below)
ARRHYTHMIAS_NAME_1 = [
    'Asystole',
    'Bradycardia',
    'Tachycardia',
    'Ventricular_Tachycardia',
    'Ventricular_Flutter_Fib',
]
# Alarm keywords, listed in the same order as the canonical names above
ARRHYTHMIAS_NAME_2 = [
    'ASYSTOLE',
    'BRADY',
    'TACHY',
    'V-TACH',
    'V-FIB/TACH',
]

# Lookup tables between the two naming schemes (paired by list position)
ARRHYTHMIAS_NAME_2TO1 = {
    keyword: canonical
    for keyword, canonical in zip(ARRHYTHMIAS_NAME_2, ARRHYTHMIAS_NAME_1)
}
ARRHYTHMIAS_NAME_1TO2 = {
    canonical: keyword
    for canonical, keyword in zip(ARRHYTHMIAS_NAME_1, ARRHYTHMIAS_NAME_2)
}

# --- Mutable bookkeeping -------------------------------------------------
list_record = []
# One independent, initially empty list per canonical arrhythmia name
dic_list_true = {canonical: [] for canonical in ARRHYTHMIAS_NAME_1}
dic_list_false = {canonical: [] for canonical in ARRHYTHMIAS_NAME_1}

In [4]:
!pwd

/home/nsc/seers/seers_cai_mb/notebook


In [5]:
def arrhythmias_name(comment):
    """
    Translate an alarm note into its canonical arrhythmia name.

    The keywords of ARRHYTHMIAS_NAME_2 are tried in list order and the
    first one found as a substring of `comment` decides the result.

    Parameters
    ----------
    comment : str
        Text to search for an alarm keyword.

    Returns
    -------
    str or None
        The ARRHYTHMIAS_NAME_2TO1 entry of the first matching keyword,
        or None when no keyword occurs in `comment`.

    """
    matching_names = (
        ARRHYTHMIAS_NAME_2TO1[keyword]
        for keyword in ARRHYTHMIAS_NAME_2
        if keyword in comment
    )
    return next(matching_names, None)

In [6]:
# Load the names of the records to process (one name per line).
# The list is rebuilt from scratch instead of being extended in place, so
# re-running this cell cannot append every record a second time, and blank
# lines are dropped so that no empty record name reaches the processing loop.
with open(os.path.join(PATH_TARGET, FILE_RECORD_LIST)) as f:
    list_record = [line.strip() for line in f if line.strip()]

In [7]:
# The '.alM' annotation files hold the alarm annotations of each record.

In [8]:
import numpy as np
from scipy import signal

In [9]:
def resample_sig(x, fs, fs_target):
    """
    Change the sampling frequency of a signal.

    Parameters
    ----------
    x : numpy array
        Signal to resample; its first axis is the sample axis.
    fs : int, or float
        Sampling frequency of `x`.
    fs_target : int, or float
        Sampling frequency wanted for the output.

    Returns
    -------
    resampled_x : numpy array
        The resampled signal values. This is `x` itself when `fs` already
        equals `fs_target`.
    resampled_t : numpy array
        Locations of the resampled values, expressed on the sample index
        axis of the input signal.

    """
    n_samples = x.shape[0]
    sample_positions = np.arange(n_samples).astype('float64')

    if fs != fs_target:
        # Output length is scaled by the frequency ratio and truncated
        target_length = int(n_samples*fs_target/fs)
        return signal.resample(x, num=target_length, t=sample_positions)

    # Frequencies already agree: hand the input back untouched
    return x, sample_positions

In [10]:
def replace_nan(x):
    """
    Fill the NaN entries of a 2-D array in place.

    Every NaN is replaced by the value directly above it in the same
    column. A NaN in the first row has no value above it, so it takes the
    value of the second row instead; when that one is NaN too (or does not
    exist because the array has a single row) 0. is used.

    The NaN positions are printed before filling and the remaining NaN
    positions are printed afterwards.

    Parameters
    ----------
    x : numpy array
        2-D float array, indexed as x[row][column]. It is modified in place.

    Returns
    -------
    None

    """
    # Positions are listed row by row, so the row above a NaN has always
    # been filled already by the time that NaN is visited.
    nan_list = np.argwhere(np.isnan(x))
    print(nan_list)
    n_rows = x.shape[0]
    for nan_p in nan_list:
        row, col = nan_p[0], nan_p[1]
        if row == 0:
            if n_rows == 1:
                # Single-row array: there is no neighbour to copy from
                x[row][col] = 0.
                continue
            if np.isnan(x[row+1][col]):
                print('start nan')
                x[row+1][col] = 0.
            x[row][col] = x[row+1][col]
        else:
            x[row][col] = x[row-1][col]
    print(np.argwhere(np.isnan(x)))

In [11]:
# For every alarm annotation of every listed record: cut a TARGET_TIME-second
# lead-II window, resample it to TARGET_FS and store it as a WFDB record, a
# CSV file and a PNG plot under
# PATH_DATASET/MODEL_NAME/<true|false>/<arrhythmia>/<sample_name>.
print('target record number : ', len(list_record))
for num, record in enumerate(list_record):
    file_record = os.path.join(PATH_TARGET, record, record)
    annotation = wfdb.rdann(file_record, 'alM')
    print ('%d. read record : %s - sample : %d' % (num+1, file_record, len(annotation.sample)))

    # Window length in samples, at the sampling frequency of the annotation
    window_len = annotation.fs * TARGET_TIME

    for idx, sample in enumerate(annotation.sample):
        sample_name = "%s-a%d" % (record, idx+1)
        # Annotations on channel 1 are handled as true alarms, all others as false
        comments_truefalse = True if annotation.chan[idx] == 1 else False
        str_title = ("%s %s %s(%d) %s" %
                      (sample_name,
                       "True" if comments_truefalse else "False",
                       str(datetime.timedelta(seconds=sample/annotation.fs)),
                       sample, annotation.aux_note[idx]))
        print('  %d-%d. %s' % (num+1, idx+1, str_title))

        # The window ends at the alarm sample. An alarm that occurs less than
        # one window length into the record uses the first window instead.
        record_10s_sampfrom = max(sample - window_len, 0)
        record_10s_sampto = max(sample, window_len)

        # All channels are read first, only to look up the index of lead 'II'
        record_10s = wfdb.rdrecord(file_record,
                                   sampfrom = record_10s_sampfrom,
                                   sampto = record_10s_sampto)

        try:
            record_10s_ii = wfdb.rdrecord(file_record,
                                       channels = [record_10s.sig_name.index('II')],
                                       sampfrom = record_10s_sampfrom,
                                       sampto = record_10s_sampto)
        except Exception as ex:
            # Also reached when the record has no signal named 'II'
            print ("rdrecord exception (not exist II) : ", ex)
            continue

        comments_arrhythmias = arrhythmias_name(annotation.aux_note[idx])
        if comments_arrhythmias is None:
            print ('  ** annotation error : ', annotation.aux_note[idx], comments_arrhythmias)
            continue

        # Skip windows that carry no usable signal
        if record_10s_ii.fmt is None or record_10s_ii.adc_gain is None:
            print ('  ** none data ')
            continue
        if record_10s_ii.fmt[0] is None or record_10s_ii.adc_gain[0] is None:
            print ('  ** none data ')
            continue
        if np.all(np.isnan(record_10s_ii.p_signal)):
            print ('  ** all data is nan')
            continue
        if np.all(record_10s_ii.p_signal == 0.):
            print ('  ** all data is 0.')
            continue

        sample_path = os.path.join(PATH_DATASET,
                                   MODEL_NAME,
                                   "true" if comments_truefalse else "false",
                                   comments_arrhythmias,
                                   sample_name)
        # exist_ok=True already covers a directory left by an earlier run
        os.makedirs(sample_path, exist_ok=True)

        # Start time of the window: record base datetime plus the window offset
        d = record_10s_ii.base_date
        t = record_10s_ii.base_time
        dt = datetime.datetime.combine(d, t)
        dt2 = dt + datetime.timedelta(seconds=record_10s_sampfrom/annotation.fs)

        # Remaining NaN samples are filled in place before resampling
        if np.isnan(record_10s_ii.p_signal).any():
            replace_nan(record_10s_ii.p_signal)

        resampled_x, resampled_t = resample_sig(record_10s_ii.p_signal, record_10s_ii.fs, TARGET_FS)

        if np.all(np.isnan(resampled_x)):
            # Unexpected after the NaN fill above: show the input and abort
            print ('  ** all resampled data is nan')
            wfdb.plot_wfdb(record=record_10s_ii, title=str_title, time_units='seconds')
            display(record_10s_ii.__dict__)
            raise RuntimeError('all resampled data is nan : %s' % sample_name)

        # Re-label the record so it describes the resampled window
        record_10s_ii.fs = TARGET_FS
        record_10s_ii.sig_len = resampled_x.shape[0]
        record_10s_ii.p_signal = resampled_x
        record_10s_ii.record_name = sample_name
        record_10s_ii.base_date = dt2.date()
        record_10s_ii.base_time = dt2.time()
        record_10s_ii.base_datetime = dt2
        record_10s_ii.fmt = ['16']
        record_10s_ii.comments = [comments_arrhythmias,
                                    "True alarm" if comments_truefalse else "False alarm",
                                    annotation.aux_note[idx]
                                   ]
        str_title = ("load %s %s %s" %
                      (record_10s_ii.record_name,
                       record_10s_ii.comments[0],
                       "True" if comments_truefalse else "False")
                    )
        try:
            print('  %d-%d. write %s' % (num+1, idx+1, sample_path))
            wfdb.wrsamp(record_name = record_10s_ii.record_name,
                        fs = record_10s_ii.fs,
                        units = record_10s_ii.units,
                        sig_name = record_10s_ii.sig_name,
                        p_signal = record_10s_ii.p_signal,
                        fmt = record_10s_ii.fmt,
                        adc_gain = record_10s_ii.adc_gain,
                        baseline = record_10s_ii.baseline,
                        base_time = record_10s_ii.base_time,
                        base_date = record_10s_ii.base_date,
                        comments = record_10s_ii.comments,
                        write_dir = sample_path
                       )
            np.savetxt(os.path.join(sample_path, sample_name+'.csv'),
                          record_10s_ii.p_signal, fmt='%.5f', delimiter=",")
            print('  %d-%d. %s' % (num+1, idx+1, str_title))

            # Read the written record back and save a plot of it
            record_sample = wfdb.rdrecord(os.path.join(sample_path, sample_name))
            plt_fig = wfdb.figure_wfdb(record=record_sample, time_units='seconds',
                                       title=str_title, figsize=(10, 4), return_fig=True)
            plt_fig.savefig(os.path.join(sample_path, sample_name), dpi=100)
            # One figure is created per sample; close it once it is saved so
            # open figures do not pile up over the whole run
            plt.close(plt_fig)

            if comments_truefalse:
                dic_list_true[comments_arrhythmias].append(record_10s_ii.record_name)
            else:
                dic_list_false[comments_arrhythmias].append(record_10s_ii.record_name)

        except Exception as ex:
            print ("exception : ", ex)
            # Re-raise the original exception so its type and traceback survive
            raise


# Summary: number of written samples per arrhythmia, split by alarm validity
print('')
count_true = 0
count_false = 0
for key in ARRHYTHMIAS_NAME_1:
    count_true += len(dic_list_true[key])
    print('true %s : %d' % (key, len(dic_list_true[key])))
    count_false += len(dic_list_false[key])
    print('false %s : %d' % (key, len(dic_list_false[key])))

    print('')
print('total true : ' + str(count_true))
print('total false : ' + str(count_false))

target record number :  498
1. read record : /media/nsc_bigdata/seers/physionet/database/mimic2db/a40017/a40017 - sample : 13
  1-1. a40017-a1 True 0:00:36.344000(4543) ***TACHY 132 >130
  ** none data 
  1-2. a40017-a2 True 0:03:58.344000(29793) ***TACHY 140 >130
  1-2. write /media/nsc_bigdata/seers/dataset/life-threatening_arrhythmias/true/Tachycardia/a40017-a2
  1-2. load a40017-a2 Tachycardia True
  1-3. a40017-a3 True 0:11:02.344000(82793) ***TACHY 141 >130
  1-3. write /media/nsc_bigdata/seers/dataset/life-threatening_arrhythmias/true/Tachycardia/a40017-a3
  1-3. load a40017-a3 Tachycardia True
  1-4. a40017-a4 True 0:14:21.344000(107668) ***TACHY 141 >130
  1-4. write /media/nsc_bigdata/seers/dataset/life-threatening_arrhythmias/true/Tachycardia/a40017-a4
  1-4. load a40017-a4 Tachycardia True
  1-5. a40017-a5 True 0:25:34.344000(191793) ***TACHY 131 >130
  1-5. write /media/nsc_bigdata/seers/dataset/life-threatening_arrhythmias/true/Tachycardia/a40017-a5
  1-5. load a40017-a5 