In [None]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


snippet for duration reporting

In [None]:
# Reusable timing pattern: take a start timestamp, run the work, then report the delta.
# `datetime` is imported here because this cell runs before the notebook's main
# import cell; without it a fresh kernel raises NameError on Restart & Run All.
import datetime

timestamp_start = datetime.datetime.now()

timestamp_dur = datetime.datetime.now() - timestamp_start
print('Elapsed time = ' + str(timestamp_dur))

# [ IMPORTS ]

In [None]:
import datetime
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import statistics as stats
import scipy.signal as scsig

import tensorflow as tf
import keras
from tensorflow.keras.layers import Dense, Input, LSTM, Conv1D, MaxPooling1D, Flatten, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

Using TensorFlow backend.


# [ GLOBAL ]

In [None]:
# Common sampling rate (Hz) that every signal is resampled to for consistency
BASIC_SRATE = 128
print(f'Basic sampling rate(Hz): {BASIC_SRATE}')


#=================================================
# Working directories
#=================================================

# Base working directory; every other directory below is a child of it
global_dir = '/content/drive/My Drive/Masters/workdir/ecg_data'
print(f'GLOBAL DIR :: {global_dir}')

# Signal data in MATLAB (.mat) format
global_matdir = os.path.join(global_dir, 'db_mat')
print(f'GLOBAL MAT DIR :: {global_matdir}')

# Signal and meta data in NumPy (.npy) format
global_npydir = os.path.join(global_dir, 'db_npy')
print(f'GLOBAL NPY DIR :: {global_npydir}')

# Manually generated datasets
global_dsdir = os.path.join(global_dir, 'db_dataset')
print(f'GLOBAL DATSET DIR :: {global_dsdir}')

# Model weights and test results (for load_weights() / save_weights())
global_modeldir = os.path.join(global_dir, 'db_model')
print(f'GLOBAL MODEL DIR :: {global_modeldir}')


#=================================================
# Annotations
#=================================================

# Annotation mapping files used by the experiments
global_antdir = os.path.join(global_dir, 'db_ant')
print(f'GLOBAL ANNOTATION DIR :: {global_antdir}')


#=================================================
# File identifiers
#=================================================
# Suffixes used when saving per-record data as '<record>_<suffix>.npy'
g_BA = 'BA'         # beat annotations (at the original sampling rate)
g_NBA = 'NBA'       # non-beat annotations (at the original sampling rate)
g_RAW2 = 'RAW2'     # raw lead-II signal taken from the .mat file
g_BLF2 = 'BLF2'     # baseline-fitted signal
g_RES2 = 'RES2'     # signal resampled to BASIC_SRATE

g_SIG2 = 'SIG2'     # resampled signal with the manual gain removed
g_RPEAK = 'RRP'     # R-peak positions resampled to BASIC_SRATE

Basic sampling rate(Hz): 128
GLOBAL DIR :: /content/drive/My Drive/Masters/workdir/ecg_data
GLOBAL MAT DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_mat
GLOBAL NPY DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_npy
GLOBAL DATSET DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_dataset
GLOBAL MODEL DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_model
GLOBAL ANNOTATION DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_ant


# [ ~BUILD NPY DATABASE ]

## [ CREATE WORKING DIRS ]

In [None]:
# Make sure every output directory exists (no-op for the ones already present)
for _workdir in (global_npydir, global_modeldir, global_dsdir, global_antdir):
    os.makedirs(_workdir, exist_ok=True)

# Show what the base working directory now contains, one entry per line
dirlist = os.listdir(global_dir)
for di in dirlist:
    print(di)

db_raw
db_mat
db_ant
annotations.txt
db_npy
db_model
db_dataset


## [ DEFINE PARAMS ]

In [None]:
# select source and destination dirs for data extraction
import shutil
from scipy.io import loadmat
from scipy.signal import medfilt

# === Standard ECG lead names ===
# Six limb leads
std_I, std_II, std_III = 'I', 'II', 'III'
std_AVL, std_AVR, std_AVF = 'AVL', 'AVR', 'AVF'
# Six V leads
std_V1, std_V2, std_V3 = 'V1', 'V2', 'V3'
std_V4, std_V5, std_V6 = 'V4', 'V5', 'V6'

# === Lead name as written in a header file -> standard lead name ===
g_LEAD_MAPPER = {
    # incartdb: header names already match the standard names
    'I': std_I,
    'II': std_II,
    'III': std_III,
    'AVL': std_AVL,
    'AVR': std_AVR,
    'AVF': std_AVF,
    'V1': std_V1,
    'V2': std_V2,
    'V3': std_V3,
    'V4': std_V4,
    'V5': std_V5,   # mitdb uses 'V5' as well
    'V6': std_V6,
    # mitdb
    'MLII': std_II,
    # svdb
    'ECG1': std_II,
    'ECG2': std_V5,  # NOTE(review): original author was unsure about this mapping - confirm
}

# === Baseline-fitting filter parameters ===
# Lengths of the successive median filters, in seconds
ms_flt_array = [0.2, 0.6]
# Integer buffer for the filter widths in samples, one slot per filter; filled per record
mfa = np.zeros(len(ms_flt_array), dtype='int')

def get_median_filter_width(sampling_rate, duration):
    """Return an odd, positive median-filter width (in samples) for a window of `duration` seconds.

    Args:
        sampling_rate: samples per second.
        duration: window length in seconds.

    Returns:
        int: `int(sampling_rate * duration)`, lowered by one if it is even so the
        result is odd, and never smaller than 1.
    """
    width = int(sampling_rate * duration)
    if width % 2 == 0:
        width -= 1  # even -> next lower odd number
    # A window shorter than one sample used to yield -1, which is not a usable
    # kernel size; fall back to the smallest valid odd width instead.
    return max(width, 1)

def filter_signal(X,mfltrs_array):
    """Subtract a cascaded-median-filter estimate from a signal.

    Args:
        X: the input signal.
        mfltrs_array: median-filter kernel widths, applied one after another,
            each on the output of the previous one.

    Returns:
        `X` minus the output of the filter cascade.
    """
    baseline = X
    for kernel_width in mfltrs_array:
        baseline = medfilt(baseline, kernel_width)
    return np.subtract(X, baseline)

# === Annotation lookup: tells valid labels apart and separates beat / non-beat ones ===
beat_indicator = 'b'
non_beat_indicator = 'n'

global_annot = os.path.join(global_dir, 'annotations.txt')
print(f'ANNOTATIONS MAPPER:: {global_annot}')

# Tab-separated rows: [0] original PhysioNet label, [1] beat/non-beat flag, [2] description
g_ant_map_data = np.loadtxt(global_annot, dtype='str', delimiter="\t")

g_ant_map = {}
for ant_row in g_ant_map_data:
    g_ant_map[ant_row[0]] = ant_row[1]
    # leading apostrophe is part of the printed row
    print(f"'{ant_row[0]}\t{ant_row[1]}\t{ant_row[2]}")
g_ant_map_keys = g_ant_map.keys()

def get_ants_logger():
    """Return a fresh per-record counter: every known annotation label mapped to 0.

    The counts collected with it are later printed as a tab-separated table
    (intended for import into a spreadsheet).
    """
    return {label: 0 for label in g_ant_map_keys}


ANNOTATIONS MAPPER:: /content/drive/My Drive/Masters/workdir/ecg_data/annotations.txt
'N	b	Normal beat
'L	b	Left bundle branch block beat
'R	b	Right bundle branch block beat
'B	b	Bundle branch block beat (unspecified)
'A	b	Atrial premature beat
'a	b	Aberrated atrial premature beat
'J	b	Nodal (junctional) premature beat
'S	b	Supraventricular premature or ectopic beat (atrial or nodal)
'V	b	Premature ventricular contraction
'r	b	R-on-T premature ventricular contraction
'F	b	Fusion of ventricular and normal beat
'e	b	Atrial escape beat
'j	b	Nodal (junctional) escape beat
'n	b	Supraventricular escape beat (atrial or nodal)
'E	b	Ventricular escape beat
'/	b	Paced beat
'f	b	Fusion of paced and normal beat
'Q	b	Unclassifiable 
'?	b	Beat not classified during learning
'[	n	Start of ventricular flutter/fibrillation
'!	n	Ventricular flutter wave
']	n	End of ventricular flutter/fibrillation
'x	n	Non-conducted P-wave (blocked APC)
'(	n	Waveform onset
')	n	Waveform end
'p	n	Peak of P-wave
't	n	Peak

## [ ~BUILD DATABASE ]

In [None]:
def _read_text_lines(path, skip=0):
    """Return the stripped, non-blank lines of a text file, ignoring its first `skip` lines.

    Replaces np.loadtxt(..., delimiter="\n"): newer NumPy releases reject a newline
    delimiter, and loadtxt returns a 0-d array for one-line files, which breaks len().
    """
    with open(path, 'r') as fh:
        lines = fh.read().splitlines()
    return [ln.strip() for ln in lines[skip:] if ln.strip()]


def _as_str_table(rows):
    """Convert a list of [position, label] string pairs to an (n, 2) string array ((0, 2) if empty)."""
    return np.array(rows, dtype='str').reshape(-1, 2)


# Per database -> per record -> per label annotation counts (filled while building)
build_db_dict = {'mitdb':{},'svdb':{},'incartdb':{}}
build_db_list = build_db_dict.keys()

# === START PROCEDURE ===========================
timestamp_start = datetime.datetime.now()

for build_db_name in build_db_list:

    print('Working ',build_db_name)

    # the source .mat sub-directory must exist, otherwise skip this database
    db_matdir = os.path.join(global_matdir, build_db_name + '_mat')
    if not os.path.exists(db_matdir):
        print('ERROR: mat directory does not exsist. cannot proceed.')
        print('Not Found : '+ str(db_matdir))
        continue
    print('DB_MAT DIR :: '+db_matdir)

    # destination .npy sub-directory
    db_npydir = os.path.join(global_npydir, build_db_name + '_npy')
    os.makedirs(db_npydir, exist_ok=True)
    print('DB_NPY DIR :: '+db_npydir)

    # keep a copy of the RECORDS file next to the generated data
    record_file_path = os.path.join(db_matdir,'RECORDS')
    shutil.copy2(record_file_path, db_npydir)

    # record list, one name per line (kept as a NumPy string array so the
    # printed representation matches earlier runs)
    reclist = np.array(_read_text_lines(record_file_path), dtype='str')
    xs = len(reclist)
    print('Records['+str(xs)+ ']::'+str(reclist))

    print('\nProcessing Records...\n')
    xc = 0
    for xc, rec in enumerate(reclist, 1):
        print(str(xc)+'/'+str(xs)+' REC: '+str(rec))

        # fresh annotation counter for this record
        build_db_dict[build_db_name][rec] = get_ants_logger()

        #--------------------------------------------------------------------
        # Read meta-data (header file)
        #--------------------------------------------------------------------
        minfo_path = os.path.join(db_matdir,rec+'m.hea')
        with open(minfo_path,'r') as f:   # context manager closes the file even on error
            info = f.readlines()

        zero_line = info[0].split()       # rec, number of leads, sampling rate, length in samples
        nos_leads = int( zero_line[1] )   # total number of leads available
        srate = int( zero_line[2] )       # sampling rate
        dlen = int( zero_line[3] )        # total sampling points (data length)
        duration = dlen/srate             # total duration in seconds
        print(' Header\t'+ str(zero_line))

        # standard lead name -> [lead index in the .mat 'val' matrix, manual gain]
        lead_info = {}
        for i in range(1, nos_leads+1):
            # expected 9 fields, e.g.: I67m.mat 16+192 623/mV 16 0 4441 1894 0 II
            tIA = info[i].split()
            t_gain = tIA[2]               # gain (amplitude resolution), e.g. '623/mV'
            t_lead = tIA[8]               # lead name, e.g. 'II'
            if t_lead in g_LEAD_MAPPER:
                tIL = g_LEAD_MAPPER[t_lead]
                lead_info[tIL] = [ int(i-1) ,int(t_gain.split('/')[0]) ]
            else:
                print('\tWARNING:: UNMAPPED LEAD '+ str(t_lead))

        if std_II not in lead_info:       # only records with a lead-II signal are processed
            print(' Lead2\tdata not found.... skip this record\n')
            continue

        #--------------------------------------------------------------------
        # Prepare annotations
        #--------------------------------------------------------------------
        # first line of the annotation file is skipped (header row)
        ants = _read_text_lines(os.path.join(db_matdir , rec + 'a.txt'), skip=1)

        # rows are collected in lists and converted once, instead of growing
        # arrays with np.vstack on every annotation (quadratic copying)
        beat_rows, resbeat_rows, nonbeat_rows = [], [], []
        for ant_line in ants:
            a_a = ant_line.split()
            a_sample = a_a[1]                                          # sample index (str)
            a_resample = int(round(int(a_sample)*BASIC_SRATE/srate))   # same position at BASIC_SRATE
            a_label = a_a[2]
            if a_label not in g_ant_map_keys:
                print('\tWARNING :: Unknown annotation "'+a_label+'" found at [' +a_sample+ '], skipping....')
                continue

            build_db_dict[build_db_name][rec][a_label]+=1   # key already exists in the counter
            beat_type = g_ant_map[a_label]
            if beat_type == beat_indicator:   # beat annotation
                beat_rows.append([a_sample, a_label])
                resbeat_rows.append([str(a_resample), a_label])
            else:                             # non-beat annotation
                nonbeat_rows.append([a_sample, a_label])

        # [position, label] string tables; string width is the longest entry
        antsbeat = _as_str_table(beat_rows)
        antsresbeat = _as_str_table(resbeat_rows)
        antsnonbeat = _as_str_table(nonbeat_rows)

        print(' Ants\tTotal:'+ str(len(ants)) + '; BA:'+str(len(antsbeat))+'; NBA:' + str(len(antsnonbeat)))
        np.save(os.path.join(db_npydir , rec+ '_'+g_BA+'.npy'), antsbeat)
        np.save(os.path.join(db_npydir , rec+ '_'+g_RPEAK+'.npy'), antsresbeat)
        np.save(os.path.join(db_npydir , rec+ '_'+g_NBA+'.npy'), antsnonbeat)

        #--------------------------------------------------------------------
        # Process signal data
        #--------------------------------------------------------------------
        # raw lead-II signal
        L2_index = lead_info[std_II][0]
        print(' Lead2\t' +  info[L2_index+1])
        mat_signal = loadmat(os.path.join( db_matdir , rec + 'm.mat'))['val'][L2_index]
        np.save(os.path.join(db_npydir , rec + '_'+g_RAW2+'.npy'), mat_signal )

        # baseline fitting: filter widths depend on this record's sampling rate
        for i in range(0, len(ms_flt_array)):
            mfa[i] = get_median_filter_width(srate,ms_flt_array[i])
        blf_signal = filter_signal(mat_signal, mfa)
        np.save(os.path.join(db_npydir , rec + '_'+g_BLF2+'.npy'), blf_signal )

        # resample the baseline-fitted signal to BASIC_SRATE
        sr_ratio = BASIC_SRATE/srate
        res_signal = scsig.resample(blf_signal, round(len(blf_signal)*sr_ratio))
        np.save(os.path.join(db_npydir , rec + '_'+g_RES2+'.npy'), res_signal )

        # remove the manual gain read from the header
        L2_gain = lead_info[std_II][1]
        sig_signal = res_signal/L2_gain
        np.save(os.path.join(db_npydir , rec + '_'+g_SIG2+'.npy'), sig_signal )


#---------------------------------------------------------------------------------------------------------------------------------------------
timestamp_dur = datetime.datetime.now() - timestamp_start
print('Elapsed time = ' + str(timestamp_dur))

Working  mitdb
DB_MAT DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_mat/mitdb_mat
DB_NPY DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_npy/mitdb_npy
Records[48]::['100' '101' '102' '103' '104' '105' '106' '107' '108' '109' '111' '112'
 '113' '114' '115' '116' '117' '118' '119' '121' '122' '123' '124' '200'
 '201' '202' '203' '205' '207' '208' '209' '210' '212' '213' '214' '215'
 '217' '219' '220' '221' '222' '223' '228' '230' '231' '232' '233' '234']

Processing Records...

1/48 REC: 100
 Header	['100m', '2', '360', '650000']
 Ants	Total:2274; BA:2273; NBA:1
 Lead2	100m.mat 16+192 200/mV 11 0 -29 27021 0 MLII

2/48 REC: 101
 Header	['101m', '2', '360', '650000']
 Ants	Total:1874; BA:1865; NBA:9
 Lead2	101m.mat 16+192 200/mV 11 0 -69 13448 0 MLII

3/48 REC: 102
 Header	['102m', '2', '360', '650000']
 Lead2	data not found.... skip this record

4/48 REC: 103
 Header	['103m', '2', '360', '650000']
 Ants	Total:2091; BA:2084; NBA:7
 Lead2	103m.mat 16+192 200/mV 11 0

## [ ANNOTATION COUNT ]

In [None]:
# Tab-separated annotation-count table: one header row, then one row per record.
ord_ants = list(g_ant_map_keys)   # fixes the column order

# Header: every label is prefixed with an apostrophe
heading = 'ECG_DB\tREC' + ''.join("\t'" + ant_label for ant_label in ord_ants)
print(heading)

body = ''
for idb, db_records in build_db_dict.items():
    for irec, ants_log in db_records.items():
        counts = ''.join('\t' + str(ants_log[ant_label]) for ant_label in ord_ants)
        body = idb + '\t' + irec + counts
        print(body)

ECG_DB	REC	'N	'L	'R	'B	'A	'a	'J	'S	'V	'r	'F	'e	'j	'n	'E	'/	'f	'Q	'?	'[	'!	']	'x	'(	')	'p	't	'u	'`	''	'^	'|	'~	'+	's	'T	'*	'D	'=	'"	'@
mitdb	100	2239	0	0	0	33	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	0
mitdb	101	1860	0	0	0	3	0	0	0	0	0	0	0	0	0	0	0	0	2	0	0	0	0	0	0	0	0	0	0	0	0	0	4	4	1	0	0	0	0	0	0	0
mitdb	102	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
mitdb	103	2082	0	0	0	2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	6	1	0	0	0	0	0	0	0
mitdb	104	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
mitdb	105	2526	0	0	0	0	0	0	0	41	0	0	0	0	0	0	0	0	5	0	0	0	0	0	0	0	0	0	0	0	0	0	30	88	1	0	0	0	0	0	0	0
mitdb	106	1507	0	0	0	0	0	0	0	520	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	30	41	0	0	0	0	0	0	0
mitdb	107	0	0	0	0	0	0	0	0	59	0	0	0	0	0	0	2078	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	2	1	0	0	0	0	0	0	0
mitdb	108	1739	0	0	0	4	0	0	0	17	0	2	0	1	0	0	0	0	0	0	0	0	0	11	0	0	0	0	0	0	0	0	8	41	1	0	0	0	0	0	0	0
mitdb

# [ CLASS DEFINITIONS ]

In [None]:
#---------------------------------------------------------------------------------------------------------------------------------------------
# CLASS ecg_db : represents one ECG database
#---------------------------------------------------------------------------------------------------------------------------------------------
class ecg_db:
    """One ECG database stored as .npy files under `<global_npydir>/<dbname>_npy`.

    Attributes:
        name: database name.
        dir_npy: directory holding the database's RECORDS file and .npy data.
        recs: set of record names listed in RECORDS.
        recs_tag: set built from the `tag_recs` argument.
        recs_dict: cache of ecg_record objects, filled on demand by get_record().
    """

    def __init__(self, dbname,  tag_recs):
        """Read the record list of database `dbname` and print a summary.

        Args:
            dbname: database name; the data directory is derived from it.
            tag_recs: iterable of record names stored as the `recs_tag` set.
        """
        print('\nInitailze new ecg database ... ')
        self.name = dbname
        self.dir_npy = os.path.join(global_npydir , dbname+'_npy')
        self.recs = self._read_record_names(os.path.join(self.dir_npy,'RECORDS'))
        self.recs_tag = set(tag_recs)
        self.recs_dict = {}  # initially empty, loaded on demand through get_record()
        self.info()

    @staticmethod
    def _read_record_names(path):
        """Return the set of non-blank, stripped lines of a RECORDS file.

        Plain file reading replaces np.loadtxt(..., delimiter="\n"): newer NumPy
        releases reject a newline delimiter, and a one-line file made loadtxt
        return a 0-d array that cannot be turned into a set.
        """
        with open(path, 'r') as fh:
            return {line.strip() for line in fh if line.strip()}

    def info(self):
        """Print name, data directory, records and tagged records; returns 0."""
        print( 'DB NAME :: '+ self.name)
        print( 'DATA DIR :: ' + self.dir_npy )
        print( 'RECORDS :: [' +str(len(self.recs))+'] ' + str(self.recs) )
        print( 'TAG RECORDS :: [' +str(len(self.recs_tag))+'] ' + str(self.recs_tag))
        return 0

    def get_record(self,rec):
        """Return the ecg_record for `rec`, creating and caching it on first use."""
        if rec not in self.recs_dict:
            self.recs_dict[rec] = ecg_record(self,rec)
        return self.recs_dict[rec]

    def get_random_record(self, recset):
        """Return the (cached) ecg_record for a name picked at random from `recset`."""
        return self.get_record(random.choice(list(recset)))

#---------------------------------------------------------------------------------------------------------------------------------------------

#---------------------------------------------------------------------------------------------------------------------------------------------
# CLASS ecg_record : represents one ECG Record in any database
#---------------------------------------------------------------------------------------------------------------------------------------------
g_SUPRESS_DATA_WARNING=False
class ecg_record:
    """One ECG record of a database, with lazy, cached access to its .npy data files.

    Data files are named '<record>_<data_type>.npy'. Files in the database's own
    npy directory are cached in `data_npy`; files read from a caller-supplied
    directory are cached in `data_temp`.
    """

    def __init__(self, db, recname):
        """
        Args:
            db: the ecg_db object this record belongs to.
            recname: record name; a warning is printed if it is not in `db.recs`.
        """
        self.db = db                                # ecg_db object this record belongs to
        self.rec = recname                          # name of this record
        self.name = db.name + '_'+ recname          # full name including db.name
        if recname not in db.recs:
            print('WARNING:: Record "'+ recname +'" not found in database '+ db.name )
        self.data_npy = {}                          # data_type -> array, filled by load_data()
        self.data_temp = {}                         # data_type -> array, filled by load_data_temp()
        self.binfo = None                           # ecg_binfo instance, created by read_binfo()

    ##<------------------------------------------------- get instance of binfo class
    def read_binfo(self):
        """Return the cached ecg_binfo of this record, creating it on first use."""
        if self.binfo is None:
            self.binfo = ecg_binfo(self)
        return self.binfo

    def refresh_binfo(self):
        """Rebuild the ecg_binfo of this record and return it."""
        self.binfo = ecg_binfo(self)
        return self.binfo

    ##<------------------------------------------------- shared loader
    def _load_into(self, cache, data_type, dir_path):
        """Load '<rec>_<data_type>.npy' from `dir_path` into `cache` and return it.

        On failure a warning is printed (unless g_SUPRESS_DATA_WARNING is set) and an
        empty array is returned; nothing is cached, so the next read tries again.
        """
        ipath = os.path.join(dir_path, self.rec + '_'+data_type+'.npy')
        try:
            cache[data_type] = np.load(ipath)
        except Exception:
            # Best-effort load, but unlike a bare `except:` this lets
            # KeyboardInterrupt / SystemExit propagate.
            if g_SUPRESS_DATA_WARNING == False:
                print('WARNING:: Cant load "'+data_type+ '" file at '+ str(ipath) )
            return np.array([])
        return cache[data_type]

    ##<------------------------------------------------- data reading for npydir
    def load_data(self, data_type):
        """(Re)load `data_type` from the database directory, refreshing the cache."""
        return self._load_into(self.data_npy, data_type, self.db.dir_npy)

    def read_data(self, data_type):
        """Return `data_type` from the cache, loading it from disk on first use."""
        if data_type in self.data_npy:
            return self.data_npy[data_type]
        return self.load_data(data_type)

    ##<------------------------------------------------- for tempdir
    def load_data_temp(self, data_type, dir_path):
        """(Re)load `data_type` from `dir_path`, refreshing the temp cache."""
        return self._load_into(self.data_temp, data_type, dir_path)

    def read_data_temp(self, data_type, dir_path):
        """Return `data_type` from the temp cache, loading it from `dir_path` on first use.

        The cache is keyed by `data_type` only, so a cached entry is returned
        regardless of `dir_path`.
        """
        if data_type in self.data_temp:
            return self.data_temp[data_type]
        return self.load_data_temp(data_type, dir_path)

    def save_data_temp(self, data_type, data_array, dir_path):
        """Save `data_array` as '<rec>_<data_type>.npy' in `dir_path`; return the path.

        The in-memory temp cache is not updated.
        """
        ipath = os.path.join(dir_path, self.rec + '_'+data_type+'.npy')
        np.save(ipath, data_array)
        return ipath

    def del_data_temp(self, data_type, dir_path, vb):
        """Delete '<rec>_<data_type>.npy' from `dir_path`.

        Args:
            vb: when truthy, print the path being removed.

        Returns:
            1 if the file existed and was removed, 0 otherwise. The in-memory
            temp cache is left untouched.
        """
        ipath = os.path.join(dir_path, self.rec + '_'+data_type+'.npy')
        if not os.path.exists(ipath):
            return 0
        if vb:
            print('Removing: '+str(ipath))
        os.remove(ipath)
        return 1
#------------------------------------------------------------------------------------------------

#---------------------------------------------------------------------------------------------------------------------------------------------
# CLASS ecg_binfo : information about beats in a record
#---------------------------------------------------------------------------------------------------------------------------------------------
class ecg_binfo:
    """Per-beat information of one record, built from its resampled R-peak annotations.

    The first and the last annotated beat only serve as neighbours, so beat i of
    this object is annotation i+1 of the file. For every beat the previous,
    current and next peak position (in samples at BASIC_SRATE) and label are kept
    in parallel arrays (`rp_*`, `rl_*`), together with derived RR intervals in seconds.
    """

    def __init__(self, rec):
        """
        Args:
            rec: record object; `rec.read_data(g_RPEAK)` must return a 2-column
                array of [peak position, label] rows.
        """
        # the record object
        self.rec = rec

        # resampled annotation file
        r_peaks_ants = rec.read_data(g_RPEAK)

        # number of usable R peaks (first and last excluded); clamped so that it
        # always equals len(self.rp_curr), also for records with fewer than 2 beats
        self.rp_count = max(len(r_peaks_ants) - 2, 0)

        # location and label columns
        r_peaks_int = r_peaks_ants[:,0].astype('int')
        r_ants_str = r_peaks_ants[:,1]

        # Location (in samples)
        self.rp_curr = r_peaks_int[1:-1]    # current R peak
        self.rp_prev = r_peaks_int[0:-2]    # previous R peak
        self.rp_next = r_peaks_int[2:]      # next R peak

        # Label
        self.rl_curr = r_ants_str[1:-1]
        self.rl_prev = r_ants_str[0:-2]
        self.rl_next = r_ants_str[2:]

        # Mapped label, filled by map_ants2int()
        self.rli_prev = []
        self.rli_curr = []
        self.rli_next = []

        # Temporal info
        self.rp_sec = self.rp_curr / BASIC_SRATE                      # peak location (sec)
        self.rri_prev = (self.rp_curr - self.rp_prev) / BASIC_SRATE   # previous RR interval (sec)
        self.rri_next = (self.rp_next - self.rp_curr) / BASIC_SRATE   # next RR interval (sec)
        self.rri_delta = (self.rri_next - self.rri_prev)              # next minus previous RR interval (sec)
        self.rri_dur = (self.rri_next + self.rri_prev)                # previous peak to next peak (sec)

    def get_signal_data_var(self, ith_peak):
        """Return the signal from the previous to the next R peak (inclusive) around beat `ith_peak`.

        Returns:
            (segment, peak position inside the segment, segment length in samples).
        """
        sel_sig = self.rec.read_data(g_SIG2)
        ff = self.rp_prev[ith_peak]
        tt = self.rp_next[ith_peak]
        pp = self.rp_curr[ith_peak]
        return sel_sig[ff:tt+1], (pp-ff), (tt+1-ff)

    def get_signal_data_fix(self, ith_peak, v_left_sec, v_right_sec):
        """Like get_signal_data_fix_samples(), with the window given in seconds."""
        return self.get_signal_data_fix_samples(ith_peak,int(v_left_sec*BASIC_SRATE),int(v_right_sec*BASIC_SRATE))

    def get_signal_data_fix_samples(self, ith_peak, v_left, v_right):
        """Return a fixed-size window of `v_left` samples before and `v_right` samples from beat `ith_peak`.

        Parts of the window that fall outside the signal are zero-padded on both
        sides. The right-hand padding was previously lost to a variable-name typo
        (`tpad` instead of `t_pad`), so windows near the end came back short.

        Returns:
            (segment, pl) where pl is the peak's sample index plus the left padding.
        """
        sel_sig = self.rec.read_data(g_SIG2)
        pp = self.rp_curr[ith_peak]
        ff = pp - v_left
        tt = pp + v_right

        f_pad,t_pad = 0,0
        if ff<0:                      # window starts before the signal
            f_pad = 0-ff
            ff = 0

        if tt>len(sel_sig):           # window ends after the signal
            t_pad = tt-len(sel_sig)
            tt = len(sel_sig)

        sel_part = np.hstack((
            np.zeros(f_pad),
            sel_sig[ff:tt],
            np.zeros(t_pad),
            ))

        # NOTE(review): this is the peak's index in the full signal (plus left padding),
        # not its index inside `sel_part` as in get_signal_data_var(); kept unchanged
        # because callers are not visible here - confirm which one is intended.
        pl = pp+f_pad
        return sel_part, pl

    def _rate_between(self, ff, tt):
        """Beat rate over the current peaks located in [ff, tt] (sample positions).

        Returns:
            (beats per second, seconds between first and last peak found), or
            (0, 0) when fewer than two peaks lie inside the range.
        """
        qq = np.where((self.rp_curr>=ff) & (self.rp_curr<=tt))[0]
        nq = len(qq)
        if nq<2:
            return 0, 0
        dd = (self.rp_curr[qq[-1]] - self.rp_curr[qq[0]])/BASIC_SRATE
        # nq peaks span nq-1 intervals in dd seconds
        return (nq-1)/dd, dd

    def get_local_hrT(self,local_window_start,local_window_end):
        """Beat rate (bps) and covered duration (sec) between two times given in seconds.

        The range is truncated to [first previous peak, last next peak].
        """
        lws = local_window_start*BASIC_SRATE   # in samples
        lwe = local_window_end*BASIC_SRATE     # in samples
        ff = max( lws ,self.rp_prev[0])
        tt = min( lwe ,self.rp_next[-1])
        return self._rate_between(ff, tt)

    def get_local_hr(self,ith_peak, local_window_left,local_window_right):
        """Beat rate (bps) and covered duration (sec) in a window (seconds) around beat `ith_peak`.

        The window is truncated to [first previous peak, last next peak].
        """
        lwl = local_window_left*BASIC_SRATE    # in samples
        lwr = local_window_right*BASIC_SRATE   # in samples
        ff = max(self.rp_curr[ith_peak] - lwl ,self.rp_prev[0])
        tt = min(self.rp_curr[ith_peak] + lwr ,self.rp_next[-1])
        return self._rate_between(ff, tt)

    def get_local_hrA(self, local_window_left,local_window_right):
        """get_local_hr() for every beat; returns two float arrays (bps, duration in sec)."""
        lwl = local_window_left*BASIC_SRATE    # in samples
        lwr = local_window_right*BASIC_SRATE   # in samples
        local_bps = np.zeros(self.rp_count,dtype='float')
        local_dd = np.zeros(self.rp_count,dtype='float')
        for ith_peak in range(0, self.rp_count):
            ff = max(self.rp_curr[ith_peak] - lwl ,self.rp_prev[0])
            tt = min(self.rp_curr[ith_peak] + lwr ,self.rp_next[-1])
            local_bps[ith_peak], local_dd[ith_peak] = self._rate_between(ff, tt)
        return local_bps, local_dd

    def map_ants2int(self,map_dict):
        """Fill rli_prev / rli_curr / rli_next with the labels mapped through `map_dict`.

        Mapped values are stored as strings. The string width now follows the
        longest mapped value; the previous fixed one-character array silently
        truncated e.g. 12 to '1'. Does nothing when `rli_curr` already has
        `rp_count` entries, so a later call with another `map_dict` has no effect.

        Raises:
            KeyError: if a label is missing from `map_dict`.
        """
        if len(self.rli_curr)!=self.rp_count:
            # labels of all annotations: first beat, the rp_count usable beats, last beat
            labels = [self.rl_prev[0]] + list(self.rl_curr) + [self.rl_next[-1]]
            temp = np.array([str(map_dict[label]) for label in labels], dtype='str')
            self.rli_curr = temp[1:-1]
            self.rli_prev = temp[0:-2]
            self.rli_next = temp[2:]




# [ BUILD STANDARD DBs ]

In [None]:
# Create one ecg_db object per source database, each with an empty list of tagged records.
# The three assignments are kept as separate statements so that the databases
# created before a failing one remain bound.
print('Buidling standard databases')
# database 'mitdb'
std_mitdb = ecg_db('mitdb', [])
# database 'svdb'
std_svdb = ecg_db('svdb', [])
# database 'incartdb'
std_incartdb= ecg_db('incartdb', [])
#------------------------------------------------------------------------

Buidling standard databases

Initailze new ecg database ... 
DB NAME :: mitdb
DATA DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_npy/mitdb_npy
RECORDS :: [48] {'122', '101', '214', '222', '231', '233', '118', '108', '228', '109', '213', '121', '117', '202', '212', '123', '208', '223', '203', '217', '232', '114', '104', '230', '205', '124', '201', '105', '215', '102', '219', '119', '103', '107', '234', '207', '116', '209', '100', '220', '106', '115', '111', '210', '112', '221', '113', '200'}
TAG RECORDS :: [0] set()

Initailze new ecg database ... 
DB NAME :: svdb
DATA DIR :: /content/drive/My Drive/Masters/workdir/ecg_data/db_npy/svdb_npy
RECORDS :: [78] {'848', '894', '841', '823', '887', '871', '812', '800', '826', '840', '803', '858', '889', '892', '801', '893', '827', '861', '846', '850', '810', '870', '877', '880', '890', '828', '878', '807', '859', '805', '883', '822', '811', '862', '849', '872', '847', '809', '804', '806', '884', '855', '885', '844', '868', '869', '

# [Model Performance Measures]

In [None]:
#=========================================================================================================================
#======================= NEURAL NETWORK PERFORMANCE MEASURES
#=========================================================================================================================
# 3.3 :: define performance evaluation functions

def get_performance(conf_matrix):
    """Compute one-vs-rest performance measures for every class of a confusion matrix.

    Parameters
    ----------
    conf_matrix : array-like, shape (n_classes, n_classes)
        Confusion matrix with the actual class along the rows and the
        predicted class along the columns, i.e. entry [i, j] counts samples
        of class i that were predicted as class j.

    Returns
    -------
    numpy.ndarray, shape (n_classes, 8), dtype float64
        One row per class with the columns
        [TP, FN, FP, TN, ACC, PRE, SEN, SPF]
        (true positives, false negatives, false positives, true negatives,
        accuracy, precision, sensitivity, specificity).

    Raises
    ------
    ValueError
        If conf_matrix is not a square 2-D matrix.

    Notes
    -----
    A ratio whose denominator is zero (for example the precision of a class
    that was never predicted) is reported as NaN. The NumPy warning for that
    division is suppressed locally, so the function does not rely on a global
    warnings filter.
    """
    cm = np.asarray(conf_matrix)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError('conf_matrix must be a square 2-D matrix, got shape ' + str(cm.shape))

    # Per-class counts, computed for all classes at once.
    TP = np.diag(cm).astype('float64')
    FP = cm.sum(axis=0) - TP        # predicted as the class, but actually another one
    FN = cm.sum(axis=1) - TP        # actually the class, but predicted as another one
    TN = cm.sum() - FN - FP - TP    # everything that involves neither row nor column

    # 0/0 deliberately yields NaN for undefined measures.
    with np.errstate(divide='ignore', invalid='ignore'):
        ACC = (TP + TN) / (TP + FP + FN + TN)
        PRE = TP / (TP + FP)
        SEN = TP / (TP + FN)
        SPF = TN / (TN + FP)

    return np.column_stack((TP, FN, FP, TN, ACC, PRE, SEN, SPF))


#------------------------------------------------------------------PRINTING

def print_lstr(class_labels):
    """Build the header row used when printing a confusion matrix.

    Each label is converted with str() and preceded by a tab, so a non-empty
    result starts with a tab. An empty label sequence gives ''.
    """
    return ''.join('\t' + str(class_labels[k]) for k in range(len(class_labels)))

def print_cf_row(cf_row,nos_labels):
    """Format the first nos_labels entries of one confusion-matrix row.

    Each entry is converted with str() and preceded by a tab.
    """
    cells = [str(cf_row[col]) for col in range(nos_labels)]
    return ''.join('\t' + cell for cell in cells)
def print_conf_matrix(conf_matrix, suffix, class_labels):
    """Render a confusion matrix as tab-separated text and return it as a string.

    The first line is the header ('A\\P' followed by the class labels); one
    line per class follows, starting with the class label. Despite its name,
    `suffix` is placed at the START of every line. Every line, including the
    last one, ends with a newline.
    """
    nos_l = len(class_labels)
    lines = [suffix + 'A\\P' + print_lstr(class_labels)]
    for row in range(nos_l):
        lines.append(suffix + str(class_labels[row]) + print_cf_row(conf_matrix[row], nos_l))
    return ''.join(line + '\n' for line in lines)
def print_performance(perf_measures, class_labels):
    """Print ACC, PRE, SEN and SPF for every class as a tab-separated table.

    perf_measures is indexed as [class, measure]; columns 4 to 7 of each row
    are printed, rounded to two decimals (this is the column layout produced
    by get_performance). class_labels[i] is printed in front of row i.
    Nothing is returned.
    """
    nos_class = len(perf_measures[:, 0])
    print('Performance for '+str(nos_class)+' classes')
    print ('Class\tACC\tPRE\tSEN\tSPF')
    for row in range(nos_class):
        rounded = np.round(perf_measures[row, :], 2)
        fields = [str(class_labels[row])] + [str(rounded[col]) for col in (4, 5, 6, 7)]
        print('\t'.join(fields))
    return
#------------------------------------------------------------------

def plot_ecg_segment(signal_info, signal_array, fsec, tsec, x_scale, y_scale, y_low, y_high, mticks_pos, show_rris, predx, predy, a_color, gain=1):
    """Plot predictions, the ECG trace and optionally RR-interval data for one time window.

    Up to three pyplot figures are shown. All of them use an x axis in samples
    relative to the start of the window, with one tick per beat labelled with
    the beat's original annotation:

    * 'ecg predictions' - squares colored by mapped label, circles colored by
      predicted class ('tab:green' for class 0, `a_color` otherwise), the
      score of the winning class as black dots, and 'x' markers on the zero
      line where `predy` and the predicted class disagree (green: predy 0 but
      predicted 1; red: predy 1 but predicted 0). Only classes 0 and 1 are
      considered for these markers.
    * 'ecg signal' - the signal slice (times `gain`) with the mapped-label
      squares drawn at height `mticks_pos`.
    * 'ecg meta' (only when `show_rris` is true) - `signal_info.rri_dur` and
      the absolute value of `signal_info.rri_delta` for every beat.

    Parameters
    ----------
    signal_info : object
        Must provide `rec.name` and the per-beat NumPy arrays `rp_sec`
        (compared against fsec/tsec), `rp_curr` (offset by the first sample
        index of the window), `rl_curr` (original labels) and `rli_curr`
        (mapped labels, used as keys into the global `g_STD_LABELS`), plus
        `rri_dur` and `rri_delta` when `show_rris` is true.
    signal_array : array
        Signal samples; sliced at `BASIC_SRATE` samples per second.
    fsec, tsec : number
        Start (inclusive) and end (exclusive) of the window in seconds.
    x_scale, y_scale : number
        Figure width per second and figure height per unit of y range.
    y_low, y_high : number
        y limits of the signal figure (also used to size the first figure).
    mticks_pos : number
        Height at which the label squares are drawn in the signal figure.
    show_rris : bool
        Whether to draw the third ('ecg meta') figure.
    predx : array, indexed [beat, class]
        Per-beat class scores; the predicted class is the argmax over axis 1.
    predy : array, indexed [beat]
        Per-beat reference class compared against the prediction.
    a_color : color
        Color for beats whose predicted class is not 0.
    gain : number, optional
        Factor applied to the signal slice before plotting.

    Returns
    -------
    tuple or int
        `(bps, dticks, dlabels)` - the scaled signal slice, the beat positions
        relative to the window start and the original beat labels. If
        `signal_array` is empty a warning is printed and 0 is returned.
    """
    dsec = tsec - fsec
    print(signal_info.rec.name)
    if len(signal_array)==0:
        print('WARNING::Signal was not loaded.')
        return 0

    # window bounds as sample indices
    ff = int(fsec * BASIC_SRATE)
    tt = int(tsec * BASIC_SRATE)
    bps = signal_array[ff:tt] * gain  # signal data * gain

    # indices of the beats that fall inside [fsec, tsec)
    lim_query = np.where((signal_info.rp_sec >= fsec) & (signal_info.rp_sec < tsec))[0]

    dticks = signal_info.rp_curr[lim_query]-ff  # tick position relative to the window
    nos_ticks = len(dticks)

    dlabels = signal_info.rl_curr[lim_query]    # original labels
    dilabels = signal_info.rli_curr[lim_query]  # mapped labels
    ditruth = predy[lim_query]
    dpredx = predx[lim_query]
    diPred = dpredx.argmax(axis=1)

    # Colors are kept in plain lists: a fixed-width string array (dtype 'U15')
    # would silently cut off any color name longer than 15 characters.
    diPredCol = ['tab:green' if p == 0 else a_color for p in diPred]
    dicolors = [g_STD_LABELS[lbl] for lbl in dilabels]

    print('Time Interval{'+str(dsec)+'s}:['+str(fsec)+':'+str(tsec)+']')
    if nos_ticks > 0:
        print('Beat Interval{'+str(nos_ticks)+'#}:['+str(lim_query[0])+':'+str(lim_query[-1])+']')
    else:
        print('Beat Interval{'+str(nos_ticks)+'#}')

    n_samples = len(bps)

    def _open_beat_figure(name, height, y_limits):
        # Shared set-up: named figure, x axis in samples with one labelled tick per beat.
        plt.figure(name, figsize = (dsec*x_scale, height))
        plt.xlim(0, n_samples)
        plt.ylim(*y_limits)
        plt.yticks([])
        plt.xticks(dticks,dlabels)
        plt.grid(axis='x')

    # ---- figure: predictions
    _open_beat_figure('ecg predictions', (y_high-y_low) * y_scale, (-0.5, 1.1))
    plt.hlines(0,0,n_samples, linewidth=0.3)
    plt.hlines(0.5,0,n_samples, linewidth=0.3)
    plt.hlines(1,0,n_samples, linewidth=0.3)
    plt.scatter(dticks,np.zeros(nos_ticks)-0.40,marker='s',color=dicolors)
    plt.scatter(dticks,np.zeros(nos_ticks)-0.20,marker='o',color=diPredCol)

    # score of the winning class for every beat
    pred_str = dpredx[np.arange(nos_ticks), diPred]
    plt.scatter(dticks,pred_str,marker='.',color='black')

    # predy is 0 but class 1 was predicted
    d_aN_pA = dticks[np.where((ditruth==0)&(diPred==1))[0]]
    plt.scatter(d_aN_pA,np.zeros(len(d_aN_pA)),marker='x',color='tab:green')

    # predy is 1 but class 0 was predicted
    d_aA_pN = dticks[np.where((ditruth==1)&(diPred==0))[0]]
    plt.scatter(d_aA_pN,np.zeros(len(d_aA_pN)),marker='x',color='tab:red')

    plt.tight_layout()
    plt.show()

    # ---- figure: signal
    _open_beat_figure('ecg signal', (y_high-y_low) * y_scale, (y_low, y_high))
    # signal and baseline
    plt.plot(bps, linewidth=0.5, color='black')
    plt.hlines(0,0,n_samples, linewidth=0.3)
    # mapped labels
    plt.scatter(dticks,np.zeros(nos_ticks)+mticks_pos,marker='s',color=dicolors)

    plt.tight_layout()
    plt.show()

    # ---- figure: RR interval duration and |delta RR|
    if show_rris:
        ddur = signal_info.rri_dur[lim_query]                 # duration
        ddel = np.absolute(signal_info.rri_delta[lim_query])  # delta rri
        my_low, my_high = -0.1, 3.5
        _open_beat_figure('ecg meta', (my_high-my_low) * 1.5*y_scale, (my_low, my_high))

        # baseline
        plt.hlines(0,0,n_samples, linewidth=0.3,color='red')

        plt.scatter(dticks,ddur,marker='s',color=dicolors)
        plt.scatter(dticks,ddel,marker='o',color='tab:purple')
        plt.plot(dticks,ddur,color='black',linewidth=0.5,linestyle='dotted')
        plt.plot(dticks,ddel,color='black',linewidth=0.5,linestyle='dotted')

        plt.tight_layout()
        plt.show()

    return bps,dticks,dlabels





---

END OF SHARED SECTION

---



# [EXP DATA DICT]

In [None]:
# Records left out of the experiments, per database, with the reason.
mitdb_ex = {
    '102', '104', '107', '217',  # paced
    '207',                       # VFlutter
    '212', '231',                # both N and BBB
    '108',                       # bad signal
    '202', '203',                # bad labeling
}
svdb_ex = set()
incartdb_ex = set()

# Tag every record of a database that is not in its exclusion set.
std_mitdb.recs_tag = std_mitdb.recs.difference(mitdb_ex)
std_svdb.recs_tag = std_svdb.recs.difference(svdb_ex)
std_incartdb.recs_tag = std_incartdb.recs.difference(incartdb_ex)

# Database groups keyed by database name. The letters after 'std_db_' name
# the members: m = mitdb, s = svdb, i = incartdb.
std_db_msi = {'mitdb': std_mitdb, 'svdb': std_svdb, 'incartdb': std_incartdb}

std_db_ms = {'mitdb': std_mitdb, 'svdb': std_svdb}
std_db_mi = {'mitdb': std_mitdb, 'incartdb': std_incartdb}
std_db_si = {'svdb': std_svdb, 'incartdb': std_incartdb}

std_db_m = {'mitdb': std_mitdb}
std_db_s = {'svdb': std_svdb}
std_db_i = {'incartdb': std_incartdb}




# [ VIEW ANNOTATION MAPPERS ]

In [None]:
# Show the files found in the annotation directory, sorted by name.
ls_ants = np.sort(os.listdir(global_antdir))
print('Available annotation files ['+str(len(ls_ants))+']')
for ls_ant in ls_ants:
    print(ls_ant)
print('--------------------------')

Available annotation files [6]
default_labels.txt
default_map.txt
nsv_labels.txt
nsv_map.txt
nsvf_labels.txt
nsvf_map.txt
--------------------------


# [ MAP ANNOTATIONS ]

In [None]:
# Annotation scheme in use: the 'nsv' label file and its mapping file.
sel_labels = os.path.join(global_antdir, 'nsv_labels.txt')
sel_map = os.path.join(global_antdir, 'nsv_map.txt')

# ---- standard labels -------------------------------------------------
# Tab-separated rows: standard label (char), plot color (str), description (str).
# g_STD_LABELS maps each standard label to its plot color.
sel_labels_data = np.loadtxt(sel_labels, dtype='str', delimiter="\t")
g_STD_LABELS = {}
print('\nStandard Labels::')
for a in sel_labels_data:
    g_STD_LABELS[a[0]] = a[1]
    print('\t'.join((a[0], a[1], a[2])))

# ---- mapping data ----------------------------------------------------
# Tab-separated rows: original PhysioNet label (char), mapped standard label (char),
# description (str). g_STD_MAP is the resulting mapping dictionary.
ant_map_data = np.loadtxt(sel_map, dtype='str', delimiter="\t")
g_STD_NO_MAP = '_'                      # the label file has no entry for '_'
g_STD_LABELS[g_STD_NO_MAP] = 'black'    # so it gets its plot color here
g_STD_MAP = {}
print('\nMapping::')
for a in ant_map_data:
    g_STD_MAP[a[0]] = a[1]
    print('\t'.join((a[0], a[1], a[2])))
print('\n',g_STD_MAP.keys())

# ---- apply the mapping to every tagged record of every database -------
for idb, sel_db in std_db_msi.items():
    for irec in sel_db.recs_tag:
        sel_rec = sel_db.get_record(irec)
        sel_info = sel_rec.read_binfo()
        sel_info.map_ants2int(g_STD_MAP)


Standard Labels::
N	green	Normal
S	red	Supraventricular Premature
V	blue	Ventricular Premature

Mapping::
N	N	Normal beat
L	N	Left bundle branch block beat
R	N	Right bundle branch block beat
B	N	Bundle branch block beat (unspecified)
A	S	Atrial premature beat
a	S	Aberrated atrial premature beat
J	S	Nodal (junctional) premature beat
S	S	Supraventricular premature or ectopic beat (atrial or nodal)
V	V	Premature ventricular contraction
r	V	R-on-T premature ventricular contraction
F	V	Fusion of ventricular and normal beat
e	_	Atrial escape beat
j	_	Nodal (junctional) escape beat
n	_	Supraventricular escape beat (atrial or nodal)
E	_	Ventricular escape beat
/	_	Paced beat
f	_	Fusion of paced and normal beat
Q	_	Unclassifiable 
?	_	Beat not classified during learning
[	_	Start of ventricular flutter/fibrillation
!	_	Ventricular flutter wave
]	_	End of ventricular flutter/fibrillation
x	_	Non-conducted P-wave (blocked APC)
(	_	Waveform onset
)	_	Waveform end
p	_	Peak of P-wave
t	_	Peak of T-