Outline:

~4,000,000 samples for the training set + ~80,000 for the validation set (no augmentation)

1. Annotation:

   1)  Read through ann.txt. For each R wave point, store its position to the corresponding set (R/A/V). If type is X, skip.
   (annProcess)


2. Sampling:

   1) For each record in lydhdb, choose 18 segments (768 points each). For each segment, slide a 512-point window with a step of 8 points to obtain 256 / 8 = 32 samples, and create a label for each sample.
   
   2) Obtain period with tiny QRS and period with heart block. Create label.
   
   3) Obtain true noise from each record. Create label
   
   4) Obtain regular noise(sine/square/triangle) with frequency ranging from 1 to 10 and different phases. Create label.


3. Data Augmentation: 

   1) Cut 5 periods (512 points) from each of the em and ma noise records. Apply 2 different coefficients to each of the 10 segments -> obtain 5*2*2 = 20 noise segments in total. (noise_segment)


4. Wavelet decomposition + Store as TFRecord
    
   1) Add each noise segment to each sampled sequence.

In [None]:
import os, math, time
import numpy as np
import tensorflow as tf
from Stationary_transform import *
from sklearn.model_selection import train_test_split

### Annotation Preprocessing

In [None]:
'''preprocess annotation file; 
   create set R: contains all R waves
          set A: contains all atrial premature beats
          set V: contains all ventricular premature beats'''

def annProcess(path):
    """Parse ``<path>/ann.txt`` into sets of beat positions.

    Every annotation line is read as ``<time_ms>,<type>[,...]``. The time is
    converted from milliseconds to a sample index at 256 samples per second.

    Args:
        path: record folder that contains ``ann.txt``.

    Returns:
        Tuple ``(R, A, V)`` of sets of sample indices:
        R holds every beat whose type is not ``X``,
        A holds the beats of type ``A`` and V the beats of type ``V``.
    """
    R, A, V = set(), set(), set()

    with open(path+'/ann.txt','r') as f:
        for line in f:
            fields = line.split(',')
            if len(fields) < 2:
                continue  # blank or malformed line: nothing to record
            # strip so that a type that is the last field ("...,X\n") still matches
            typ = fields[1].strip()
            if typ == 'X':
                continue
            pos = round(int(fields[0])/1000*256)  # convert time(ms) to index
            R.add(pos)
            if typ == 'A':
                A.add(pos)
            elif typ == 'V':
                V.add(pos)

    return R, A, V

### Label Creator Helper

In [None]:
'''create label for samples containing R waves
*** position in the label is the relative position to the grid with range from [0,1]
'''

def create_normal_label(sample, R):
    """Build the (8, 2) float32 label of one 512-point window.

    The window is split into 8 grids of 64 points. ``sample[-1]`` is read as
    the index of the window centre, so the window starts at ``sample[-1]-256``.
    For each grid the row is ``[1, offset/64]`` when a position of ``R`` falls
    inside it (``offset`` is the position relative to the grid start) and
    ``[0, -1]`` otherwise. When several positions fall in one grid, the last
    one wins.
    """
    # NOTE(review): sample[-1] comes out of a float32 row in the callers, so
    # indices above 2**24 may not be exact - confirm record lengths.
    grids = np.empty((8,2),'float32')
    for cell in range(8):
        hit = None
        for offset in range(64):
            if 64*cell+offset+sample[-1]-256 in R:
                hit = offset  # keep scanning: a later R overrides this one
        grids[cell] = [0,-1] if hit is None else [1,hit/64]
    return grids


'''create label for true noise or regular noise samples'''

def create_noise_label():
    """Return the (8, 2) integer label of a window without any R wave.

    Every grid row is ``[0, -1]``: probability 0 and a meaningless position.
    """
    empty_grid = np.array([0,-1],'int')
    return np.tile(empty_grid,(8,1))

### Sampling period with normal R waves

In [None]:
'''Check if at least 1 R wave present in the period'''
def validPeriod(n, R):
    """Return True when ``R`` holds a position within 255 points of ``n``."""
    return any((n-dist) in R or (n+dist) in R for dist in range(256))

'''sampling period without noise'''
def sampling_normal(R,record,train_size,val_size,ecg):
    """Sample 512-point windows that contain at least one R wave.

    Training windows: the record is cut into ``train_size//32`` equal
    segments; in each one a random point is drawn and a window is slid from it
    32 times with a step of 8 points. A window is kept only when it lies
    completely inside the record and ``validPeriod`` accepts its centre.
    Validation windows are drawn at random positions that were not used as a
    training centre.

    Each x row holds 514 values: the 512 ECG points followed by ``record`` and
    the centre index. Each y row is the label from ``create_normal_label``.

    Args:
        R: set of R-wave positions (sample indices).
        record: record identifier stored in every x row.
        train_size: maximum number of training windows.
        val_size: number of validation windows.
        ecg: 1-D signal of the record.

    Returns:
        ``x_train, x_val, y_train, y_val``. The training arrays are trimmed to
        the number of windows actually kept, so they never contain
        uninitialised rows.
    """
    x_train = np.empty((train_size,514),'float32')
    y_train = np.empty((train_size,8,2),'float32')
    x_val = np.empty((val_size,514),'float32')
    y_val = np.empty((val_size,8,2),'float32')

    selected,count,times = set(),0,train_size//32
    last_center = len(ecg)-256  # largest centre whose window still fits

    np.random.seed(40)            # Get sample for train set
    seg_len = len(ecg)//times if times else 0
    if seg_len:
        # exactly `times` segments: a trailing partial segment would run past
        # the record and past the end of x_train
        for seg in range(times):
            seg_start = seg*seg_len
            n = np.random.randint(seg_start,seg_start+seg_len)
            for _ in range(32):
                if 256 <= n <= last_center and validPeriod(n,R):
                    x = np.empty((514),'float32')
                    x[:512] = ecg[n-256:n+256]
                    x[-2:] = [record,n]
                    x_train[count] = x
                    y_train[count] = create_normal_label(x,R)
                    count += 1
                    selected.add(n)
                n += 8
    if count < train_size:  # drop the rows that were never written
        x_train,y_train = x_train[:count].copy(),y_train[:count].copy()

    np.random.seed(40) # Get sample for val set
    count = 0
    # NOTE: this loop only ends once val_size valid centres have been found
    while count<val_size:
        n = np.random.randint(256,len(ecg)-256)
        if n not in selected and validPeriod(n,R):
            x = np.empty((514),'float32')
            x[:512] = ecg[n-256:n+256]
            x[-2:] = [record,n]
            x_val[count] = x
            y_val[count] = create_normal_label(x,R)
            count+=1
        selected.add(n)

    return x_train, x_val, y_train, y_val

### Sampling Premature Beat Period

In [None]:
'''sampling period with A'''

def sampling_A(R,A,record,train_size,val_size,ecg):
    """Sample windows around atrial premature beats.

    Beats are drawn at random (without reuse) from ``A``. For a training beat
    at ``n`` 16 windows start at ``n-384, n-376, ..., n-264``; for a
    validation beat 8 windows start at ``n-384, n-368, ..., n-272``.

    Each x row holds 514 values: the 512 ECG points followed by ``record`` and
    the centre index of the window. Each y row is the label from
    ``create_normal_label``.

    Args:
        R: set of all R-wave positions, used for the labels.
        A: set of atrial premature beat positions.
        record: record identifier stored in every x row.
        train_size: number of training windows (callers pass a multiple of 16).
        val_size: number of validation windows (callers pass a multiple of 8).
        ecg: 1-D signal of the record.

    Returns:
        ``x_train, x_val, y_train, y_val``.

    Raises:
        ValueError: when every beat of ``A`` has been used before the
            requested number of windows is reached.
    """
    x_train,y_train = np.empty((train_size,514),'float32'),np.empty((train_size,8,2),'float32') # initialize output
    x_val,y_val  = np.empty((val_size,514),'float32'),np.empty((val_size,8,2),'float32')

    selected, count= set(), 0 # Get sample for train set
    np.random.seed(40)
    A = np.fromiter(A, 'int')
    while count < train_size:
        if len(selected) >= len(A):
            raise ValueError('not enough unused A beats for the training set')
        # np.random (not the stdlib random module) so that the seed above applies
        n = int(np.random.choice(A))
        if n not in selected:
            for i in range(n-256-128,n-256,8): # 16 windows
                x = np.empty((514),'float32')
                x[:512] = ecg[i:i+512]
                x[-2:] = [record,i+256]
                x_train[count] = x
                y_train[count] = create_normal_label(x,R)
                count+=1
            selected.add(n)

    count = 0
    while count<val_size:  # Get sample for val set
        if len(selected) >= len(A):
            raise ValueError('not enough unused A beats for the validation set')
        n = int(np.random.choice(A))
        if n not in selected:
            for i in range(n-256-128,n-256,16): #8 windows
                x = np.empty((514),'float32')
                x[:512] = ecg[i:i+512]
                x[-2:] = [record,i+256]
                x_val[count] = x
                y_val[count] = create_normal_label(x,R)
                count+=1
            selected.add(n)

    return x_train, x_val, y_train, y_val

In [None]:
'''sampling period with V'''

def sampling_V(R,V,record,train_size,val_size,ecg):
    """Sample windows around ventricular premature beats.

    Beats are drawn at random (without reuse) from ``V``. For a training beat
    at ``n`` 32 windows start at ``n-384, n-376, ..., n-136``; for a
    validation beat 8 windows start at ``n-384, n-368, ..., n-272``.

    Each x row holds 514 values: the 512 ECG points followed by ``record`` and
    the centre index of the window. Each y row is the label from
    ``create_normal_label``.

    Args:
        R: set of all R-wave positions, used for the labels.
        V: set of ventricular premature beat positions.
        record: record identifier stored in every x row.
        train_size: number of training windows (callers pass a multiple of 32).
        val_size: number of validation windows (callers pass a multiple of 8).
        ecg: 1-D signal of the record.

    Returns:
        ``x_train, x_val, y_train, y_val``.

    Raises:
        ValueError: when every beat of ``V`` has been used before the
            requested number of windows is reached.
    """
    x_train,y_train = np.empty((train_size,514),'float32'),np.empty((train_size,8,2),'float32') # initialize output
    x_val,y_val  = np.empty((val_size,514),'float32'),np.empty((val_size,8,2),'float32')

    selected,count = set(),0
    np.random.seed(40)
    V = np.fromiter(V, 'int')
    while count<train_size:  # Get sample for train set
        if len(selected) >= len(V):
            raise ValueError('not enough unused V beats for the training set')
        # np.random (not the stdlib random module) so that the seed above applies
        n = int(np.random.choice(V))
        if n not in selected:
            for i in range(n-256-128,n-128,8): # 32 windows
                x = np.empty((514),'float32')
                x[:512] = ecg[i:i+512]
                x[-2:] = [record,i+256]
                x_train[count] = x
                y_train[count] = create_normal_label(x,R)
                count+=1
            selected.add(n)

    count = 0
    while count<val_size:  # Get sample for val set
        if len(selected) >= len(V):
            raise ValueError('not enough unused V beats for the validation set')
        n = int(np.random.choice(V))
        if n not in selected:
            for i in range(n-256-128,n-256,16): # 8 windows
                x = np.empty((514),'float32')
                x[:512] = ecg[i:i+512]
                x[-2:] = [record,i+256]
                x_val[count] = x
                y_val[count] = create_normal_label(x,R)
                count+=1
            selected.add(n)

    return x_train, x_val, y_train, y_val

### Sampling Heart Block Period

In [None]:
def sampling_block(R,record,train_size,val_size,ecg):
    """Sample 512-point windows from a heart-block record.

    Training windows: the record is cut into ``train_size//32`` equal
    segments; in each one a random point is drawn and a window is slid from it
    32 times with a step of 8 points. A window is kept only when it lies
    completely inside the record and ``validPeriod`` accepts its centre; every
    visited centre is excluded from the validation draw. Validation windows
    are drawn at random positions that were not visited before.

    Each x row holds 514 values: the 512 ECG points followed by ``record`` and
    the centre index. Each y row is the label from ``create_normal_label``.

    Args:
        R: set of R-wave positions (sample indices).
        record: record identifier stored in every x row.
        train_size: maximum number of training windows.
        val_size: number of validation windows.
        ecg: 1-D signal of the record.

    Returns:
        ``x_train, x_val, y_train, y_val``. The training arrays are trimmed to
        the number of windows actually kept, so they never contain
        uninitialised rows.
    """
    x_train = np.empty((train_size,514),'float32')
    y_train = np.empty((train_size,8,2),'float32')
    x_val = np.empty((val_size,514),'float32')
    y_val = np.empty((val_size,8,2),'float32')

    selected,count,times = set(),0,train_size//32
    last_center = len(ecg)-256  # largest centre whose window still fits

    np.random.seed(40)            # Get sample for train set
    seg_len = len(ecg)//times if times else 0
    if seg_len:
        # exactly `times` segments: a trailing partial segment would run past
        # the record and past the end of x_train
        for seg in range(times):
            seg_start = seg*seg_len
            n = np.random.randint(seg_start,seg_start+seg_len)
            for _ in range(32):
                if 256 <= n <= last_center and validPeriod(n,R):
                    x = np.empty((514),'float32')
                    x[:512] = ecg[n-256:n+256]
                    x[-2:] = [record,n]
                    x_train[count] = x
                    y_train[count] = create_normal_label(x,R)
                    count += 1
                selected.add(n)
                n += 8
    if count < train_size:  # drop the rows that were never written
        x_train,y_train = x_train[:count].copy(),y_train[:count].copy()

    np.random.seed(40)            # Get sample for val set
    count = 0
    # NOTE: this loop only ends once val_size valid centres have been found
    while count<val_size:
        n = np.random.randint(256,len(ecg)-256)
        if n not in selected and validPeriod(n,R):
            x = np.empty((514),'float32')
            x[:512] = ecg[n-256:n+256]
            x[-2:] = [record,n]
            x_val[count] = x
            y_val[count] = create_normal_label(x,R)
            count+=1
        selected.add(n)
    return x_train, x_val, y_train, y_val

### Sampling Tiny Wave

In [None]:
def get_boundary(boundary, t0):
    """Convert wall-clock periods into sample-index periods.

    Args:
        boundary: sequence of ``[start, end]`` strings in the format
            ``'%Y-%m-%d %H:%M:%S'``.
        t0: reference time in seconds, as returned by ``time.mktime``.

    Returns:
        Integer array of shape ``(len(boundary), 2)`` where each value is
        ``int((t - t0) * 256)``.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    def to_index(stamp):
        seconds = time.mktime(time.strptime(stamp, stamp_format))
        return int((seconds-t0)*256)

    sample_boundary = np.empty((len(boundary),2),'int')
    for row,period in enumerate(boundary):
        sample_boundary[row] = [to_index(period[0]),to_index(period[1])]
    return sample_boundary

In [None]:
def sampling_tiny(R,sample_boundary,record,train_size,val_size,ecg):
    """Sample every 4th window inside the given tiny-wave periods.

    For each ``[start, end]`` pair of ``sample_boundary`` the window centre
    runs from ``start+256`` up to (not including) ``end-256`` in steps of 4.
    ``train_size+val_size`` is only the capacity of the working buffers.

    Returns:
        ``data_x, data_y`` trimmed to the number of windows taken. Each x row
        holds the 512 ECG points followed by ``record`` and the centre index;
        each y row is the label from ``create_normal_label``.
    """
    capacity = train_size+val_size
    data_x = np.empty((capacity,514),'float32')
    data_y = np.empty((capacity,8,2),'float32')

    filled = 0
    for period in sample_boundary:
        first_center,stop_center = period[0]+256,period[1]-256
        for n in range(first_center,stop_center,4):
            row = np.empty((514),'float32')
            row[:512] = ecg[n-256:n+256]
            row[-2:] = [record,n]
            data_x[filled] = row
            data_y[filled] = create_normal_label(row,R)
            filled += 1

    # copies, so the full-capacity buffers can be released
    return data_x[:filled].copy(),data_y[:filled].copy()

### Sampling True Noise

In [None]:
'''sampling true noise from each record'''

def sampling_noise(R,record,ecg):
    """Sample windows from long beat-free stretches of a record.

    Whenever two consecutive positions of ``R`` are more than 1064 points
    apart, window centres are taken every 25 points from ``257`` after the
    first position up to ``257`` before the second one. At most 150000
    windows are returned.

    Args:
        R: set of R-wave positions (sample indices).
        record: record identifier stored in every x row.
        ecg: 1-D signal of the record.

    Returns:
        ``x_noise, y_noise, count``. x rows hold the 512 ECG points followed
        by ``record`` and the centre index; y rows come from
        ``create_noise_label``. When no window qualifies the result is
        ``[], [], 0``.
    """
    max_windows = 150000  # arbitrary upper bound on the number of noise windows

    beats = sorted(R)
    centers = []
    for prev,loc in zip(beats,beats[1:]):
        if len(centers) >= max_windows:
            break
        if loc-prev > 1064:
            centers.extend(range(prev+257,loc-257,25))
    del centers[max_windows:]  # a single long gap may overshoot the bound

    count = len(centers)
    if count == 0: # when no noise in the record
        return [],[],0

    # allocate exactly what is needed instead of a 150000-row scratch buffer
    x_noise = np.empty((count,514),'float32')
    y_noise = np.empty((count,8,2),'int')
    for row,n in enumerate(centers):
        x_noise[row,:512] = ecg[n-256:n+256]
        x_noise[row,-2:] = [record,n]
        y_noise[row] = create_noise_label()

    return x_noise,y_noise,count

### Sampling Regular Noise

In [None]:
'''Create sine/square/triangle waves as regular noise'''

def get_sin(freq_low, freq_high, size=747):
    """Generate sine waves as regular-noise samples.

    For every integer frequency in ``[freq_low, freq_high]`` one 512-point
    sine is produced per phase shift ``0 .. int(256/freq)-1`` points.

    Args:
        freq_low: lowest frequency (inclusive).
        freq_high: highest frequency (inclusive).
        size: number of rows to allocate; the default 747 equals the number
            of waves produced for frequencies 1 to 10.

    Returns:
        ``data_x`` of shape ``(size, 514)`` (512 points followed by two zeros)
        and ``data_y`` of shape ``(size, 8, 2)`` from ``create_noise_label``.
        Rows beyond the number of generated waves are left unwritten.
    """
    data_x, data_y = np.empty((size,514),'float32'),np.empty((size,8,2),'int')

    count = 0
    time_axis = np.arange(512)
    for freq in range(freq_low, freq_high+1): # get sine with multiple frequency and phase
        B = 2*np.pi*freq/256
        for phi in range(int(256/freq)):
            print('Get sine '+str(count)+'/ '+str(size), end = '\r')
            sine = np.sin(B*time_axis-B*phi)
            # np.append, as in get_square/get_triangle (bare `append` is undefined here)
            data_x[count] = np.append(sine,[0,0])
            data_y[count] = create_noise_label()
            count+=1
    return data_x,data_y
    
    
def get_square(freq_low, freq_high, size=747):
    """Generate square waves as regular-noise samples.

    For every integer frequency in ``[freq_low, freq_high]`` a +1/-1 square
    wave is drawn into a 1024-point buffer, then one 512-point crop is taken
    per phase shift ``0 .. int(256/freq)-1``.

    Returns:
        ``data_x`` of shape ``(size, 514)`` (512 points followed by two zeros)
        and ``data_y`` of shape ``(size, 8, 2)`` from ``create_noise_label``.
    """
    data_x = np.empty((size,514),'float32')
    data_y = np.empty((size,8,2),'int')
    row = 0

    for freq in range(freq_low, freq_high+1):
        wave = np.zeros(1024,'float32')  # longer than needed; only 512-point crops are used
        half_period = int(256/(2*freq))
        for k in range(4*2*freq):
            # alternate the level every half period, starting high
            wave[k*half_period:(k+1)*half_period] = 1 if k%2 == 0 else -1
        for phi in range(int(256/freq)):  # one crop per phase
            print('Get square '+str(row)+'/ '+str(size), end = '\r')
            data_x[row] = np.append(wave[phi:phi+512],[0,0])
            data_y[row] = create_noise_label()
            row += 1
    return data_x, data_y
    
    
def get_triangle(freq_low, freq_high,size=747):
    """Generate triangle waves as regular-noise samples.

    For every integer frequency in ``[freq_low, freq_high]`` one period of a
    triangle wave is drawn, repeated through a 1024-point buffer, and one
    512-point crop is taken per phase shift ``0 .. int(256/freq)-1``.

    Returns:
        ``data_x`` of shape ``(size, 514)`` (512 points followed by two zeros)
        and ``data_y`` of shape ``(size, 8, 2)`` from ``create_noise_label``.
    """
    data_x = np.empty((size,514),'float32')
    data_y = np.empty((size,8,2),'int')
    row = 0

    for freq in range(freq_low, freq_high+1):
        wave = np.zeros(1024,'float32')  # longer than needed; only 512-point crops are used
        quarter = 256/(4*freq)           # length of a quarter period (may be fractional)
        slope = 1/quarter
        period = 256//freq
        for i in range(period):          # draw a single period
            phase = (i//quarter)%4
            if phase == 0:               # rising from 0
                wave[i] = slope*i
            elif phase == 3:             # rising back towards 0 from the trough
                wave[i] = slope*i+(-1-slope*(3*quarter))
            else:                        # falling through quarters 1 and 2
                wave[i] = -slope*i+slope*(quarter*2)
        for rep in range(1,4*freq):      # tile the period through the buffer
            wave[rep*period:(rep+1)*period] = wave[:period]

        for phi in range(int(256/freq)): # one crop per phase
            data_x[row] = np.append(wave[phi:phi+512],[0,0])
            data_y[row] = create_noise_label()
            row += 1
    return data_x, data_y

### Noise Segmentation

In [None]:
'''Obtain noise segments from em and ma noise record'''

def noise_segment(coefficient, em_path = 'Record/em.dat', ma_path = 'Record/ma.dat'):
    """Cut and scale noise segments from the em and ma noise records.

    Five 1024-point segments are cut from each record at evenly spaced
    centres (positions are derived from the length of the em record). The ten
    segments are ordered ``em1, ma1, ..., em5, ma5``; segment ``i`` is
    multiplied by each value of ``coefficient[i]``.

    Args:
        coefficient: one row of scale factors per segment (two per row for the
            20-row output).
        em_path: path of the em record (big-endian int16).
        ma_path: path of the ma record (big-endian int16).

    Returns:
        float32 array of shape ``(20, 1024)`` ordered
        ``em1_1, em1_2, ma1_1, ma1_2, ..., ma5_1, ma5_2``.
    """
    em = np.fromfile(em_path,'>i2')
    ma = np.fromfile(ma_path,'>i2')

    # evenly spaced centres, starting half a step into the record
    total = len(em)
    centers = range(total//5//2,total,total//5)

    orig_noise = np.empty((10,1024),'float32')
    for k,center in enumerate(centers):
        window = slice(center-512,center+512)
        orig_noise[2*k] = em[window]
        orig_noise[2*k+1] = ma[window]

    noise_sample = np.empty((20,1024),'float32')
    out = 0
    for seg_idx,segment in enumerate(orig_noise):
        for scale in coefficient[seg_idx]:
            noise_sample[out] = segment*scale
            out += 1

    return noise_sample

In [None]:
def get_large_noise():
    """Build 768-point noise templates from bursts of record 20009.

    Each template is zero everywhere except around its centre (index 384),
    where a short excerpt of ``db/lydhdb/20009/ecg.dat`` is copied.

    Returns:
        float32 array of shape ``(10, 768)``. Rows 0-4 hold the five bursts
        listed below; the remaining rows are all zero (the template list is
        not finished yet), so they add nothing when used as noise.
    """
    tmp = np.fromfile('db/lydhdb/20009/ecg.dat', '>i2')

    # (points before the centre, points after the centre, start in the record)
    bursts = [
        (50, 50, 177930),
        (40, 25, 7190),
        (5, 18, 7910),
        (40, 10, 10265),
        (22, 20, 10928),
    ]

    # zeros, not empty: rows without a burst must not carry arbitrary memory
    large_noise = np.zeros((10,768),'float32')
    for row,(before,after,src) in enumerate(bursts):
        large_noise[row,384-before:384+after] = tmp[src:src+before+after]
    return large_noise

### Augmentation and TFRecord Storage

In [None]:
'''Do stationary wavelet decomposition;
   Save samples to tfrecord file'''

def putSample(writer, size, data_x, data_y):
    """Write the first ``size`` samples to a TFRecord writer.

    For each sample the signal part (everything but the last two values of
    the x row) goes through ``decomp(..., 'db2', (512, 8))`` and is flattened
    to 4096 values; the last two values of the x row are appended unchanged.
    The label is flattened to 16 values. Features are stored under the keys
    ``'ecg'`` and ``'label'``.
    """
    for idx in range(size):
        target = data_y[idx].reshape(16)
        row = data_x[idx]

        packed = np.empty((4098),'float32')
        packed[:4096] = decomp(row[:-2],'db2',(512,8)).reshape(4096)
        packed[-2:] = row[-2:]  # carry the two trailing values through

        example = tf.train.Example(features = tf.train.Features(feature = {
            'ecg': tf.train.Feature(float_list=tf.train.FloatList(value=packed)),
            'label': tf.train.Feature(float_list=tf.train.FloatList(value=target)),
        }))
        writer.write(example.SerializeToString())

In [None]:
'''Add em/ma noise to each sample;
   Do stationary wavelet decomposition on each sample;
   Save samples to tfrecord file'''

def putSample_AddNoise(writer, size, data_x, data_y, noise_sample):
    """Write each of the first ``size`` samples once per noise segment.

    For every noise segment a random 512-point crop (centre drawn from
    ``[256, 768)``) is added to the signal before ``decomp(..., 'db2',
    (512, 8))``. The record layout matches ``putSample``: 4096 decomposition
    values plus the last two values of the x row under ``'ecg'``, and the
    16-value label under ``'label'``.
    """
    for idx in range(size):
        target = data_y[idx].reshape(16)
        row = data_x[idx]
        for segment in noise_sample:
            center = np.random.randint(256,768)
            crop = segment[center-256:center+256]

            packed = np.empty((4098),'float32')
            packed[:4096] = decomp(row[:-2]+crop,'db2',(512,8)).reshape(4096)
            packed[-2:] = row[-2:]

            example = tf.train.Example(features = tf.train.Features(feature = {
                'ecg': tf.train.Feature(float_list=tf.train.FloatList(value=packed)),
                'label': tf.train.Feature(float_list=tf.train.FloatList(value=target)),
            }))
            writer.write(example.SerializeToString())

In [None]:
def putSample_LargeNoise(writer, size, data_x, data_y, large_noise):
    """Write each of the first ``size`` samples once per large-noise template.

    For every template a random 512-point crop (centre drawn from
    ``[256, 512)``) is added to the signal before ``decomp(..., 'db2',
    (512, 8))``. The record layout matches ``putSample``: 4096 decomposition
    values plus the last two values of the x row under ``'ecg'``, and the
    16-value label under ``'label'``.
    """
    for idx in range(size):
        target = data_y[idx].reshape(16)
        row = data_x[idx]
        for template in large_noise:
            center = np.random.randint(256,512)
            crop = template[center-256:center+256]

            packed = np.empty((4098),'float32')
            packed[:4096] = decomp(row[:-2]+crop,'db2',(512,8)).reshape(4096)
            packed[-2:] = row[-2:]

            example = tf.train.Example(features = tf.train.Features(feature = {
                'ecg': tf.train.Feature(float_list=tf.train.FloatList(value=packed)),
                'label': tf.train.Feature(float_list=tf.train.FloatList(value=target)),
            }))
            writer.write(example.SerializeToString())

### Other helper

In [None]:
#Shuffle x and y; split to val set/train set
def shuffle_and_split(data_x,data_y,seed = 10,size = 0.03):
    """Shuffle x and y together, then split them into train and val sets.

    Both arrays are shuffled in place with the same seed, so rows stay
    paired, and are then passed to ``train_test_split``.

    Args:
        data_x: samples (shuffled in place).
        data_y: labels (shuffled in place).
        seed: seed applied before each shuffle.
        size: share of the data that goes to the validation set.

    Returns:
        ``x_train, x_val, y_train, y_val``.
    """
    np.random.seed(seed)
    np.random.shuffle(data_x)
    np.random.seed(seed)  # same seed again -> same permutation for the labels
    np.random.shuffle(data_y)
    # honour `size` (it used to be ignored in favour of a hard-coded 0.03)
    x_train, x_val, y_train, y_val = train_test_split(data_x,data_y, test_size=size)
    return x_train, x_val, y_train, y_val


# get the name of each record folder
def findSubdir(path):
    """Return the names of the record folders directly inside ``path``.

    The ``lorenz_plots`` folder is left out when it exists.

    Raises:
        FileNotFoundError: when ``path`` cannot be listed as a directory.
    """
    try:
        # only the first level is needed, so do not walk the whole tree
        _, dirnames, _ = next(os.walk(path))
    except StopIteration:
        raise FileNotFoundError('cannot list directory: ' + str(path)) from None
    return [name for name in dirnames if name != 'lorenz_plots']

### Main

In [None]:
def defineCoefficient():
    """Return the (10, 2) table of scale factors for the noise segments.

    Rows are interleaved as ``em1, ma1, em2, ma2, ..., em5, ma5``; each row
    holds the two coefficients of that segment.
    """
    em_scales = ([0.2,0.5], [0.1,0.15], [0.2,0.5], [0.1,0.3], [0.1,0.2])
    ma_scales = ([0.3,0.7], [0.5,1.3], [0.5,1.4], [1,4], [0.5,1.3])

    rows = []
    for em_pair, ma_pair in zip(em_scales, ma_scales):
        rows.append(em_pair)
        rows.append(ma_pair)
    return np.array(rows)

In [None]:
def defineTiny():
    """Return the tiny-wave periods of each tiny-wave record.

    Returns:
        dict mapping a record name to a list of ``[start, end]`` time strings
        in the format ``'%Y-%m-%d %H:%M:%S'``.
    """
    # (record, time string used as the record's reference time, periods)
    tiny_records = (
        ('20020', '2000-8-21 9:40:56', [
            ['2000-8-21 9:42:13','2000-8-21 9:43:20'],
            ['2000-8-21 9:48:24','2000-8-21 9:54:58'],
            ['2000-8-21 14:2:0','2000-8-21 14:3:0'],
            ['2000-8-21 19:55:00','2000-8-21 19:56:14'],
            ['2000-8-21 20:32:41','2000-8-21 20:33:46'],
            ['2000-8-22 2:47:32','2000-8-22 2:57:00'],
            ['2000-8-22 3:11:54','2000-8-22 3:12:39'],
            ['2000-8-22 3:16:40','2000-8-22 3:18:00'],
            ['2000-8-22 3:19:10','2000-8-22 3:20:00'],
            ['2000-8-22 4:22:20','2000-8-22 4:30:54'],
            ['2000-8-22 4:33:25','2000-8-22 4:33:33'],
            ['2000-8-22 4:50:38','2000-8-22 4:53:00'],
            ['2000-8-22 5:1:19','2000-8-22 5:1:30'],
            ['2000-8-22 5:1:48','2000-8-22 5:1:56'],
            ['2000-8-22 5:3:22','2000-8-22 5:3:32'],
            ['2000-8-22 5:11:41','2000-8-22 5:11:48'],
            ['2000-8-22 5:17:19','2000-8-22 5:17:22'],
            ['2000-8-22 5:43:23','2000-8-22 5:43:30'],
            ['2000-8-22 6:32:22', '2000-8-22 6:33:02'],
            ['2000-8-22 6:59:44','2000-8-22 6:59:53'],
            ['2000-8-22 7:54:53','2000-8-22 7:55:03'],
        ]),
        ('20010', '2000-8-21 10:42:3', [
            ['2000-8-21 11:02:10','2000-8-21 11:4:0'],
            ['2000-8-21 11:04:32','2000-8-21 11:05:32'],
            ['2000-8-21 11:06:0','2000-8-21 11:06:8'],
            ['2000-8-21 13:41:02','2000-8-21 13:41:52'],
            ['2000-8-21 13:42:54','2000-8-21 13:43:14'],
            ['2000-8-21 13:43:20','2000-8-21 13:43:33'],
            ['2000-8-21 13:43:43','2000-8-21 13:44:11'],
            ['2000-8-21 13:47:57','2000-8-21 13:48:57'],
            ['2000-8-21 13:49:34','2000-8-21 13:49:45'],
            ['2000-8-21 13:50:56','2000-8-21 13:52:20'],
            ['2000-8-21 13:53:47','2000-8-21 13:58:0'],
            ['2000-8-21 16:42:34','2000-8-21 16:50:27'],
            ['2000-8-22 4:15:6', '2000-8-22 4:15:49'],
            ['2000-8-22 4:58:00','2000-8-22 4:59:00'],
            ['2000-8-22 5:14:0','2000-8-22 5:16:0'],
            ['2000-8-22 5:29:30','2000-8-22 5:31:0'],
            ['2000-8-22 7:29:45','2000-8-22 7:32:45'],
            ['2000-8-22 7:33:20','2000-8-22 7:36:30'],
            ['2000-8-22 7:36:34','2000-8-22 7:43:21'],
        ]),
        ('20029', '2000-8-21 14:37:32', [
            ['2000-8-21 14:42:04','2000-8-21 14:45:17'],
            ['2000-8-21 14:45:32','2000-8-21 14:48:26'],
            ['2000-8-21 15:1:30','2000-8-21 15:8:11'],
            ['2000-8-21 15:15:33','2000-8-21 15:16:26'],
            ['2000-8-21 15:35:5','2000-8-21 15:35:50'],
            ['2000-8-21 16:18:50','2000-8-21 16:20:00'],
            ['2000-8-21 16:56:15','2000-8-21 17:13:21'],
            ['2000-8-21 17:14:50','2000-8-21 17:15:40'],
        ]),
        ('20058', '2000-8-21 15:38:25', [
            ['2000-8-22 8:16:40','2000-8-22 8:35:0'],
            ['2000-8-22 8:36:00','2000-8-22 8:38:40'],
            ['2000-8-22 8:39:27','2000-8-22 8:42:33'],
            ['2000-8-22 8:8:5','2000-8-22 8:11:24'],
            ['2000-8-22 7:53:58','2000-8-22 7:59:7'],
        ]),
    )

    record_boundary,t0 = {},{}
    for record, reference, periods in tiny_records:
        # NOTE(review): t0 is filled in but never returned, so callers cannot
        # read it from here.
        t0[record] = time.mktime(time.strptime(reference,'%Y-%m-%d %H:%M:%S'))
        record_boundary[record] = periods

    return record_boundary

In [None]:
def main(database_path = 'db/lydhdb', output_path = '/tmpdata/',trainfile_num = 9):
    """Sample every record and store the result as TFRecord files.

    The records found under ``database_path`` are spread over
    ``trainfile_num`` training files ``train_<k>.tfrecords``; validation
    samples of all records go to ``val.tfrecords``. Per record the following
    groups are sampled when they apply: normal windows, windows around A and V
    beats, heart-block windows, tiny-wave windows and true noise. Normal,
    premature-beat and block training windows are additionally stored with em/ma
    noise and with large noise added. The regular noise (sine, square,
    triangle) is appended to the last training file.

    Args:
        database_path: folder holding one sub-folder per record.
        output_path: prefix put in front of every output file name.
        trainfile_num: number of training files to write.
    """
    record_boundary = defineTiny()
    # reference time of each tiny-wave record; defineTiny() does not hand
    # these out, so they are kept here
    stamp_format = '%Y-%m-%d %H:%M:%S'
    t0 = {
        '20020': time.mktime(time.strptime('2000-8-21 9:40:56', stamp_format)),
        '20010': time.mktime(time.strptime('2000-8-21 10:42:3', stamp_format)),
        '20029': time.mktime(time.strptime('2000-8-21 14:37:32', stamp_format)),
        '20058': time.mktime(time.strptime('2000-8-21 15:38:25', stamp_format)),
    }

    database = sorted(findSubdir(database_path)) # get all record folder name

    block = ['20017','20031','20048','20066']
    tiny = ['20020','20010','20029','20058']
    normal = [i for i in database if i not in block and i not in tiny]

    record_num = math.ceil(len(database)/trainfile_num) if trainfile_num else 0

    with tf.io.TFRecordWriter(output_path + 'val.tfrecords') as val_writer:
        noise_sample = noise_segment(defineCoefficient())
        large_noise = get_large_noise()
        for i in range(0,trainfile_num):
            is_last_file = i == trainfile_num-1
            with tf.io.TFRecordWriter(output_path +'train_'+str(i+1)+'.tfrecords') as train_writer:
                records = database[i*record_num:] if is_last_file else database[i*record_num:(i+1)*record_num]
                for record in records:
                    path = database_path+'/'+record
                    ecg = np.fromfile(path +'/ecg.dat', '>i2') # load ecg

                    R,A,V = annProcess(path)

                    print(record)

                    if record in normal:
                        split = sampling_normal(R,record,800,475,ecg)
                        _store_split(train_writer, val_writer, split, noise_sample, large_noise)
                    print('      normal store finished       ')

                    if len(A)>=14: # sampling period with A
                        split = sampling_A(R,A,record,int(len(A)*0.075)*16,int(len(A)*0.089)*8,ecg)
                        _store_split(train_writer, val_writer, split, noise_sample, large_noise)

                    if len(V)>=50: # sampling period with V
                        split = sampling_V(R,V,record,int(len(V)*0.0176)*32,int(len(V)*0.042)*8,ecg)
                        _store_split(train_writer, val_writer, split, noise_sample, large_noise)
                    print('      premature beat store finished       ')

                    if record in block:
                        split = sampling_block(R,record,2976,1750,ecg)
                        _store_split(train_writer, val_writer, split, noise_sample, large_noise)
                    print('      block store finished       ')

                    if record in tiny:
                        sample_boundary = get_boundary(record_boundary[record], t0[record])
                        data_x,data_y = sampling_tiny(R,sample_boundary,record,150000,3000,ecg)
                        split = train_test_split(data_x,data_y, test_size=0.0186,shuffle = False)
                        _store_split(train_writer, val_writer, split)
                    print('      tiny wave store finished       ')

                    x_noise, y_noise,count_noise = sampling_noise(R,record,ecg) # sampling true noise sample
                    if len(x_noise)!=0:
                        split = shuffle_and_split(x_noise,y_noise,seed = 10,size = 0.03)
                        _store_split(train_writer, val_writer, split)
                        print('      noise store finished       ')

                if is_last_file: # get regular noise (was hard-coded to the 9th file)
                    x_sine,y_sine = get_sin(1,10)
                    x_square, y_square = get_square(1,10)
                    x_tran, y_tran = get_triangle(1,10)
                    putSample(train_writer, 747, x_sine,y_sine)
                    putSample(train_writer, 747, x_square, y_square)
                    putSample(train_writer, 747, x_tran, y_tran)
                    print('sine, square, traingle finished')
    # both writers are closed by their `with` blocks


def _store_split(train_writer, val_writer, split, noise_sample=None, large_noise=None):
    """Write one ``(x_train, x_val, y_train, y_val)`` split to the writers.

    When ``noise_sample`` is given, the training samples are first stored
    with em/ma noise and with large noise added, then as they are.
    Validation samples are always stored as they are.
    """
    x_train, x_val, y_train, y_val = split
    if noise_sample is not None:
        putSample_AddNoise(train_writer, len(x_train), x_train,y_train,noise_sample)
        putSample_LargeNoise(train_writer, len(x_train), x_train,y_train, large_noise)
    putSample(train_writer,len(x_train),x_train,y_train)
    putSample(val_writer, len(x_val), x_val,y_val)

In [None]:
main()