In [2]:
import os
import glob
import wfdb
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random

In [2]:
# Collect every signal file (*.dat) in the train_data directory.
# glob.glob returns matches in arbitrary, OS-dependent order; sorting makes the
# processing order -- and therefore the image numbers assigned later -- reproducible.
train_files = sorted(glob.glob('train_data/*.dat'))
print(len(train_files))

38


In [3]:
# Meaning of the beat annotation symbols used in this notebook, taken from
# https://archive.physionet.org/physiobank/annotations.shtml
# NOTE: the legend below is a bare string expression, not a comment; as the last
# expression in the cell the notebook echoes it back as the cell output. It is
# not assigned to any name.
"""
    N = Normal beat
    L = Left bundle branch block beat
    R = Right bundle branch block beat
    V = Premature ventricular contraction
    / = Paced beat(PB)
    A = Atrial premature beat
    f = Fusion of paced and normal beat
    F = Fusion of ventricular and normal beat
    j = Nodal (junctional) escape beat
    a = Aberrated atrial premature beat
    E = Ventricular escape beat
    J = Nodal (junctional) premature beat
    e = Atrial escape beat
    S = Supraventricular premature beat
   
"""

'\n    N = Normal beat\n    L = Left bundle branch block beat\n    R = Right bundle branch block beat\n    V = Premature ventricular contraction\n    / = Paced beat(PB)\n    A = Atrial premature beat\n    f = Fusion of paced and normal beat\n    F = Fusion of ventricular and normal beat\n    j = Nodal (junctional) escape beat\n    a = Aberrated atrial premature beat\n    E = Ventricular escape beat\n    J = Nodal (junctional) premature beat\n    e = Atrial escape beat\n    S = Supraventricular premature beat\n   \n'

In [4]:
# Annotation symbols whose heartbeats are extracted, in processing order.
classes = [
    'N',  # normal beat
    'L',  # left bundle branch block beat
    'R',  # right bundle branch block beat
    'E',  # ventricular escape beat
    '/',  # paced beat
    'V',  # premature ventricular contraction
    'A',  # atrial premature beat
    'f',  # fusion of paced and normal beat
    'F',  # fusion of ventricular and normal beat
    'j',  # nodal (junctional) escape beat
    'a',  # aberrated atrial premature beat
    'J',  # nodal (junctional) premature beat
    'e',  # atrial escape beat
    'S',  # supraventricular premature beat
]

# Output folder for each annotation symbol: normal beats are saved under NOR,
# every other extracted class under ABNOR.
mapping = {symbol: ('NOR' if symbol == 'N' else 'ABNOR') for symbol in classes}

# Running image number; used to build the file name of each saved image.
count = 0

In [5]:
# Extract one image per annotated heartbeat of every training record.
#
# For each beat of interest the window runs from `margin` samples after the
# nearest usable previous annotation to `margin` samples before the nearest
# usable next annotation, and is then shrunk symmetrically so that the beat sits
# in the centre. Each window is plotted without axes, saved as PNG under
# Train/NOR or Train/ABNOR and rewritten as a 234x234 grayscale image.

# Gap (in samples, not seconds) kept between a neighbouring annotation and the
# edge of the extracted window.
margin = 20

for record in train_files:
    print(record)
    # Strip the '.dat' extension: rdsamp/rdann are given the bare record path.
    record = record[:-4]
    # Only channel 0 is read, so `signals` has a single column.
    signals, fields = wfdb.rdsamp(record, channels=[0])
    annotation = wfdb.rdann(record, 'atr')

    # Sample position and symbol of every annotation in the record.
    beats = list(annotation.sample)
    symbols = np.asarray(annotation.symbol, dtype=str)
    last = len(beats) - 1

    # Extracted segments for each class of this record.
    segments = dict()

    for beat in classes:
        beat_samples = []

        # Indices of the annotations carrying this symbol. Taking the indices
        # directly avoids a linear `beats.index(...)` search per beat, which was
        # also ambiguous when two annotations share a sample position.
        for j in np.flatnonzero(symbols == beat):
            j = int(j)

            # The first and last annotations have no neighbour on one side.
            if j == 0 or j == last:
                continue
            peak = beats[j]

            # Walk backwards to the closest annotation lying at least `margin`
            # samples before the peak. (The previous implementation decremented
            # its offset, so it walked forwards instead and ran off the end of
            # the list when the second neighbour was also too close.)
            prev_idx = j - 1
            while prev_idx >= 0 and beats[prev_idx] + margin > peak:
                prev_idx -= 1
            if prev_idx < 0:
                continue

            # Walk forwards to the closest annotation lying at least `margin`
            # samples after the peak; skip the beat only if none exists.
            next_idx = j + 1
            while next_idx <= last and beats[next_idx] - margin < peak:
                next_idx += 1
            if next_idx > last:
                continue

            start = beats[prev_idx] + margin
            end = beats[next_idx] - margin

            # Centre the peak: keep the shorter of the two half-widths on both sides.
            half_width = min(peak - start, end - peak)
            if half_width <= 0:
                # A zero-width window would only produce a blank image.
                continue

            beat_samples.append(signals[peak - half_width: peak + half_width, 0])

        segments[beat] = beat_samples

    for key, val in segments.items():
        if not val:
            continue

        directory = 'Train/' + mapping[key]
        os.makedirs(directory, exist_ok=True)

        for segment in val:
            # Bare trace: no frame, ticks or spines, so the image holds only the signal.
            fig = plt.figure(frameon=False)
            ax = fig.add_subplot(111)
            ax.plot(segment)
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

            count += 1
            filename = directory + '/' + str(count) + '.png'
            fig.savefig(filename)
            # Close explicitly; thousands of open figures would exhaust memory.
            plt.close(fig)

            # Re-read the saved plot and overwrite it as a 234x234 grayscale image.
            im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            im_gray = cv2.resize(im_gray, (234, 234), interpolation=cv2.INTER_LANCZOS4)
            cv2.imwrite(filename, im_gray)

    print('Completed record: ' + record)

print('done')

train_data\100.dat
Completed record: train_data\100
train_data\101.dat
Completed record: train_data\101
train_data\102.dat
Completed record: train_data\102
train_data\103.dat
Completed record: train_data\103
train_data\105.dat
Completed record: train_data\105
train_data\106.dat
Completed record: train_data\106
train_data\108.dat
Completed record: train_data\108
train_data\111.dat
Completed record: train_data\111
train_data\112.dat
Completed record: train_data\112
train_data\113.dat
Completed record: train_data\113
train_data\115.dat
Completed record: train_data\115
train_data\116.dat
Completed record: train_data\116
train_data\117.dat
Completed record: train_data\117
train_data\118.dat
Completed record: train_data\118
train_data\121.dat
Completed record: train_data\121
train_data\122.dat
Completed record: train_data\122
train_data\123.dat
Completed record: train_data\123
train_data\124.dat
Completed record: train_data\124
train_data\200.dat
Completed record: train_data\200
train_data\2

# Test Data Preparation

In [3]:
# Collect every signal file (*.dat) in the test_data directory.
# glob.glob returns matches in arbitrary, OS-dependent order; sorting makes the
# processing order -- and therefore the image numbers assigned later -- reproducible.
test_files = sorted(glob.glob('test_data/*.dat'))
print(len(test_files))

10


In [4]:
# Annotation symbols whose heartbeats are extracted, in processing order.
classes = [
    'N',  # normal beat
    'L',  # left bundle branch block beat
    'R',  # right bundle branch block beat
    'E',  # ventricular escape beat
    '/',  # paced beat
    'V',  # premature ventricular contraction
    'A',  # atrial premature beat
    'f',  # fusion of paced and normal beat
    'F',  # fusion of ventricular and normal beat
    'j',  # nodal (junctional) escape beat
    'a',  # aberrated atrial premature beat
    'J',  # nodal (junctional) premature beat
    'e',  # atrial escape beat
    'S',  # supraventricular premature beat
]

# Output folder for each annotation symbol: normal beats are saved under NOR,
# every other extracted class under ABNOR.
mapping = {symbol: ('NOR' if symbol == 'N' else 'ABNOR') for symbol in classes}

# Running image number; used to build the file name of each saved image.
count = 0

In [6]:
# Extract one image per annotated heartbeat of every test record.
#
# For each beat of interest the window runs from `margin` samples after the
# nearest usable previous annotation to `margin` samples before the nearest
# usable next annotation, and is then shrunk symmetrically so that the beat sits
# in the centre. Each window is plotted without axes, saved as PNG under
# Test/NOR or Test/ABNOR and rewritten as a 234x234 grayscale image.

# Gap (in samples, not seconds) kept between a neighbouring annotation and the
# edge of the extracted window.
margin = 20

for record in test_files:
    print(record)
    # Strip the '.dat' extension: rdsamp/rdann are given the bare record path.
    record = record[:-4]
    # Only channel 0 is read, so `signals` has a single column.
    signals, fields = wfdb.rdsamp(record, channels=[0])
    annotation = wfdb.rdann(record, 'atr')

    # Sample position and symbol of every annotation in the record.
    beats = list(annotation.sample)
    symbols = np.asarray(annotation.symbol, dtype=str)
    last = len(beats) - 1

    # Extracted segments for each class of this record.
    segments = dict()

    for beat in classes:
        beat_samples = []

        # Indices of the annotations carrying this symbol. Taking the indices
        # directly avoids a linear `beats.index(...)` search per beat, which was
        # also ambiguous when two annotations share a sample position.
        for j in np.flatnonzero(symbols == beat):
            j = int(j)

            # The first and last annotations have no neighbour on one side.
            if j == 0 or j == last:
                continue
            peak = beats[j]

            # Walk backwards to the closest annotation lying at least `margin`
            # samples before the peak. (The previous implementation decremented
            # its offset, so it walked forwards instead and ran off the end of
            # the list when the second neighbour was also too close.)
            prev_idx = j - 1
            while prev_idx >= 0 and beats[prev_idx] + margin > peak:
                prev_idx -= 1
            if prev_idx < 0:
                continue

            # Walk forwards to the closest annotation lying at least `margin`
            # samples after the peak; skip the beat only if none exists.
            next_idx = j + 1
            while next_idx <= last and beats[next_idx] - margin < peak:
                next_idx += 1
            if next_idx > last:
                continue

            start = beats[prev_idx] + margin
            end = beats[next_idx] - margin

            # Centre the peak: keep the shorter of the two half-widths on both sides.
            half_width = min(peak - start, end - peak)
            if half_width <= 0:
                # A zero-width window would only produce a blank image.
                continue

            beat_samples.append(signals[peak - half_width: peak + half_width, 0])

        segments[beat] = beat_samples

    for key, val in segments.items():
        if not val:
            continue

        directory = 'Test/' + mapping[key]
        os.makedirs(directory, exist_ok=True)

        for segment in val:
            # Bare trace: no frame, ticks or spines, so the image holds only the signal.
            fig = plt.figure(frameon=False)
            ax = fig.add_subplot(111)
            ax.plot(segment)
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

            count += 1
            filename = directory + '/' + str(count) + '.png'
            fig.savefig(filename)
            # Close explicitly; thousands of open figures would exhaust memory.
            plt.close(fig)

            # Re-read the saved plot and overwrite it as a 234x234 grayscale image.
            im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            im_gray = cv2.resize(im_gray, (234, 234), interpolation=cv2.INTER_LANCZOS4)
            cv2.imwrite(filename, im_gray)

    print('Completed record: ' + record)

print('done')

test_data\104.dat
Completed record: test_data\104
test_data\107.dat
Completed record: test_data\107
test_data\109.dat
Completed record: test_data\109
test_data\114.dat
Completed record: test_data\114
test_data\119.dat
Completed record: test_data\119
test_data\203.dat
Completed record: test_data\203
test_data\212.dat
Completed record: test_data\212
test_data\220.dat
Completed record: test_data\220
test_data\230.dat
Completed record: test_data\230
test_data\233.dat
Completed record: test_data\233
done
