In [3]:
import os

In [71]:
def find_challenge_files(data_directory):
    header_files = list()
    recording_files = list()
    for f in os.listdir(data_directory):
        root, extension = os.path.splitext(f)
        if not root.startswith('.') and extension=='.hea':
            header_file = os.path.join(data_directory, root + '.hea')
            recording_file = os.path.join(data_directory, root + '.mat')
            if os.path.isfile(header_file) and os.path.isfile(recording_file):
                header_files.append(header_file)
                recording_files.append(recording_file)
    return header_files, recording_files

In [72]:
header_files, recording_files = find_challenge_files("./DataSmall/")

In [13]:
def load_recording(recording_file, header=None, leads=None, key='val'):
    from scipy.io import loadmat
    recording = loadmat(recording_file)[key]
    if header and leads:
        recording = choose_leads(recording, header, leads)
    return recording

In [14]:
def load_header(header_file):
    with open(header_file, 'r') as f:
        header = f.read()
    return header

In [16]:
def get_labels(header):
    labels = list()
    for l in header.split('\n'):
        if l.startswith('#Dx'):
            try:
                entries = l.split(': ')[1].split(',')
                for entry in entries:
                    labels.append(entry.strip())
            except:
                pass
    return labels

In [17]:
get_labels(load_header(header_files[0]))

['164867002', '427084000']

In [19]:
rec = load_recording(recording_files[0])

In [None]:
def generate_X(ecg_filenames):
    while True:
        for i in order_array:
            data = load_recording(ecg_filenames[i])
            
            X_train_new = keras.preprocessing.sequence.pad_sequences(data, maxlen=5000, truncating='post',padding="post")
            X_train_new = X_train_new.reshape(5000,12)
            yield X_train_new

In [21]:
rec[[1,3]]

(2, 5000)

In [23]:
load_header(header_files[0])

'Q0001 12 500 5000 15-May-2020 15:35:30\nQ0001.mat 16+24 1000/mV 16 0 0 -14 0 I\nQ0001.mat 16+24 1000/mV 16 0 10 -9 0 II\nQ0001.mat 16+24 1000/mV 16 0 10 -2 0 III\nQ0001.mat 16+24 1000/mV 16 0 -5 5 0 aVR\nQ0001.mat 16+24 1000/mV 16 0 -5 21 0 aVL\nQ0001.mat 16+24 1000/mV 16 0 11 -34 0 aVF\nQ0001.mat 16+24 1000/mV 16 0 52 -30 0 V1\nQ0001.mat 16+24 1000/mV 16 0 44 -15 0 V2\nQ0001.mat 16+24 1000/mV 16 0 17 -39 0 V3\nQ0001.mat 16+24 1000/mV 16 0 75 -64 0 V4\nQ0001.mat 16+24 1000/mV 16 0 16 -27 0 V5\nQ0001.mat 16+24 1000/mV 16 0 9 18 0 V6\n#Age: 53\n#Sex: Male\n#Dx: 164867002,427084000\n#Rx: Unknown\n#Hx: Unknown\n#Sx: Unknown\n'

In [36]:
import tensorflow as tf
from tensorflow import keras


In [43]:
def create_model(num_leads, classes):
    inputlayer = keras.layers.Input(shape=(5000, num_leads)) 

    conv1 = keras.layers.Conv1D(filters=128, kernel_size=15,input_shape=(5000, num_leads), padding='same')(inputlayer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation(activation='relu')(conv1)
    #Legger til spatial dropout for å få med mer enn bare V4 som prediksjonsgrunnlag
    conv1 = keras.layers.SpatialDropout1D(0.1)(conv1)

    conv2 = keras.layers.Conv1D(filters=256, kernel_size=10, padding='same')(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)
    #Legger til spatial dropout for å få med mer enn bare V4 som prediksjonsgrunnlag
    conv2 = keras.layers.SpatialDropout1D(0.1)(conv2)

    conv3 = keras.layers.Conv1D(512, kernel_size=5,padding='same')(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)
    #Legger til spatial dropout for å få med mer enn bare V4 som prediksjonsgrunnlag
    conv3 = keras.layers.Dropout(0.2)(conv3)

    gap_layer = keras.layers.GlobalAveragePooling1D()(conv3)
    #gap_layer = keras.layers.Flatten()(conv3)


    output_layer = tf.keras.layers.Dense(units=classes,activation='sigmoid', name='output_layer')(gap_layer)

    model = keras.Model(inputs=inputlayer, outputs=output_layer)
    

    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(), 
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', dtype=None, threshold=0.5)])
    return model

In [44]:
model = create_model(12, 30)

In [66]:
model.output.shape[1]

30

In [69]:
model.input.shape[1]

5000

In [70]:
model.input.shape[2]

12

In [74]:
def is_integer(x):
    if is_number(x):
        return float(x).is_integer()
    else:
        return False

In [76]:
def is_number(x):
    try:
        float(x)
        return True
    except (ValueError, TypeError):
        return False

In [77]:
classes = set()
for header_file in header_files:
    header = load_header(header_file)
    classes |= set(get_labels(header))
if all(is_integer(x) for x in classes):
    classes = sorted(classes, key=lambda x: int(x)) # Sort classes numerically if numbers.
else:
    classes = sorted(classes) # Sort classes alphanumerically if not numbers.
num_classes = len(classes)

In [79]:
type(classes)

list

In [96]:
with open('classes.txt', 'w') as f:
    for class_ in classes:
        f.write("%s\n" % class_)
f.close()

In [97]:
new_list = []
with open('classes.txt', 'r') as f:
    for line in f:
        print(line)
        new_list.append(line)


27885002

54329005

63593006

164861001

164865005

164867002

164873001

164889003

164890007

164909002

164930006

164931005

251120003

270492004

284470004

413844008

426627000

427084000

427172004

428750005

429622005

713422000

713426002

713427006



In [98]:
new_list=[s.strip('\n') for s in new_list]

In [99]:
new_list

['27885002',
 '54329005',
 '63593006',
 '164861001',
 '164865005',
 '164867002',
 '164873001',
 '164889003',
 '164890007',
 '164909002',
 '164930006',
 '164931005',
 '251120003',
 '270492004',
 '284470004',
 '413844008',
 '426627000',
 '427084000',
 '427172004',
 '428750005',
 '429622005',
 '713422000',
 '713426002',
 '713427006']

In [106]:
header_files, recording_files = find_challenge_files("../PhysioNetChallenge2021official\Data_all")

In [107]:
classes = set()
for header_file in header_files:
    header = load_header(header_file)
    classes |= set(get_labels(header))
if all(is_integer(x) for x in classes):
    classes = sorted(classes, key=lambda x: int(x)) # Sort classes numerically if numbers.
else:
    classes = sorted(classes) # Sort classes alphanumerically if not numbers.
num_classes = len(classes)

In [122]:
num_classes

134

In [108]:
classes

['',
 '10370003',
 '106068003',
 '111288001',
 '11157007',
 '111975006',
 '13640000',
 '164861001',
 '164865005',
 '164867002',
 '164873001',
 '164884008',
 '164889003',
 '164890007',
 '164895002',
 '164896001',
 '164909002',
 '164912004',
 '164917005',
 '164921003',
 '164930006',
 '164931005',
 '164934002',
 '164937009',
 '164942001',
 '164947007',
 '164951009',
 '17338001',
 '17366009',
 '195042002',
 '195060002',
 '195080001',
 '195101003',
 '195126007',
 '204384007',
 '233892002',
 '233897008',
 '233917008',
 '251120003',
 '251139008',
 '251146004',
 '251164006',
 '251166008',
 '251168009',
 '251170000',
 '251173003',
 '251180001',
 '251182009',
 '251187003',
 '251198002',
 '251199005',
 '251200008',
 '251205003',
 '251211000',
 '251223006',
 '251259000',
 '251266004',
 '251268003',
 '253339007',
 '253352002',
 '266249003',
 '266257000',
 '270492004',
 '27885002',
 '282825002',
 '284470004',
 '29320008',
 '314208002',
 '368009',
 '370365005',
 '39732003',
 '413444003',
 '413844008'

In [114]:
import pandas as pd
import numpy as np

In [132]:
np.asarray(pd.read_csv("dx_mapping_scored.csv").iloc[:,1], dtype="str")

array(['164889003', '164890007', '6374002', '426627000', '733534002',
       '713427006', '270492004', '713426002', '39732003', '445118002',
       '164909002', '251146004', '698252002', '426783006', '284470004',
       '10370003', '365413008', '427172004', '164947007', '111975006',
       '164917005', '47665007', '59118001', '427393009', '426177001',
       '427084000', '63593006', '164934002', '59931005', '17338001'],
      dtype='<U21')

In [123]:
test = set()

In [127]:
test |= set(['a','b'])

In [129]:
test

{'a', 'b'}

In [130]:
test |= set(['a','c'])

In [131]:
test

{'a', 'b', 'c'}

In [133]:
for i in classes:
    if i == np.asarray(pd.read_csv("dx_mapping_scored.csv").iloc[:,1], dtype="str"):
        print(i)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [135]:
for i in classes:
    if any(j == i for j in np.asarray(pd.read_csv("dx_mapping_scored.csv").iloc[:,1], dtype="str")):
        print(i)

10370003
111975006
164889003
164890007
164909002
164917005
164934002
164947007
17338001
251146004
270492004
284470004
39732003
426177001
426627000
426783006
427084000
427172004
427393009
445118002
47665007
59118001
59931005
63593006
6374002
698252002
713426002
713427006
