In [1]:
import numpy as np
import os
import scipy.io.wavfile as wavfile
import pandas as pd
import sklearn.model_selection
import keras

Using TensorFlow backend.


In [2]:
def loadDataframe(directory):
    """Read every labelled ``.wav`` recording in ``directory`` into a DataFrame.

    A file is picked up when its name ends in ``.wav`` (case-insensitive) and
    does not contain ``"unlabel"``. Files are processed in sorted name order so
    the resulting row order does not depend on the order ``os.listdir``
    happens to return.

    Parameters
    ----------
    directory : str
        Folder holding the recordings; a trailing path separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per recording with the columns, in this order:
        ``fileName`` (path to the file), ``data`` (sample array returned by
        ``scipy.io.wavfile.read``), ``sampleFrequency`` (rate returned by
        ``scipy.io.wavfile.read``), ``label`` (1 when the file's base name
        contains ``"normal"``, else 0) and ``dataLength`` (``len`` of ``data``).
    """
    # os.path.join keeps the path valid whether or not `directory` ends in a
    # separator; matching on the extension avoids feeding non-audio files whose
    # name merely contains "wav" to wavfile.read.
    fileNames = sorted(
        os.path.join(directory, item)
        for item in os.listdir(directory)
        if item.lower().endswith('.wav') and 'unlabel' not in item
    )

    recordings = [wavfile.read(fileName) for fileName in fileNames]

    df = pd.DataFrame(columns=['fileName', 'data', 'sampleFrequency', 'label', 'dataLength'])

    df['fileName'] = fileNames
    df['sampleFrequency'] = [sampleFrequency for sampleFrequency, _ in recordings]
    df['data'] = [samples for _, samples in recordings]
    # Derive the label from the base name only: testing the full path would
    # mark every file as "normal" if a parent folder contained that word.
    df['label'] = df['fileName'].apply(
        lambda path: 'normal' in os.path.basename(path)).astype(int)
    df['dataLength'] = df['data'].apply(len)

    return df

def loadXY(directory):
    """Turn the recordings in ``directory`` into zero-padded arrays for training.

    Recordings are ordered longest-first (ties keep the order in which
    ``loadDataframe`` returned them) and each one is right-aligned in its row,
    so the padding zeros sit at the start of the sequence.

    Parameters
    ----------
    directory : str
        Folder passed straight through to ``loadDataframe``.

    Returns
    -------
    x : numpy.ndarray
        Array of shape ``(examples, maxLength, 1)`` holding the padded samples.
    y : numpy.ndarray
        Array of shape ``(examples, 1)`` holding the ``label`` column values.
    examples : int
        Number of recordings.
    maxLength : int
        Length of the longest recording, i.e. the padded sequence length.

    Raises
    ------
    ValueError
        If ``loadDataframe`` returns no recordings.
    """
    df = loadDataframe(directory)

    examples = len(df)
    if examples == 0:
        raise ValueError('no labelled recordings found in %r' % (directory,))

    # Read columns by name rather than by position in a converted array, so the
    # code does not silently break if the DataFrame's column order changes.
    recordings = df['data'].tolist()
    labels = df['label'].tolist()
    lengths = df['dataLength'].tolist()

    # sorted() is stable, also with reverse=True, so equal-length recordings
    # keep their relative order.
    order = sorted(range(examples), key=lambda idx: lengths[idx], reverse=True)
    maxLength = lengths[order[0]]

    x = np.zeros([examples, maxLength, 1])
    y = np.zeros([examples, 1])

    for row, source in enumerate(order):
        samples = recordings[source]
        # A slice starting at -0 addresses the WHOLE row, so an empty recording
        # must be skipped; its row simply stays all zeros.
        if len(samples) > 0:
            x[row, -len(samples):, 0] = samples
        y[row] = labels[source]

    return x, y, examples, maxLength

In [3]:
# Load the zero-padded samples (x), labels (y), recording count and padded
# sequence length for the recordings under ./heartbeat-sounds/set_b/.
x, y, examples, maxLength = loadXY('./heartbeat-sounds/set_b/')

In [4]:
# Binary sequence classifier: a 32-unit LSTM reads each padded recording
# (maxLength steps, one value per step) and a single sigmoid unit emits the
# probability for label 1.
model = keras.models.Sequential([
    keras.layers.LSTM(32, input_shape=(maxLength, 1)),
    keras.layers.Dense(1),
    keras.layers.Activation('sigmoid'),
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Keras' `validation_split` holds out the LAST fraction of the rows, and loadXY
# orders rows longest-first, so `validation_split=0.25` would validate only on
# the shortest recordings. Hold out a shuffled, label-stratified 25% instead;
# the fixed random_state keeps the split reproducible across runs.
# NOTE: train_test_split copies the selected rows, so x is briefly held twice.
xTrain, xVal, yTrain, yVal = sklearn.model_selection.train_test_split(
    x, y, test_size=0.25, stratify=y.ravel(), random_state=0)

model.fit(xTrain, yTrain, validation_data=(xVal, yVal))

Train on 345 samples, validate on 116 samples
Epoch 1/1