In [68]:
#%%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D,ConvLSTM2D, Flatten,Dropout,Conv2D,LSTM, LeakyReLU, MaxPooling2D,TimeDistributed 
from datetime import datetime
from pathlib import Path
from scipy import signal
from os import walk
from keras.preprocessing import sequence
from scipy.io import wavfile
import glob
# from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf

from multiprocessing.dummy import Pool as ThreadPool
import keras
pool=ThreadPool(24)


In [69]:
log_dir = "/tf/logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
#%tensorboard

In [70]:
# Load wavefile
def readwav(file:str):
    filepath = Path(file).absolute()
    samplerate, data = wavfile.read((filepath))
    # print(f"samplerate = {samplerate}")
    return data,samplerate

In [71]:
def getTiming(data:np.ndarray,samplerate:int):
    length = data.shape[0] / samplerate
    return np.arange(0,length,1/samplerate)



In [72]:
def filterSignal(data:np.ndarray,t:np.ndarray,plot:bool=True,length=None,filter=True):
    ## normalize input
    sig = data/np.amax(data)
    norm_heart = data/np.amax(data)
    sos = signal.butter(1, [.2,195], 'bp', fs=1000, output='sos')
    filtered_heart = signal.sosfilt(sos, sig)
    ## Removing noise
    noise_heart = signal.signaltools.wiener(filtered_heart,300)
    noise_heart = filtered_heart
    if(not filter):
        noise_heart = norm_heart
    if length:
        resampled,resampledt = signal.resample(noise_heart,33075,t=t)
    if plot:
        if length:
            # _, (ax1, ax2,ax3,ax4) = plt.subplots(4, 1, sharex=True)
            # ax4.plot(resampledt, resampled)
            # ax4.set_title('After Resampling')
            # ax4.set_xlabel('Time [seconds]')
            _, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.plot(t, sig)
            ax1.set_title('Original Heart Rate Signal')
            ax2.plot(resampledt, resampled)
            ax2.set_title('After Resampling')
            ax2.set_xlabel('Time [seconds]')
        
        #     _, (ax1, ax2,ax3) = plt.subplots(3, 1, sharex=True)
        # ax1.plot(t, sig)
        # ax1.set_title('Original Heart Rate Signal')
        # ax2.plot(t, norm_heart)
        # ax2.set_title('After Bandpass filter')
        # ax3.plot(t, noise_heart)
        # ax3.set_title('After Noise Filter')
        # ax3.set_xlabel('Time [seconds]')

        # plt.tight_layout()
    plt.show()
    return (resampled,resampledt) if length else (noise_heart,t)
    

In [73]:
lengths = []
def generateSignal(file:str,plot:bool=False,loglevel:str=None):
    data,samplerate = readwav(file)
    length = data.shape[0] / samplerate
    lengths.append(length)
    t = getTiming(data,samplerate)

    sig,t = filterSignal(data,t,plot,length)
    return t,sig,samplerate

In [74]:
trainingFolder = "./heartbeats/classifications"
trainingpath = Path(trainingFolder)
paths = [Path(dir[0]) for dir in walk(trainingpath)][1:]


In [75]:
train_data = []
# test_data = []
plot = False
def processFiles(indexedWave,classification,trainIndex):
    wav,index = indexedWave

    t,d,_ = generateSignal(wav,plot)
    # if(index < trainIndex):
    train_data.append([d,t,classification])
    # else:
        # test_data.append([d,t,classification])


def get_training_data(path:Path):   
    classification = path.name
    wavList = glob.glob(str(path.joinpath("*.wav")))
    trainIndex=int(math.ceil(len(wavList)*.8)) # use 80% of data for training
    pool.map(lambda x: processFiles(x,classification,trainIndex),zip(wavList,range(0,len(wavList))) )
    # for wav in zip(wavList,range(0,len(wavList))):
    #     processFiles(wav,classification,trainIndex)




In [76]:
# Load Data 
#Loading data from this many files is intensive, speeding up w/ multithreading

for path in paths:
    get_training_data(path)
#train_data=np.array(train_data)
#test_data=np.array(test_data)



In [77]:
sig,time,classification = zip(*train_data)
getMaxLength = lambda list: max([len(item) for item in list])
max_length = getMaxLength(sig)


In [78]:
df = pd.DataFrame(train_data,columns=["signal","time","classification"])
# print(df[["signal","time"]].values)
xs=sequence.pad_sequences(df.signal.values,maxlen=max_length,dtype="float64")
xt=sequence.pad_sequences(df.time.values,maxlen=max_length,dtype="float64")

# y = df.iloc
# sequence.pad_sequences(df[["signal","time"]].values,maxlen=max_length,dtype="float64")
# print(df.iloc[2])
df = pd.get_dummies(df,columns=["classification"])


In [79]:
y = df.iloc[:,2:].values
x = np.dstack((xs,xt))
# x = xs
x.shape

(176, 33075, 2)

In [102]:
# Feature extraction
n_steps, n_length = 15, 2205
x = x.reshape((x.shape[0], n_steps,1, n_length, 2))
# x = x.reshape((x.shape[0], n_steps, n_length, 2))

x.shape


(176, 15, 2205, 2)

In [81]:
print(x.shape)
# y=tf.keras.utils.to_categorical(np.array(y),num_classes=5)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

(176, 15, 1, 2205, 2)


In [96]:
# define model
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = Sequential()
    model.add(ConvLSTM2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
    # model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))

    # model.add(ConvLSTM2D(filters=16, kernel_size=(1,3), activation='relu'))

    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(5, activation='softmax'))
    model.build(np.shape(x_train))
    print(model.summary())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_19 (ConvLSTM2D)  (None, 1, 2203, 32)      13184     
                                                                 
 dropout_12 (Dropout)        (None, 1, 2203, 32)       0         
                                                                 
 flatten_12 (Flatten)        (None, 70496)             0         
                                                                 
 dense_24 (Dense)            (None, 100)               7049700   
                                                                 
 dense_25 (Dense)            (None, 5)                 505       
                                                                 
Total params: 7,063,389
Trainable pa

In [107]:
# # define model
# strategy = tf.distribute.MirroredStrategy()
# with strategy.scope():
#     model = Sequential()
#     # model.add(ConvLSTM2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
#     model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
#     model.add(LSTM(100))

#     # # model.add(ConvLSTM2D(filters=16, kernel_size=(1,3), activation='relu'))

#     # model.add(Dropout(0.5))
#     model.add(Flatten())
#     model.add(Dense(100, activation='relu'))
#     model.add(Dense(5, activation='softmax'))
#     model.build(np.shape(x_train))
#     print(model.summary())
#     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=5. Full shape received: (158, 15, 1, 2202, 16)

In [106]:
model.fit(x_train, y_train, epochs=40, batch_size=128)


2021-11-13 07:42:23.269561: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_110311"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\023FlatMapDataset:3387"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset.

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x153ebc6f02b0>

In [99]:
scores = model.evaluate(x_test, y_test, verbose=0)
print(scores)
m, s = np.mean(scores), np.std(scores)
print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

2021-11-13 07:36:31.734283: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_107507"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\023FlatMapDataset:3312"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset.

[3.262042999267578, 0.4444444477558136]
Accuracy: 1.853% (+/-1.409)


In [100]:
predictions = model.predict(x_test)

2021-11-13 07:36:39.326590: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_108498"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\023FlatMapDataset:3338"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset.