In [66]:
from data.make_dataset import load_mitbih, load_ptbdb
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, SimpleRNN, LSTM, GRU, Flatten
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.losses import SparseCategoricalCrossentropy
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from keras import optimizers
from keras.callbacks import ReduceLROnPlateau

In [45]:
train, test, y_train, y_test = load_mitbih()
#train, test = train[:,:,0], test[:,:,0]

In [46]:
train.shape, test.shape, y_train.shape, y_test.shape

((87554, 187, 1), (21892, 187, 1), (87554,), (21892,))

In [47]:
np.unique(y_train,return_counts=True)

(array([0, 1, 2, 3, 4], dtype=int8),
 array([72471,  2223,  5788,   641,  6431]))

In [74]:
sub_train, val, sub_y, y_val = train_test_split(train, y_train, test_size=0.2, random_state=1337, stratify=y_train)
sub_train.shape

(70043, 187, 1)

In [49]:
INPUT_DIM, N_CLASSES = train.shape[1], len(np.unique(y_train))

In [76]:
class_weights = compute_class_weight('balanced',np.unique(y_train),y_train)
class_weights

array([ 0.24162493,  7.87710301,  3.02536282, 27.31794072,  2.72287358])

In [85]:
model = Sequential()
model.add(Conv1D(input_shape=(INPUT_DIM,1),filters=32, kernel_size=5, padding='same', activation='relu'))
model.add(Conv1D(filters=32, kernel_size=5, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2))
model.add(Conv1D(filters=64, kernel_size=5, padding='same', activation='relu'))
model.add(Conv1D(filters=64, kernel_size=5, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2))
model.add(LSTM(64,return_sequences=True))
model.add(Flatten())
model.add(Dense(64,activation='relu'))
model.add(Dense(N_CLASSES,activation='softmax'))
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_67 (Conv1D)           (None, 187, 32)           192       
_________________________________________________________________
conv1d_68 (Conv1D)           (None, 187, 32)           5152      
_________________________________________________________________
max_pooling1d_34 (MaxPooling (None, 92, 32)            0         
_________________________________________________________________
conv1d_69 (Conv1D)           (None, 92, 64)            10304     
_________________________________________________________________
conv1d_70 (Conv1D)           (None, 92, 64)            20544     
_________________________________________________________________
max_pooling1d_35 (MaxPooling (None, 44, 64)            0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 44, 64)          

In [86]:
opt = optimizers.Adam(clipnorm=1.)
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5,
                              patience=3, min_lr=1e-6)
model.compile(loss=SparseCategoricalCrossentropy(from_logits=True), 
              optimizer=opt,
              metrics=['sparse_categorical_accuracy'])

In [87]:
model.fit(train_down, y_down,
          batch_size=128,
          epochs=100,
          shuffle=True,
          validation_data = (val,y_val),
          #class_weight=class_weights,
          #callbacks=[reduce_lr]
         )

Train on 289885 samples, validate on 17511 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
  3456/289885 [..............................] - ETA: 12:05 - loss: 0.9324 - sparse_categorical_accuracy: 0.9725

KeyboardInterrupt: 

In [88]:
y_pred = model.predict(test, batch_size=1000)

In [89]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred.argmax(axis=1)))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98     18118
           1       0.66      0.86      0.75       556
           2       0.87      0.97      0.92      1448
           3       0.52      0.90      0.66       162
           4       0.98      0.99      0.98      1608

    accuracy                           0.97     21892
   macro avg       0.81      0.94      0.86     21892
weighted avg       0.97      0.97      0.97     21892



In [90]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test,y_pred.argmax(axis=1)))

[[17579   247   161   104    27]
 [   55   480    17     2     2]
 [   16     4  1399    26     3]
 [    4     0    11   146     1]
 [    6     0    12     1  1589]]


In [80]:
np.unique(sub_y,return_counts=True)

(array([0, 1, 2, 3, 4], dtype=int8),
 array([57977,  1778,  4630,   513,  5145]))

In [38]:
np.unique(y_train[:1024],return_counts=True)

(array([0, 1, 2, 3, 4], dtype=int8), array([839,  28,  67,   8,  82]))

In [11]:
def updown_sample(train, y_train, n_samples, replace=False):
    '''
    replace=False for downsampling
    replace=True for upsampling'''
    train_0 = train[y_train==0]
    train_1 = train[y_train==1]
    train_2 = train[y_train==2]
    train_3 = train[y_train==3]
    train_4 = train[y_train==4]
    
    train_0_sampled = resample(train_0, 
                            replace=replace,
                            n_samples=n_samples,
                            random_state=123)
    train_1_sampled = resample(train_1, 
                            replace=replace,
                            n_samples=n_samples,
                            random_state=123)
    train_2_sampled = resample(train_2, 
                            replace=replace,
                            n_samples=n_samples,
                            random_state=123)
    train_3_sampled = resample(train_3, 
                            replace=replace,
                            n_samples=n_samples,
                            random_state=123)
    train_4_sampled = resample(train_4, 
                            replace=replace,
                            n_samples=n_samples,
                            random_state=123)
    train_downsampled = np.vstack([train_0_sampled,
                                  train_1_sampled,
                                  train_2_sampled,
                                  train_3_sampled,
                                  train_4_sampled])
    y_downsampled = np.hstack([np.zeros(n_samples,np.int32),
                              np.ones(n_samples,np.int32),
                              np.ones(n_samples,np.int32)*2,
                              np.ones(n_samples,np.int32)*3,
                              np.ones(n_samples,np.int32)*4])
    return train_downsampled, y_downsampled
    
    

In [81]:
train_down, y_down = updown_sample(sub_train,sub_y,57977,replace=True)

In [82]:
train_down.shape, y_down.shape

((289885, 187, 1), (289885,))

In [83]:
np.unique(y_down,return_counts=True)

(array([0, 1, 2, 3, 4], dtype=int32),
 array([57977, 57977, 57977, 57977, 57977]))