In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import os.path as op
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import wfdb
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
def get_records(folder='../input/mit-bih-arrhythmia-database'):
    """Return the sorted record paths found in ``folder``, without extension.

    A record is recognised by its ``*.atr`` file. The trailing ``.atr`` is
    stripped from each match, so every returned entry is the folder-qualified
    record name.

    Args:
        folder: directory that is searched (non-recursively) for ``*.atr`` files.

    Returns:
        list[str]: extension-less paths in ascending order; empty when nothing
        matches.
    """
    extension_length = len('.atr')
    annotation_files = glob(op.join(folder, '*.atr'))
    return sorted(atr_file[:-extension_length] for atr_file in annotation_files)

def beat_annotations(annotation, good=('N',)):
    """Return the sample positions of the annotations whose symbol is in ``good``.

    Symbol legend: 'N' normal beat, 'L' left bundle branch block beat,
    'R' right bundle branch block beat, 'A' atrial premature contraction,
    'V' ventricular premature contraction, '/' paced beat,
    'E' ventricular escape beat.

    Args:
        annotation: object exposing two parallel sequences, ``symbol`` (one
            label per annotation) and ``sample`` (the matching positions).
        good: iterable of symbols to keep. Defaults to normal beats only,
            which is what this function previously hard-coded.

    Returns:
        numpy.ndarray: the entries of ``annotation.sample`` whose symbol is
        listed in ``good``, in their original order.
    """
    # dtype=str keeps an empty symbol list from turning into a float array.
    symbols = np.asarray(annotation.symbol, dtype=str)
    wanted = np.asarray(list(good), dtype=str)

    # np.isin is the supported replacement for the deprecated np.in1d.
    keep = np.isin(symbols, wanted)

    # Only the positions are of interest to the caller.
    return np.asarray(annotation.sample)[keep]

def read_record(record, select):
    """Cut one signal segment out of ``record`` for every annotation labelled ``select``.

    For each matching annotation the segment starts half-way back to the
    previous annotation and ends half-way forward to the next one. The first
    and the last annotation of the record are skipped because they lack a
    neighbour on one side.

    Args:
        record: extension-less record path handed to ``wfdb.rdsamp`` and
            ``wfdb.rdann``.
        select: annotation symbol (or collection of symbols) to extract.

    Returns:
        list: one 1-D slice of the first signal channel per selected
        annotation; empty when no annotation matches.
    """
    # Only channel 0 is requested, so column 0 is the whole signal.
    signals, _fields = wfdb.rdsamp(record, channels=[0])
    ann = wfdb.rdann(record, 'atr')

    positions = np.asarray(ann.sample)
    symbols = np.asarray(ann.symbol, dtype=str)
    last = len(positions) - 1

    # NOTE(review): the neighbours below are taken from *all* annotations, so
    # a non-beat marker next to a beat shortens that beat's window -- confirm
    # this is intended.
    ecg_pulses = []
    # Iterate over annotation indices directly instead of searching the
    # position list for every beat; this is linear and always anchors on the
    # selected annotation itself, even if another one shares its sample.
    for j in np.flatnonzero(np.isin(symbols, select)):
        if j == 0 or j == last:
            continue
        centre = positions[j]
        half_before = abs(positions[j - 1] - centre) // 2
        half_after = abs(positions[j + 1] - centre) // 2
        ecg_pulses.append(signals[centre - half_before: centre + half_after, 0])
    return ecg_pulses

In [None]:
# Extension-less paths of every record found in the dataset folder.
records = get_records(folder="../input/mit-bih-arrhythmia-database")

In [None]:
# Output directory for the rendered beat images, keyed by annotation symbol.
save_folder_map = dict([
    ("A", "../mit_data/arrhyth/"),
    ("N", "../mit_data/normal/"),
    ("V", "../mit_data/vpc/"),
])

In [None]:
def save_pulse_image(pulse, filename, size=(128, 128)):
    """Render one ECG segment as a bare line plot and store it as a grayscale PNG.

    The figure is first written to ``filename`` by matplotlib, then read back
    with OpenCV as grayscale, resized to ``size`` and written to the same path.

    Args:
        pulse: 1-D sequence of signal values to draw.
        filename: destination path of the PNG file.
        size: ``(width, height)`` of the final image in pixels.

    Raises:
        IOError: if the freshly written figure cannot be read back.
    """
    fig = plt.figure(frameon=False)
    try:
        ax = fig.add_subplot(111)
        ax.plot(pulse, linewidth=2)
        # Strip ticks and the frame so only the trace itself is drawn.
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
        fig.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if im_gray is None:
        raise IOError("could not read back " + filename)
    im_gray = cv2.resize(im_gray, size, interpolation=cv2.INTER_LANCZOS4)
    cv2.imwrite(filename, im_gray)


# Export at most 10 beats per class from each of the first 10 records.
for record in records[:10]:
    r_name = op.basename(record)
    for sel in ["A", "N", "V"]:
        out_dir = save_folder_map[sel]
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        ecg_pulses = read_record(record, select=sel)
        # Saving happens once per pulse; classes without beats are skipped
        # instead of re-saving a figure left over from a previous iteration.
        for i, pulse in enumerate(ecg_pulses[:10]):
            filename = out_dir + r_name + "_" + str(i) + '.png'
            save_pulse_image(pulse, filename)


In [None]:
# Load the first channel of the first record together with its annotations.
signals, fields = wfdb.rdsamp(records[0], channels=[0])
annotation = wfdb.rdann(records[0], "atr")

# NOTE(review): `biosppy` is not imported by any cell shown in this notebook;
# this line needs `import biosppy.signals.ecg` to run on a fresh kernel.
# The sampling rate comes from the record header rather than a hard-coded 200,
# which is wrong whenever the record was sampled at a different rate.
peaks = biosppy.signals.ecg.christov_segmenter(signal=signals.ravel(), sampling_rate=fields["fs"])[0]

In [None]:
# Class labels of the image data set (presumably one sub-folder per label -- TODO confirm).
test_type = [
    'A', 'L', 'N',
    'P', 'R', 'V',
]
# Root folder of the held-out images.
test_pic = '../input/mitbih2dimgdataset/RealDataSet/test/'

In [None]:
from sklearn.model_selection import train_test_split
# NOTE(review): `X_train` and `Y_train` are not created by any cell shown in
# this notebook, and this cell rebinds them, so every re-run shrinks the
# training set further. Confirm where the arrays come from before relying on it.
# Hold out 10% of the samples for validation; the fixed seed keeps the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state=2)
# The held-out part is a validation split, so label it as such.
print("x_train shape", X_train.shape)
print("x_val shape", X_val.shape)
print("y_train shape", Y_train.shape)
print("y_val shape", Y_val.shape)

In [None]:
# Use the callback from the same Keras that builds the model (tf.keras).
from tensorflow.keras.callbacks import ModelCheckpoint

# Destination of the best model written by the checkpoint callback.
filepath = 'm1'

train_path = '../input/mitbih2dimgdataset/RealDataSet/train'
# NOTE(review): validation reads the data set's `test` folder, so the model
# selected by the checkpoint is chosen on the same images it is later scored on.
valid_path = '../input/mitbih2dimgdataset/RealDataSet/test'

# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy' (the key the accuracy plot reads as well). With
# 'val_acc' the callback never finds the metric and never saves a model.
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

batch_size = 32

# Height and width every image is resized to when loaded.
IMAGE_SIZE = [128, 128]

In [None]:
import tensorflow as tf

# Three conv/max-pool stages followed by a 6-way softmax classifier.
# Conv2D applies no activation by default, which would leave the convolution
# stack linear apart from the pooling, so each stage gets an explicit ReLU.
# Layer shapes and parameter counts are unchanged by this.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), strides=(1, 1), activation='relu',
                           input_shape=IMAGE_SIZE + [3],  # 3 colour channels
                           kernel_initializer='glorot_uniform'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

    tf.keras.layers.Conv2D(32, (3, 3), strides=(1, 1), activation='relu',
                           kernel_initializer='glorot_uniform'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

    tf.keras.layers.Conv2D(16, (3, 3), strides=(1, 1), activation='relu',
                           kernel_initializer='glorot_uniform'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

    tf.keras.layers.Flatten(),
    # One output unit per class.
    tf.keras.layers.Dense(6, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_9 (Conv2D)            (None, 126, 126, 64)      1792      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 61, 61, 32)        18464     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 28, 28, 16)        4624      
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 14, 14, 16)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 18822     
=================================================================
Total params: 43,702
Trainable params: 43,702
Non-trainable params: 0
_________________________________________________________________
None

In [None]:
# NOTE(review): no `rescale` is configured, so pixel values reach the network
# unscaled -- consider ImageDataGenerator(rescale=1./255).
gen = tf.keras.preprocessing.image.ImageDataGenerator()

# Evaluation generator: shuffling is disabled so the rows returned by
# model.predict(test_gen) line up with test_gen.classes.
test_gen = gen.flow_from_directory(valid_path, target_size=IMAGE_SIZE, shuffle=False)

# NOTE(review): `train_gen` is not referenced by any cell shown here; training
# below uses `train_generator`.
train_gen = gen.flow_from_directory(train_path, target_size=IMAGE_SIZE)

train_generator = gen.flow_from_directory(
  train_path,
  target_size=IMAGE_SIZE,
  shuffle=True,
  batch_size=batch_size,
)
valid_generator = gen.flow_from_directory(
  valid_path,
  target_size=IMAGE_SIZE,
  shuffle=True,
  batch_size=batch_size,
)
callbacks_list = [checkpoint]

# Number of passes over the training data.
EPOCHS = 30

r = model.fit(
  train_generator,
  epochs=EPOCHS,
  validation_data=valid_generator,
  callbacks=callbacks_list
)

In [None]:
# Plot how the training and validation loss changed over the epochs.
# The x axis is derived from the recorded history instead of a fixed 1..30
# range, so the plot keeps working when the number of epochs changes.
loss_history = model.history.history
loss_epochs = range(1, len(loss_history['loss']) + 1)
plt.plot(loss_epochs, loss_history['loss'], marker="o")
plt.plot(loss_epochs, loss_history['val_loss'], marker="o")
plt.legend(["Training Loss", "Validation Loss"])
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss per epoch")
plt.xticks(loss_epochs)
plt.show()

In [None]:
# Plot how the training and validation accuracy changed over the epochs.
# The x axis is derived from the recorded history instead of a fixed 1..30
# range, so the plot keeps working when the number of epochs changes.
acc_history = model.history.history
acc_epochs = range(1, len(acc_history['accuracy']) + 1)
plt.plot(acc_epochs, acc_history['accuracy'], marker="o")
plt.plot(acc_epochs, acc_history['val_accuracy'], marker="o")
plt.legend(["Training accuracy", "Validation accuracy"])
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy per epoch")
plt.xticks(acc_epochs)
plt.show()

In [None]:
y_pred = model.predict(test_gen)

In [None]:
y_val=[]
for y in y_pred:
    y_val.append(np.argmax(y))

In [None]:
y_true=[]
for y in valid_path:
    y_true.append(np.argmax(y))

In [None]:
# Confusion matrix of the true class indices against the predicted ones.
print(confusion_matrix(y_true=y_true, y_pred=y_val))

In [None]:
# Text report comparing the true class indices with the predicted ones.
print(classification_report(y_true=y_true, y_pred=y_val))

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy score of the predicted class indices against the true ones.
print("Accuracy on test data: ", accuracy_score(y_true=y_true, y_pred=y_val))

In [None]:
import seaborn as sns
# Predictions can only be compared with the generator's labels when the files
# are read in directory order, so rebuild the generator if it shuffles.
if getattr(test_gen, "shuffle", True):
    test_gen = gen.flow_from_directory(valid_path, target_size=IMAGE_SIZE, shuffle=False)
# Predict the class scores for every image of the validation data set
Y_pred = model.predict(test_gen)
# Convert the score vectors to class indices
Y_pred_classes = np.argmax(Y_pred, axis=1)
# True class indices as assigned by the generator (np.argmax cannot be applied
# to the generator object itself)
Y_true = test_gen.classes
# compute the confusion matrix
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

In [None]:
# Draw the confusion matrix as an annotated heat map.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_mtx,
    ax=ax,
    annot=True,
    fmt='.1f',
    cmap="Greens",
    linewidths=0.01,
    linecolor="gray",
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()