# In [None]:
"""
The following code is for detecting peaks from ECG data of a rice/mice. 
It incorporates a deep learning LSTM model using labelled peaks information
to train the model.
"""

# Importing the required libraries
import pandas as pd
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense,LSTM
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from keras.models import model_from_json
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
import keras
import keras.optimizers
import numpy as np

# Train the deep learning LSTM model on a labelled ECG recording.
def ECG(file, skip_rows=10000):
    """Train an LSTM classifier on a labelled ECG CSV file.

    The first two CSV columns are used as input features (Raw/Voltage and
    Time according to the original comments) and the third column as the
    per-sample class label. The network ends in a 2-unit softmax layer, so
    the labels must be the class indices 0 and 1 (presumably 1 marks a
    peak -- confirm against the labelled data).

    Args:
        file: Path or buffer accepted by ``pandas.read_csv``.
        skip_rows: Number of leading data rows discarded before training.
            Defaults to 10000, the value that used to be hard-coded.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    dataframe = read_csv(file)[skip_rows:]
    dataset = dataframe.values
    # Columns 0-1 are the features, column 2 is the label.
    input_data = dataset[:, 0:2]
    output_data = dataset[:, 2]
    # NOTE(review): sklearn's Normalizer rescales every *row* to unit norm; it
    # does not scale each column independently. Kept as-is because the
    # prediction code applies the same transform -- confirm this is intended.
    scaler = Normalizer().fit(input_data)
    trainX = scaler.transform(input_data)
    # The LSTM is declared with input_shape=(n_features, 1), i.e. it expects
    # 3-D input (samples, timesteps, features). Reshape explicitly instead of
    # relying on Keras to add the trailing axis implicitly.
    trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
    model = Sequential()
    # 64 units with ReLU activation for the LSTM and the hidden dense layers.
    model.add(LSTM(64,activation='relu',input_shape=(trainX.shape[1],1)))
    model.add(Dense(64,activation='relu'))
    model.add(Dense(64,activation='relu'))
    model.add(Dense(64,activation='relu'))
    model.add(Dense(64,activation='relu'))
    model.add(Dense(64,activation='relu'))
    # Softmax on the final layer so the two outputs are class probabilities.
    model.add(Dense(2,activation='softmax'))
    adm = tf.keras.optimizers.Adam(learning_rate=0.001,beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0,clipnorm=1.,clipvalue=0.5)
    # The model already outputs probabilities (softmax above), so the loss
    # must NOT treat them as logits; from_logits=True would apply softmax a
    # second time inside the loss.
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), optimizer=adm, metrics=['accuracy'])
    # shuffle=False keeps the samples in recording order during training.
    model.fit(trainX,output_data, batch_size =32,epochs=5,verbose=1,shuffle=False)
    return model

# Train the model on the recording with labelled peaks.
model_output = ECG("ecgfmob_044_3320pkcorrected.csv")

# Persist the trained model three ways: architecture as JSON, weights as
# HDF5, and the complete model through model.save().
model_json = model_output.to_json()
with open("model_output.json", "w") as json_file:
    json_file.write(model_json)
model_output.save_weights("model_output.h5")
model_output.save('ECG_model')
print("Saved model to disk")

# Rebuild the model from the JSON architecture and the saved weights. A
# context manager guarantees the file handle is closed even if reading fails.
with open('model_output.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model_output.h5")
print("Loaded model from disk")

# Read the un-labelled recording that is to be annotated.
inputfile = read_csv("AA.csv")
final_output = pd.DataFrame(inputfile)
# Use only the first two columns as features, exactly like the training code
# (dataset[:, 0:2]). Feeding every column breaks as soon as the file carries
# an extra column -- e.g. the 'Label' column this script writes back to it.
feature_data = final_output.iloc[:, 0:2].values
scaler = Normalizer().fit(feature_data)
testX = scaler.transform(feature_data)
# The LSTM takes 3-D input (samples, timesteps, features): add the trailing
# feature axis explicitly.
testX = testX.reshape((testX.shape[0], testX.shape[1], 1))
# Predict on the unlabelled data with the reloaded model.
final_prediction = loaded_model.predict(testX)
# The model emits two class probabilities per sample; keep only the second
# one (class index 1).
predictlist = list(final_prediction[:, 1])

final_output['Label'] = predictlist
# Tunables of the peak post-processing (values unchanged).
CANDIDATE_STRIDE = 6   # only every 6th sample is considered as a candidate
MIN_PEAK_GAP = 500     # candidates closer than this many samples compete
PRUNE_PASSES = 10      # maximum number of pruning sweeps

peaklist, position = [], []
# Plain arrays for fast positional access; assumes the default RangeIndex
# produced by read_csv, so position i and index label i coincide.
label_values = final_output['Label'].values
raw_values = final_output['Raw'].values

# Collect the index of every strided sample whose predicted value is non-zero.
# NOTE(review): 'Label' holds softmax probabilities here, which are almost
# never exactly 0, so nearly every strided sample becomes a candidate and the
# pruning below does most of the work. Confirm whether a probability
# threshold was intended.
position = [
    i for i in range(0, len(final_output), CANDIDATE_STRIDE)
    if label_values[i] != 0
]

# Thin out candidates that lie closer than MIN_PEAK_GAP to their neighbour,
# keeping the one with the larger Raw value (ties keep both). The scan
# advances by one index per step even after a deletion, so one sweep can
# skip pairs; that is why several sweeps are made. The comparison order is
# the same as before, but the list is no longer padded with dummy zeros
# (which made it grow on every zero/zero comparison).
for _ in range(PRUNE_PASSES):
    removed_any = False
    i = 0
    while i < len(position) - 1:
        left, right = position[i], position[i + 1]
        if abs(right - left) < MIN_PEAK_GAP:
            if raw_values[right] > raw_values[left]:
                # Drop the candidate to the left of the higher sample.
                del position[i]
                removed_any = True
            elif raw_values[right] < raw_values[left]:
                # Drop the candidate to the right of the higher sample.
                del position[i + 1]
                removed_any = True
        i += 1
    if not removed_any:
        # A sweep without deletions means further sweeps cannot change anything.
        break

# Index 0 is never reported as a peak: the earlier implementation used 0 as
# its padding value and stripped every 0 afterwards; that result is preserved.
position = sorted(p for p in set(position) if p != 0)

# Raw/Voltage value at every remaining peak position.
peaklist = [raw_values[p] for p in position]

# Convert the detected peaks into a 0/1 label per row: 1 at every index in
# `position`, 0 everywhere else. Labelling by position (instead of matching
# Raw values against the peak values) avoids marking unrelated rows that
# merely share a voltage value with a peak, and is linear in the data size.
updated = [0] * len(final_output)
for peak_index in position:
    updated[peak_index] = 1

# Add the 'Label' column and write the table back.
# NOTE: this overwrites AA.csv in place, so the file carries the extra
# 'Label' column the next time it is read.
inputfile['Label'] = updated
inputfile.to_csv("AA.csv",index = False)
