In [None]:
from petastorm import make_reader
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis,skew
from scipy.signal import periodogram
import scipy.signal
from ecgdetectors import Detectors
import matplotlib.ticker as ticker
import pywt
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score
from matplotlib.widgets import TextBox, Button
import sys
import pandas as pd
import seaborn as sn
import os
sys.path.append(os.path.join(os.getcwd(), ".."))
from shared_utils import Time_series_dimensions_calculus as TSD
# Absolute, environment-specific location of the dataset directory
# (presumably Parquet files, judging by the directory name — confirm).
path_formatted_glasgow = "/workspaces/maitrise/data/20221006_physio_quality/set-a/dataParquet"
# URL form of the same directory; this is what gets passed to make_reader.
# NOTE(review): the path already starts with "/", so the resulting URL has four
# slashes after "file:" — confirm the reader accepts this form.
path_petastorm = f"file:///{path_formatted_glasgow}"
# CSV holding the reference labels (first column is read as an integer id,
# second column as the label string).
path_csv_ref_label = "/workspaces/maitrise/data/20221006_physio_quality/set-a/REFERENCE.csv"

In [None]:
###Data organization : Training dataset and Testing dataset + Reference labels

# Open the dataset and keep only the first record the reader yields; `data`
# is then used as the example sample by the exploratory cells below.
with make_reader(path_petastorm) as reader:
    for sample in reader:
        data = sample
        # One record is enough here, so stop iterating immediately.
        break

# Show the whole record, then its identifier field.
print(data)
print(data.noun_id)

In [None]:
## Load the reference labels and keep only the labeled records.
# Column 0 holds the record identifiers, column 1 the label strings.
label_ref = pd.read_csv(path_csv_ref_label).to_numpy()
Y = label_ref[:, 1].copy()

# Boolean mask selecting every row whose label is not "unlabeled".
labeled_mask = Y != "unlabeled"
Y_true = Y[labeled_mask]
X_true = label_ref[:, 0][labeled_mask].astype(int)


In [None]:
##Observation of one patient:

def plot_ECG_signal(signal,name,length= data.signal_length,fs = data.sampling_frequency):
    """Plot one ECG lead twice: the full trace and a close-up view.

    Parameters
    ----------
    signal : sequence
        Samples of a single lead.
    name : bytes or str
        Lead name shown in the titles; ``bytes`` are decoded as UTF-8.
    length : int
        Sample count regarded as a full-length recording. When the signal has
        exactly this many samples the close-up shows the x range [0, 3];
        otherwise it shows the whole signal. Defaults to ``data.signal_length``
        of the sample loaded when this definition is executed.
    fs : number
        Sampling frequency used to turn sample indices into x coordinates.
        Defaults to ``data.sampling_frequency`` of that same sample.
    """
    # Accept both bytes and str names so the function does not fail on str input.
    lead = name.decode('utf8') if isinstance(name, bytes) else str(name)
    x = np.arange(len(signal)) / fs

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    ax[0].plot(x, signal)
    ax[0].set_title(f"Full signal of Lead {lead}")
    ax[0].grid()
    ax[1].plot(x, signal)
    ax[1].set_title(f"Close up signal of Lead {lead}")
    ax[1].grid()

    # Honour the `length` argument instead of silently reading the global
    # `data` at call time.
    if len(x) == length:
        ax[1].set_xlim([0, 3])
    elif len(x) > 0:
        # Guarded: an empty signal has no last sample to use as the limit.
        ax[1].set_xlim([0, x[-1]])
    plt.show()

# Unpack the example record loaded earlier.
ECG_signal = data.signal
ECG_lead = data.signal_names
fs = data.sampling_frequency
status = data.noun_id

# Pair every lead name with its own column of the signal matrix rather than
# assuming exactly 12 leads, then print the column shape and plot the lead.
dico_ECG = {}
for lead_name, lead_signal in zip(ECG_lead, ECG_signal.T):
    dico_ECG[lead_name] = lead_signal
    print(lead_signal.shape)
    plot_ECG_signal(lead_signal, lead_name)
     
     

In [None]:
###Some utility functions:


def get_time_axis(sign_length,fs):
    """Return the time coordinate of each sample of a signal.

    Parameters
    ----------
    sign_length : int
        Number of samples in the signal.
    fs : number
        Sampling frequency (samples per unit of time).

    Returns
    -------
    numpy.ndarray
        ``sign_length`` values ``0, 1/fs, 2/fs, ...`` — the same axis that
        ``plot_ECG_signal`` builds.
    """
    # The previous version built a linspace already expressed in time units
    # and then divided by fs a second time, shrinking the axis by a factor fs.
    return np.arange(sign_length) / fs

def SDR_Quality_lead(SDR_dict_lead,name_lead):
    """Sort leads into good / medium / bad quality from their SDR score.

    Rules (boundaries chosen to agree with ``SDR_classification_status``,
    which only accepts scores strictly between 0.5 and 0.8):

    * bad    : score <= 0.5, score >= 0.8, or NaN
    * medium : 0.5 < score < 0.6  or  0.7 < score < 0.8
    * good   : 0.6 <= score <= 0.7

    Parameters
    ----------
    SDR_dict_lead : mapping
        Lead name -> SDR score.
    name_lead : iterable
        Lead names to classify; each must be a key of ``SDR_dict_lead``.

    Returns
    -------
    tuple of dict
        ``(good, medium, bad)``, each mapping lead name -> score.
    """
    good, medium, bad = {}, {}, {}
    for lead in name_lead:
        score = SDR_dict_lead[lead]
        # Written as "not inside the open interval" so that NaN and the exact
        # boundary values 0.5 / 0.8 land in "bad" instead of falling through
        # to "good" as they did before.
        if not (0.5 < score < 0.8):
            bad[lead] = score
        elif score < 0.6 or score > 0.7:
            medium[lead] = score
        else:
            good[lead] = score
    return good, medium, bad


def wPMF_Quality_lead(wPMF_dict_lead,name_lead):
    """Sort leads into good / medium / bad quality from their wPMF score.

    Rules (the 0.5 boundary agrees with ``wPMF_classification_status``,
    which accepts scores >= 0.5):

    * good   : score >= 0.5
    * medium : 0.25 <= score < 0.5
    * bad    : score < 0.25, or NaN

    Parameters
    ----------
    wPMF_dict_lead : mapping
        Lead name -> wPMF score.
    name_lead : iterable
        Lead names to classify; each must be a key of ``wPMF_dict_lead``.

    Returns
    -------
    tuple of dict
        ``(good, medium, bad)``, each mapping lead name -> score.
    """
    good, medium, bad = {}, {}, {}
    for lead in name_lead:
        score = wPMF_dict_lead[lead]
        # The previous first test was `score < 0.25 or score > 0`, which is
        # true for every real number, so every lead was reported as bad.
        # Testing from the top down also keeps 0.25, 0.5 and NaN from being
        # dropped from all three groups.
        if score >= 0.5:
            good[lead] = score
        elif score >= 0.25:
            medium[lead] = score
        else:
            bad[lead] = score
    return good, medium, bad

def set_classification_status(func_name,index_score):
    """Turn an index score into a status label using the rule named by ``func_name``.

    ``"SDR"`` delegates to ``SDR_classification_status`` and ``"wPMF"`` to
    ``wPMF_classification_status``. Any other name yields ``None``.
    """
    classifier = None
    if func_name == "SDR":
        classifier = SDR_classification_status
    elif func_name == "wPMF":
        classifier = wPMF_classification_status

    if classifier is None:
        return None
    return classifier(index_score)

def set_quality_lead(func_name,funct_dict_lead,name_lead):
    """Group leads by quality using the per-lead rule named by ``func_name``.

    ``"SDR"`` delegates to ``SDR_Quality_lead`` and ``"wPMF"`` to
    ``wPMF_Quality_lead``. Any other name yields ``None``.
    """
    grouper = None
    if func_name == "SDR":
        grouper = SDR_Quality_lead
    elif func_name == "wPMF":
        grouper = wPMF_Quality_lead

    if grouper is None:
        return None
    return grouper(funct_dict_lead, name_lead)

def wPMF_classification_status(mean_wPMF):
    """Label a recording from its mean wPMF score.

    Returns ``"acceptable"`` when ``mean_wPMF >= 0.5`` and ``"unacceptable"``
    otherwise (including NaN, for which the comparison is false).
    """
    if mean_wPMF >= 0.5:
        return "acceptable"
    # Spelling fixed: the label was previously returned as "unaccceptable"
    # (three c's), which cannot equal a reference label spelled "unacceptable".
    return "unacceptable"

def SDR_classification_status(mean_SDR):
    """Label a recording from its mean SDR score.

    Returns ``"acceptable"`` when ``0.5 < mean_SDR < 0.8`` and
    ``"unacceptable"`` otherwise (including NaN, for which the comparisons
    are false).
    """
    if 0.5 < mean_SDR < 0.8:
        return "acceptable"
    # Spelling fixed: the label was previously returned as "unaccceptable"
    # (three c's), which cannot equal a reference label spelled "unacceptable".
    return "unacceptable"

    
def Sorter_X_array(X_arr):
    """Return ``(sorted_values, sorting_indices)`` for ``X_arr``.

    ``sorted_values`` is ``X_arr`` in ascending order and ``sorting_indices``
    are the positions that produce that order (usable to reorder a companion
    array the same way).
    """
    return np.sort(X_arr), np.argsort(X_arr)

In [None]:
###Index Creation : SDR 
### The label will be as follow : 0.8>mean(SDR of all lead) > 0.5 = Acceptable;mean(SDR of all lead) <0.5 or >0.8 = Unacceptable
##For each lead, we will return a more precise classification based on the following rules
## SDR<0.5 or SDR>0.8 = Bad quality ; 0.6>SDR>0.5 or 0.8>SDR>0.7 = Medium quality; 0.7>SDR>0.6 = Good quality

def SDR_score(signals,name_lead,fs,qrs_band=(5,14),ecg_band=(5,50)):
    """Compute the SDR index of each lead and its mean over the leads.

    For every lead the periodogram is computed and the SDR is the ratio of
    the spectral power inside ``qrs_band`` to the power inside ``ecg_band``
    (both bands inclusive, in the frequency unit implied by ``fs``).

    Parameters
    ----------
    signals : mapping
        Lead name -> 1-D signal.
    name_lead : iterable
        Lead names to process; each must be a key of ``signals``.
    fs : number
        Sampling frequency passed to ``periodogram``.
    qrs_band : (low, high), optional
        Numerator band. Defaults to (5, 14).
    ecg_band : (low, high), optional
        Denominator band. Defaults to (5, 50).

    Returns
    -------
    (dict, float)
        Per-lead SDR values and their mean. A lead with no power in
        ``ecg_band`` (e.g. a flat line) gets NaN, which also makes the mean NaN.
    """
    SDR_lead = {}
    # Kept separately from the dict so repeated lead names still count once
    # per occurrence in the mean.
    sdr_values = []
    for lead in name_lead:
        f, PSD = periodogram(signals[lead], fs)
        qrs_power = np.sum(PSD[(f >= qrs_band[0]) & (f <= qrs_band[1])])
        ecg_power = np.sum(PSD[(f >= ecg_band[0]) & (f <= ecg_band[1])])
        # Explicit NaN instead of a 0/0 division (and its runtime warning).
        sdr_val = qrs_power / ecg_power if ecg_power > 0 else np.nan
        SDR_lead[lead] = sdr_val
        sdr_values.append(sdr_val)
    return SDR_lead, np.mean(sdr_values)



In [None]:
###Index Creation : wPMF
### The label will be as follows : mean(wPMF of all leads) >= 0.5 = Acceptable; mean(wPMF of all leads) < 0.5 = Unacceptable
##For each lead, we will return a more precise classification based on the following rule: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9281614/#B25



def Wavelet_coef(sig,name,lev):
    """Decompose ``sig`` with ``pywt.wavedec`` and split the result.

    Parameters
    ----------
    sig : signal to decompose.
    name : wavelet name handed to ``pywt.wavedec``.
    lev : decomposition level handed to ``pywt.wavedec``.

    Returns
    -------
    (first, rest)
        The first element of the coefficient list (the approximation
        coefficients) and the list of all remaining elements (the detail
        coefficients), in the order ``pywt.wavedec`` returned them.
    """
    coefficients = pywt.wavedec(sig, name, level=lev)
    return coefficients[0], coefficients[1:]


def Energy_L2(coeff):
    """Return the sum of squared magnitudes of ``coeff``."""
    magnitudes = np.abs(coeff)
    return np.sum(magnitudes * magnitudes)

def wPMF_score(dico_signal,name_lead,fs):
    """Compute the wPMF index of each lead and its mean over the leads.

    Each lead is decomposed with a 9-level 'db4' wavelet transform. The
    energy of every sub-band is normalised by the total energy, and the index
    is the summed relative energy of detail levels 4, 5 and 6.

    Parameters
    ----------
    dico_signal : mapping
        Lead name -> 1-D signal.
    name_lead : iterable
        Lead names to process; each must be a key of ``dico_signal``.
    fs : number
        Unused; kept so the signature matches ``SDR_score``.

    Returns
    -------
    (dict, float)
        Per-lead wPMF values and their mean. A lead with zero total energy
        gets NaN, which also makes the mean NaN.
    """
    waveletname = 'db4'
    level_w = 9
    wPMF_lead = {}
    wPMF_values = []
    for lead in name_lead:
        CA_w, CD_w = Wavelet_coef(dico_signal[lead], waveletname, level_w)
        # pywt.wavedec lists details from the coarsest level to the finest;
        # reversing puts the finest level (level 1) at index 0. The list is
        # indexed directly: the detail arrays have different lengths, and
        # np.asarray on such a ragged list fails on recent NumPy versions.
        energies = [Energy_L2(detail) for detail in reversed(CD_w)]
        # Energy of the whole approximation band. The previous code used only
        # its first coefficient (CA_w[0]), which under-counts the total energy.
        energies.append(Energy_L2(CA_w))
        p = np.asarray(energies, dtype=float)
        Etot = np.sum(p)
        if Etot > 0:
            p = p / Etot
            SQI_ECG = np.sum(p[3:6])  # detail levels 4, 5 and 6
        else:
            # Flat signal: explicit NaN instead of a 0/0 division.
            SQI_ECG = np.nan
        wPMF_lead[lead] = SQI_ECG
        wPMF_values.append(SQI_ECG)
    return wPMF_lead, np.mean(wPMF_values)

In [None]:
# Registry of the available signal-quality indices: index name -> scoring function.
dico_SQI = dict(SDR=SDR_score, wPMF=wPMF_score)

In [None]:
##Let's create the confusion matrix of the SDR Index : 

matrix = {}
matrix["Y_True"] = Y_true

##Dictionary lead quality for each patient with SDR
lead_patient_history = {}

# Record ids and predictions are gathered in lists and converted to arrays
# once, after the loop.
record_ids = []
predictions = []
with make_reader(path_petastorm) as reader:
    for sample in reader:
        data = sample
        record_ids.append(int(data.noun_id))
        ECG_signal = data.signal
        ECG_lead = data.signal_names
        fs = data.sampling_frequency
        status = data.noun_id

        # Pair each lead name with its own signal column (no fixed lead count).
        dico_ECG = dict(zip(ECG_lead, ECG_signal.T))
        N = ECG_signal.shape[0]  # number of samples per lead

        SDR_lead,SDR_index= SDR_score(dico_ECG,ECG_lead,fs)
        prediction = SDR_classification_status(SDR_index)
        lead_good,lead_medium,lead_bad = SDR_Quality_lead(SDR_lead,ECG_lead)

        predictions.append(prediction)
        lead_patient_history[status] = np.array([lead_good,lead_medium,lead_bad])

# Put the predictions in ascending record-id order, then drop the records
# without a reference label.
# NOTE(review): this assumes the rows of the reference CSV are in the same
# ascending-id order and cover the same records as the dataset — confirm.
X_predicted = np.array(record_ids, dtype=float)
X_pred_sorted,ind_sort = Sorter_X_array(X_predicted)
Y_predicted = np.array(predictions)[ind_sort]
Y_predicted = Y_predicted[Y!="unlabeled"]
matrix["Y_predict"] = Y_predicted

# Binarise on "acceptable" (the positive class) and fix the label order, so
# the matrix is always 2x2 and ravels to tp, fn, fp, tn. The earlier
# `cm[cm!=0]` unpacking failed whenever a cell was zero or the matrix was not
# 2x2.
cm = confusion_matrix(np.asarray(Y_true) == "acceptable",
                      Y_predicted == "acceptable",
                      labels=[True, False]).ravel()
tp,fn,fp,tn = cm
print("TP = ",tp)
print("TN = ",tn)
print("FP = ",fp)
print("FN ",fn)
print("Accuracy = ",(tp+tn)/(tp+tn+fp+fn))
print("Precision = ",tp/(tp+fp))
print("Recall = ",tp/(tp+fn))

##Confusion matrix :
df = pd.DataFrame(matrix, columns=['Y_True','Y_predict'])
confusion = pd.crosstab(df['Y_True'], df['Y_predict'], rownames=['Actual'], colnames=['Predicted'],margins = True)
sn.heatmap(confusion, annot=True,fmt='g')
plt.show()


In [None]:
##The general function to run statistical test : 

def Runner_statistic(func,name_func,path_peta,y_true,y):
    """Score every record with a quality index and report it against the labels.

    Each record of the dataset is scored with ``func`` and turned into a
    status label. The predictions are ordered by record id, the unlabeled
    records are dropped, and TP/TN/FP/FN, accuracy, precision and recall are
    printed with "acceptable" as the positive class. A heatmap of the
    actual-vs-predicted cross-tabulation is then shown.

    Parameters
    ----------
    func : callable
        ``func(lead_dict, lead_names, fs) -> (per_lead_scores, mean_score)``.
    name_func : str
        ``"SDR"`` or ``"wPMF"``; selects the classification rules and appears
        in the plot title.
    path_peta : str
        Dataset URL handed to ``make_reader``.
    y_true : array-like
        Reference labels of the labeled records.
    y : array-like
        Reference labels of all records; entries equal to ``"unlabeled"``
        mark the predictions to discard.

    Returns
    -------
    None
    """
    matrix = {}
    matrix["Y_True"] = y_true

    # Per-record lead quality groups; built for inspection while debugging,
    # not returned.
    lead_patient_history_func = {}

    record_ids = []
    predictions = []
    with make_reader(path_peta) as reader:
        for data in reader:
            record_ids.append(int(data.noun_id))
            ECG_signal = data.signal
            ECG_lead = data.signal_names
            fs = data.sampling_frequency
            status = data.noun_id

            # Pair each lead name with its own signal column (no fixed lead count).
            dico_ECG = dict(zip(ECG_lead, ECG_signal.T))

            func_lead,func_index= func(dico_ECG,ECG_lead,fs)
            prediction = set_classification_status(name_func,func_index)
            lead_good,lead_medium,lead_bad = set_quality_lead(name_func,func_lead,ECG_lead)

            predictions.append(prediction)
            lead_patient_history_func[status] = np.array([lead_good,lead_medium,lead_bad])

    # Put the predictions in ascending record-id order, then drop the records
    # without a reference label.
    # NOTE(review): this assumes `y` is in the same ascending-id order and
    # covers the same records as the dataset — confirm.
    X_predicted = np.array(record_ids, dtype=float)
    X_pred_sorted,ind_sort = Sorter_X_array(X_predicted)
    Y_predicted = np.array(predictions)[ind_sort]
    Y_predicted = Y_predicted[np.asarray(y)!="unlabeled"]
    matrix["Y_predict"] = Y_predicted

    # Binarise on "acceptable" (the positive class) and fix the label order,
    # so the matrix is always 2x2 and ravels to tp, fn, fp, tn. The earlier
    # `cm[cm!=0]` unpacking failed whenever a cell was zero or the matrix was
    # not 2x2.
    cm = confusion_matrix(np.asarray(y_true) == "acceptable",
                          Y_predicted == "acceptable",
                          labels=[True, False]).ravel()
    tp,fn,fp,tn = cm
    print("TP = ",tp)
    print("TN = ",tn)
    print("FP = ",fp)
    print("FN ",fn)
    print("Accuracy = ",(tp+tn)/(tp+tn+fp+fn))
    print("Precision = ",tp/(tp+fp))
    print("Recall = ",tp/(tp+fn))

    ##Confusion matrix :
    df = pd.DataFrame(matrix, columns=['Y_True','Y_predict'])
    confusion = pd.crosstab(df['Y_True'], df['Y_predict'], rownames=['Actual'], colnames=['Predicted'],margins = True)
    sn.heatmap(confusion, annot=True,fmt='g')
    plt.title(f"Confusion Matrix for using the {name_func} index")
    plt.show()

In [None]:
###Test index wPMF

# Evaluate the wPMF index on the whole dataset against the reference labels.
Runner_statistic(
    func=wPMF_score,
    name_func="wPMF",
    path_peta=path_petastorm,
    y_true=Y_true,
    y=Y,
)