In [None]:
from petastorm import make_reader
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis,pearsonr
from scipy.signal import periodogram
from ecgdetectors import Detectors
import pywt
from sklearn.metrics import confusion_matrix,auc
import sys
from sklearn import metrics
import pandas as pd
import seaborn as sn
import os
import xarray as xr
import warnings
from tqdm import tqdm
sys.path.append(os.path.join(os.getcwd(), ".."))
from metrics.methods import Our_SQA_method
from shared_utils.utils_data import format_data_to_xarray_2020,format_data_to_xarray
warnings.simplefilter(action="ignore", category=RuntimeWarning)
# --- Data locations -------------------------------------------------------
# Parquet directory read through `path_petastorm` (used for the 2020 dataset
# further down in this notebook).
path_formatted_glasgow = "/workspaces/maitrise/data/20220902_data_physio_formatted_merged/merged/dataParquet"
# The path is absolute and already starts with "/", so only "file://" is
# prepended: "file:///" + "/workspaces/..." would yield a four-slash URL.
path_petastorm = f"file://{path_formatted_glasgow}"

# Parquet directory read through `path_petastorm_cinc2011` (used for the 2011
# dataset further down in this notebook).
path_formated_cinc2011= "/workspaces/maitrise/data/20221006_physio_quality/set-a/dataParquet"
path_petastorm_cinc2011 = f"file://{path_formated_cinc2011}"

# Directory where the NetCDF caches of the formatted datasets are looked up.
save_path = "/workspaces/maitrise/results"

In [None]:
## Load the patient signals (2011 and 2020 datasets), using the NetCDF caches when present

# exist_ok=True replaces the separate "exists?" check, which could race with
# another process creating the directory in between.
os.makedirs(save_path, exist_ok=True)

# 2011 dataset: reuse the cached file if it is there, otherwise build the
# dataset from the Parquet source.
# NOTE(review): format_data_to_xarray presumably writes "ecg_data.nc" into
# save_path so that the next run hits the cache - confirm in shared_utils.
path_cache_2011 = os.path.join(save_path, "ecg_data.nc")
if os.path.isfile(path_cache_2011):
    ds_data_2011 = xr.load_dataset(path_cache_2011)
else:
    ds_data_2011 = format_data_to_xarray(path_petastorm_cinc2011, save_path)

# 2020 dataset: same caching scheme with its own file name and formatter.
path_cache_2020 = os.path.join(save_path, "ecg_data_2020.nc")
if os.path.isfile(path_cache_2020):
    ds_data_2020 = xr.load_dataset(path_cache_2020)
else:
    ds_data_2020 = format_data_to_xarray_2020(path_petastorm, save_path)

In [None]:
# Discard the recordings whose quality label is "unlabeled": `where` blanks
# them out, then `dropna` removes the corresponding entries along "id".
# NOTE(review): dropna works on the whole dataset, so an id holding a missing
# value in any variable is presumably dropped as well - confirm this is wanted.
is_labeled = ds_data_2011.data_quality != "unlabeled"
ds_filtered = ds_data_2011.where(is_labeled).dropna(dim = "id")

# Put the dimensions in (id, lead_name, time) order so the first axis can be
# indexed with a per-recording mask.
signals = ds_filtered.signal.transpose("id","lead_name","time")

# Split the recordings by quality label using boolean masks on the first axis.
np_label = ds_filtered.data_quality.values
is_unacceptable = np_label == "unacceptable"
is_acceptable = np_label == "acceptable"
unacceptable_data = signals[is_unacceptable]
acceptable_data = signals[is_acceptable]

# Textual summary of the 2020 dataset.
print(ds_data_2020)

In [None]:
def _collect_lead_scores(dataset, description, sampling_rate=500):
    """Run the SQA lead scorer on every recording and flatten the results.

    Parameters
    ----------
    dataset : array-like indexable as ``dataset[i, :, :]``
        One entry per recording along the first axis.
    description : str
        Text shown next to the progress bar.
    sampling_rate : int, optional
        Second argument forwarded to ``Our_SQA_method.SQA_method_lead_score``
        (presumably the sampling frequency in Hz - TODO confirm).

    Returns
    -------
    (scores_0, scores_1) : tuple of 1-D numpy arrays
        Column 0 and column 1 of every score matrix, concatenated in
        recording order. Both are empty when ``dataset`` has no recording.
    """
    # The leading empty float array keeps the result well-defined (and of the
    # same dtype as the previous incremental concatenation) for an empty dataset.
    chunks_0 = [np.array([])]
    chunks_1 = [np.array([])]
    for i in tqdm(range(dataset.shape[0]), desc=description):
        signal_patient = np.array(dataset[i, :, :])
        y_score = Our_SQA_method.SQA_method_lead_score(signal_patient, sampling_rate)
        chunks_0.append(y_score[:, 0])
        chunks_1.append(y_score[:, 1])
    # Concatenate once at the end: concatenating inside the loop re-copies the
    # whole accumulated array at every iteration (quadratic cost).
    return np.concatenate(chunks_0, axis=None), np.concatenate(chunks_1, axis=None)


score_unacceptable_0, score_unacceptable_1 = _collect_lead_scores(
    unacceptable_data,
    "Calculating SQA score for each leads of the unacceptable dataset",
)

score_acceptable_0, score_acceptable_1 = _collect_lead_scores(
    acceptable_data,
    "Calculating SQA score for each leads of the acceptable dataset",
)


In [None]:
# Wrap every score vector in a single-column DataFrame. The variable prefix
# names the dataset the scores were computed on; the column name ("1" / "0")
# is the score column that was extracted from the SQA output.
acceptable_1_hist = pd.DataFrame({"1": score_acceptable_1})
acceptable_0_hist = pd.DataFrame({"0": score_acceptable_0})

unacceptable_1_hist = pd.DataFrame({"1": score_unacceptable_1})
unacceptable_0_hist = pd.DataFrame({"0": score_unacceptable_0})

In [None]:
# Score distributions computed on the *acceptable* recordings, one curve per
# score column ("1" = acceptable, as stated in the title).
acceptable_set = acceptable_1_hist["1"]
# Despite its name, this is the column-"0" score of the acceptable recordings.
unacceptable_set = acceptable_0_hist["0"]

# Compute each mean once; it is used for both the line position and its label.
mean_score_1 = score_acceptable_1.mean()
mean_score_0 = score_acceptable_0.mean()

fig, ax = plt.subplots()
# stat="density": the title and the y-label announce a density, whereas
# histplot draws raw counts by default.
sn.histplot(acceptable_set, kde=True, stat="density", color='darkblue', label="Acceptable", ax=ax)
ax.axvline(mean_score_1, color='b', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_1))
sn.histplot(unacceptable_set, kde=True, stat="density", color="darkorange", label="Unacceptable", ax=ax)
ax.axvline(mean_score_0, color='orange', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_0))
ax.legend(title='Quality')
ax.set_title('Density Plot for each classes (1 = acceptable)')
ax.set_xlabel('SQA scores')
ax.set_ylabel('Density');

In [None]:
# Score distributions computed on the *unacceptable* recordings, one curve per
# score column ("1" is plotted as "Acceptable", "0" as "Unacceptable").
acceptable_set = unacceptable_1_hist["1"]
unacceptable_set = unacceptable_0_hist["0"]

# Compute each mean once; it is used for both the line position and its label.
mean_score_1 = score_unacceptable_1.mean()
mean_score_0 = score_unacceptable_0.mean()

fig, ax = plt.subplots()
# stat="density": the title and the y-label announce a density, whereas
# histplot draws raw counts by default.
sn.histplot(acceptable_set, kde=True, stat="density", bins=50, color='darkblue', label="Acceptable", ax=ax)
ax.axvline(mean_score_1, color='b', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_1))
sn.histplot(unacceptable_set, kde=True, stat="density", bins=50, color="darkorange", label="Unacceptable", ax=ax)
ax.axvline(mean_score_0, color='orange', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_0))
ax.legend(title='Quality')
ax.set_title('Density Plot for each classes (Unacceptable dataset)')
ax.set_xlabel('SQA scores')
ax.set_ylabel('Density');

In [None]:
# Materialise the 2020 signals as a NumPy array ordered (id, lead_name, time).
data_comparison = np.array(ds_data_2020.signal.transpose("id","lead_name","time"))

# Uninitialised buffer with one row per (recording, lead) pair and two columns.
# It is not referenced by the other cells visible in this notebook section.
n_recordings, n_leads = data_comparison.shape[:2]
score_trial = np.empty((n_recordings * n_leads, 2))

In [None]:
# Per-lead SQA scores for every recording of the 2020 dataset.
# The chunks are gathered in lists and concatenated once after the loop:
# concatenating inside the loop re-copies the accumulated array every time.
# The leading empty float array keeps the result defined (and of the same
# dtype as before) when there is no recording at all.
chunks_trial_1 = [np.array([])]
chunks_trial_0 = [np.array([])]
for i in tqdm(range(data_comparison.shape[0]),desc ="Calculating SQA score for each leads"):
    signal_patient = np.array(data_comparison[i,:,:])
    # 500 is passed as in the other scoring cells (presumably the sampling
    # frequency in Hz - TODO confirm against Our_SQA_method).
    y_score = Our_SQA_method.SQA_method_lead_score(signal_patient,500)
    chunks_trial_1.append(y_score[:,1])
    chunks_trial_0.append(y_score[:,0])

score_trial_1 = np.concatenate(chunks_trial_1,axis = None)
score_trial_0 = np.concatenate(chunks_trial_0,axis = None)

In [None]:
# Score distributions computed on the 2020 recordings, one curve per score
# column ("1" is plotted as "Acceptable", "0" as "Unacceptable").
signal_hist_1 = pd.DataFrame(score_trial_1,columns = ["1"])
signal_hist_0 = pd.DataFrame(score_trial_0,columns = ["0"])
acceptable_set = signal_hist_1["1"]
unacceptable_set = signal_hist_0["0"]

# Compute each mean once; it is used for both the line position and its label.
mean_score_1 = acceptable_set.mean()
mean_score_0 = unacceptable_set.mean()

fig, ax = plt.subplots()
# stat="density": the title and the y-label announce a density, whereas
# histplot draws raw counts by default.
sn.histplot(acceptable_set, kde=True, stat="density", bins=125, color='darkblue', label="Acceptable", ax=ax)
ax.axvline(mean_score_1, color='b', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_1))
sn.histplot(unacceptable_set, kde=True, stat="density", bins=125, color="darkorange", label="Unacceptable", ax=ax)
ax.axvline(mean_score_0, color='orange', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(mean_score_0))
ax.legend(title='Quality')
ax.set_title('Density Plot for each classes with 2020 dataset')
ax.set_xlabel('SQA scores')
ax.set_ylabel('Density');