In [None]:
from petastorm import make_reader
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis,pearsonr
from scipy.signal import periodogram
from ecgdetectors import Detectors
import pickle
from sklearn.metrics import confusion_matrix,auc
import sys
from sklearn import metrics
import pandas as pd
import seaborn as sn
import os
import xarray as xr
import warnings
from tqdm import tqdm
# Add the parent of the current working directory to the import path
# (presumably so the project-local `shared_utils` package below resolves
# when the notebook is started from its own sub-directory).
sys.path.append(os.path.join(os.getcwd(), ".."))
from shared_utils.utils_data import format_data_to_xarray_2020,format_data_to_xarray,extract_index_label
# Silence every RuntimeWarning for the rest of the notebook.
warnings.simplefilter(action="ignore", category=RuntimeWarning)
# Parquet directory that is handed (as a file:// URL) to format_data_to_xarray_2020.
# NOTE(review): the path is absolute, so "file:///" + path yields four slashes
# ("file:////workspaces/...") - confirm the reader accepts that form.
path_formatted_glasgow = "/workspaces/ecg_evaluation/data/20220902_data_physio_formatted_merged/merged/dataParquet"
path_petastorm = f"file:///{path_formatted_glasgow}"

# Parquet directory that is handed (as a file:// URL) to format_data_to_xarray
# for the CinC 2011 data; the same four-slash remark applies.
path_formated_cinc2011= "/workspaces/ecg_evaluation/data/20221006_physio_quality/set-a/dataParquet"
path_petastorm_cinc2011 = f"file:///{path_formated_cinc2011}"

# Directory holding every cached artefact read or written below
# (.nc datasets, .npy score arrays, pickled model).
save_path = "/workspaces/ecg_evaluation/results"

## Load the datasets (including the quality metrics computed on CinC 2011)

In [None]:
## Load the cached datasets, building each one that is not cached yet

# `exist_ok=True` makes the cell safe to re-run and avoids the separate
# existence check.
os.makedirs(save_path, exist_ok=True)

# ECG signals of the CinC 2011 dataset.
ecg_2011_file = os.path.join(save_path, "ecg_data.nc")
if os.path.exists(ecg_2011_file):
    ds_data_2011 = xr.load_dataset(ecg_2011_file)
else:
    ds_data_2011 = format_data_to_xarray(path_petastorm_cinc2011, save_path)

# Quality metrics computed on CinC 2011.
# NOTE(review): the fallback calls the same builder with the same arguments as
# the ECG-signal branch above; whether that really produces the metrics
# dataset cannot be told from this notebook - confirm.
metrics_2011_file = os.path.join(save_path, "quality_metrics.nc")
if os.path.exists(metrics_2011_file):
    metrics_2011 = xr.load_dataset(metrics_2011_file)
else:
    metrics_2011 = format_data_to_xarray(path_petastorm_cinc2011, save_path)

# ECG signals of the 2020 Challenge dataset.
ecg_2020_file = os.path.join(save_path, "ecg_data_2020.nc")
if os.path.exists(ecg_2020_file):
    ds_data_2020 = xr.load_dataset(ecg_2020_file)
else:
    ds_data_2020 = format_data_to_xarray_2020(path_petastorm, save_path)

## Define a function that loads the saved model and returns the score for each class

In [None]:
# Names of the quality metrics passed to the model as features.
list_features  = ["Corr_interlead", "Corr_intralead", "TSD"]
def SQA_method(data,feature_ex,model_path):
    """Score `data` with a pickled classifier.

    Parameters
    ----------
    data
        Dataset handed unchanged to `extract_index_label`.
    feature_ex
        Feature selection handed unchanged to `extract_index_label`.
    model_path
        Path of the pickled model; the loaded object must expose
        `predict_proba`.

    Returns
    -------
    Whatever `model.predict_proba` returns for the extracted feature values.
    """
    # Only the first element of the returned pair is used here.
    X, _ = extract_index_label(data, feature_ex)
    # NOTE(security): unpickling executes arbitrary code - only load model
    # files that come from a trusted source.
    # The context manager closes the file handle, which the previous
    # `pickle.load(open(...))` form never did explicitly.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    return model.predict_proba(X.values)

## Select the labelled records and get their labels, then split them into unacceptable and acceptable subsets

In [None]:
# Mask out every record whose quality label is "unlabeled", then drop the
# ids that contain missing values after masking.
labelled_mask = ds_data_2011.data_quality != "unlabeled"
ds_filtered = ds_data_2011.where(labelled_mask).dropna(dim="id")

# Reorder the signal dimensions to (id, lead_name, time).
signals = ds_filtered.signal.transpose("id", "lead_name", "time")

# Quality label of each remaining record, as a plain NumPy array.
np_label = ds_filtered.data_quality.values
is_unacceptable = np_label == "unacceptable"
is_acceptable = np_label == "acceptable"

# Boolean selection along the first (id) dimension.
unacceptable_data = signals[is_unacceptable, :, :]
acceptable_data = signals[is_acceptable, :, :]

## Get the scores for both classes on CinC 2011 using the model

In [None]:
# Cache files holding the model scores of each class on CinC 2011.
acceptable_score_file = os.path.join(save_path, "acceptable_score_2011.npy")
unacceptable_score_file = os.path.join(save_path, "unacceptable_score_2011.npy")

# Both cache files must exist before the cached branch is taken. The previous
# test only checked the first file (the second operand was a bare path
# string, which is always truthy) and then passed the boolean result of
# os.path.exists to np.load instead of the path.
if os.path.exists(acceptable_score_file) and os.path.exists(unacceptable_score_file):
    acceptable_score = np.load(acceptable_score_file, allow_pickle=True)
    unacceptable_score = np.load(unacceptable_score_file, allow_pickle=True)
else:
    model_path = "/workspaces/ecg_evaluation/results/hjmi_selection.sav"
    scores = SQA_method(metrics_2011, list_features, model_path=model_path)
    # NOTE(review): `np_label` is taken from the filtered signal dataset while
    # `scores` is computed from `metrics_2011`; this indexing assumes both
    # have the same records in the same order - TODO confirm.
    unacceptable_score = scores[np_label == "unacceptable", :]
    acceptable_score = scores[np_label == "acceptable", :]

    # Persist the scores so the next run can take the cached branch.
    np.save(acceptable_score_file, acceptable_score)
    np.save(unacceptable_score_file, unacceptable_score)


## Define the DataFrames

In [None]:
# One single-column frame per class: score column 0 for the acceptable
# records, score column 1 for the unacceptable ones.
# NOTE(review): presumably column 0 / 1 are the model's "acceptable" /
# "unacceptable" probabilities - confirm against the model's class order.
acceptable_hist = pd.DataFrame({"0": acceptable_score[:, 0]})
unacceptable_hist = pd.DataFrame({"1": unacceptable_score[:, 1]})

## Histogram for Cinc2011

In [None]:
acceptable_set = acceptable_hist["0"]
unacceptable_set = unacceptable_hist["1"]

# Series.mean() already returns a scalar, so it is computed once per class
# and reused for both the line position and its legend entry.
acceptable_mean = acceptable_set.mean()
unacceptable_mean = unacceptable_set.mean()

fig, ax = plt.subplots()
# stat="density" makes the bars match the "Density" title and y-axis label;
# histplot draws raw counts by default.
sn.histplot(acceptable_set, kde=True, stat="density", color='darkblue', label="Acceptable", ax=ax)
ax.axvline(acceptable_mean, color='b', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(acceptable_mean))
sn.histplot(unacceptable_set, kde=True, stat="density", color="darkorange", label="Unacceptable", ax=ax)
ax.axvline(unacceptable_mean, color='orange', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(unacceptable_mean))
ax.legend(title='Quality')
ax.set_title('Density Plot for each classes')
ax.set_xlabel('SQA scores')
ax.set_ylabel('Density')
plt.show()

## Get the scores for both classes on the 2020 Challenge dataset using the model

In [None]:
# Reorder the 2020 signals to (id, lead_name, time) and materialise them as a
# plain NumPy array.
signals_2020 = ds_data_2020.signal.transpose("id", "lead_name", "time")
data_comparison = np.array(signals_2020)

# Uninitialised two-column buffer with one row per (record, lead) pair.
n_records, n_leads = data_comparison.shape[0], data_comparison.shape[1]
score_trial = np.empty([n_records * n_leads, 2])

In [None]:
# Cache files holding the two score columns for the 2020 dataset.
score_1_file = os.path.join(save_path, "score_trial_1_2020.npy")
score_0_file = os.path.join(save_path, "score_trial_0_2020.npy")

if os.path.exists(score_1_file) and os.path.exists(score_0_file):
    score_trial_1 = np.load(score_1_file, allow_pickle=True)
    score_trial_0 = np.load(score_0_file, allow_pickle=True)
else:
    # Collect the per-record columns in lists and concatenate once at the end;
    # re-concatenating the growing array on every iteration is quadratic.
    # Each list is seeded with an empty float array so the final result (also
    # for an empty dataset) matches what incremental concatenation produced.
    column_1_parts = [np.array([])]
    column_0_parts = [np.array([])]
    for i in tqdm(range(data_comparison.shape[0]), desc="Calculating SQA score for each leads"):
        signal_patient = np.array(data_comparison[i, :, :])
        # NOTE(review): `SQA_method` is defined earlier in this notebook as a
        # plain function, which has no `SQA_method_lead_score` attribute, so
        # this call raises AttributeError unless the name is rebound to a
        # module elsewhere. The intended import is not visible here - fix.
        # NOTE(review): 500 is presumably the sampling frequency in Hz - confirm.
        y_score = SQA_method.SQA_method_lead_score(signal_patient, 500)
        column_1_parts.append(y_score[:, 1])
        column_0_parts.append(y_score[:, 0])
    score_trial_1 = np.concatenate(column_1_parts, axis=None)
    score_trial_0 = np.concatenate(column_0_parts, axis=None)
    # Persist the scores so the next run can take the cached branch.
    np.save(score_1_file, score_trial_1)
    np.save(score_0_file, score_trial_0)

## Histogram for the 2020 Challenge Dataset

In [None]:
signal_hist_1 = pd.DataFrame(score_trial_1, columns=["1"])
signal_hist_0 = pd.DataFrame(score_trial_0, columns=["0"])
unacceptable_set = signal_hist_1["1"]
acceptable_set = signal_hist_0["0"]

# Summary statistics are computed once and reused for the line positions and
# their legend entries.
acceptable_mean = acceptable_set.mean()
acceptable_q10 = np.quantile(acceptable_set, 0.10)
acceptable_q90 = np.quantile(acceptable_set, 0.90)

# Only the acceptable-class scores are drawn, as the title states;
# `unacceptable_set` stays defined for an optional overlay.
fig, ax = plt.subplots()
# stat="density" makes the bars match the "Density" title and y-axis label;
# histplot draws raw counts by default.
sn.histplot(acceptable_set, kde=True, bins=125, stat="density", color='darkblue', label="acceptable", ax=ax)
ax.axvline(acceptable_mean, color='k', linestyle='dashed', linewidth=1,
           label="mean value : {:.2f}".format(acceptable_mean))
ax.axvline(acceptable_q10, color='tab:pink', linestyle='dashed', linewidth=1,
           label="q10 acceptable : {:.2f}".format(acceptable_q10))
ax.axvline(acceptable_q90, color='g', linestyle='dashed', linewidth=1,
           label="q90 acceptable : {:.2f}".format(acceptable_q90))
ax.legend(title='Quality')
ax.set_title('Density Plot for acceptable class with 2020 dataset')
ax.set_xlabel('SQA scores')
ax.set_ylabel('Density')
plt.show()