In [None]:
from petastorm import make_reader
import numpy as np
import matplotlib.pyplot as plt
import sys
import neurokit2 as nk
import pandas as pd
import os
from tqdm import tqdm
import xarray as xr
# Seed NumPy's global RNG so the random draws made later in the notebook are repeatable.
np.random.seed(1)
# Make the parent of the current working directory importable so that `shared_utils` resolves.
sys.path.append(os.path.join(os.getcwd(), ".."))
from shared_utils import utils_tsd_study as TSD
# Parquet dataset holding the ECG recordings that are read with petastorm in the cells below.
path_formatted_glasgow = "/workspaces/ecg_evaluation/data/20221006_physio_quality/set-a/dataParquet"
# The reader is given a URL rather than a bare path, hence the file:// scheme.
path_petastorm = f"file://{path_formatted_glasgow}"
# NOTE(review): the two paths below are not referenced by any cell visible in this notebook.
path_csv_ref_label = "/workspaces/ecg_evaluation/data/20221006_physio_quality/set-a/REFERENCE.csv"
path_synthetic_dataset = ""

## Notebook goals : 

- ### Plot the TSD evolution as a function of the level of noise added to ECG recordings: synthetic ones, ones labelled acceptable and ones labelled unacceptable

- ### Check how the TSD behaves when encountering pathological ECGs

## Read one signal and get its lead names and quality label

In [None]:
# Scan the dataset until the first recording labelled "acceptable"; it serves as the
# reference for the lead names and the sampling frequency used in the rest of the notebook.
data = None
with make_reader(path_petastorm) as reader:
    for sample in reader:
        if sample.signal_quality == "acceptable".encode():
            data = sample
            break

# Fail loudly instead of silently continuing with a recording of the wrong quality
# (or with no recording at all when the dataset is empty).
if data is None:
    raise RuntimeError(f"No recording labelled 'acceptable' found in {path_petastorm}")

ECG_signal = data.signal
ECG_lead = data.signal_names
fs = data.sampling_frequency
# Grid of 50 SNR values between -10 and 100 (presumably dB) at which the TSD is evaluated.
SNR_level = np.linspace(-10, 100, 50)

## Collect 100 ECG recordings of acceptable quality

In [None]:
### Collect 100 recordings labelled "acceptable"
# Each entry of `dataset` maps a running index to a {lead name: 1-D signal} dictionary.
n_acceptable_wanted = 100
n_leads = len(ECG_lead)

dataset = {}
stop_cond = 0
with make_reader(path_petastorm) as reader:
    for sample in reader:
        # Check the (cheap) label first so the power computation only runs on candidates.
        if sample.signal_quality != "acceptable".encode():
            continue
        ECG_signal = sample.signal
        # Mean power of every lead at once; a recording is rejected as soon as one lead
        # has zero power (i.e. is identically zero).
        lead_power = np.mean(np.abs(ECG_signal[:, :n_leads]) ** 2, axis=0)
        if not lead_power.all():
            continue
        dataset[stop_cond] = {lead: ECG_signal[:, col] for col, lead in enumerate(ECG_lead)}
        stop_cond += 1
        # Stop right after the last wanted recording instead of reading one more sample.
        if stop_cond >= n_acceptable_wanted:
            break

## Do the same for the unacceptable ones

In [None]:
### Collect 100 recordings labelled "unacceptable"
# Each entry of `unac_dataset` maps a running index to a {lead name: 1-D signal} dictionary.
n_unacceptable_wanted = 100
n_leads = len(ECG_lead)

unac_dataset = {}
stop_cond = 0
with make_reader(path_petastorm) as reader:
    for sample in reader:
        # Check the (cheap) label first so the power computation only runs on candidates.
        if sample.signal_quality != "unacceptable".encode():
            continue
        ECG_signal = sample.signal
        # Mean power of every lead at once; a recording is rejected as soon as one lead
        # has zero power (i.e. is identically zero).
        lead_power = np.mean(np.abs(ECG_signal[:, :n_leads]) ** 2, axis=0)
        if not lead_power.all():
            continue
        unac_dataset[stop_cond] = {lead: ECG_signal[:, col] for col, lead in enumerate(ECG_lead)}
        stop_cond += 1
        # Stop right after the last wanted recording instead of reading one more sample.
        if stop_cond >= n_unacceptable_wanted:
            break

# Number of recordings actually collected (can be below the target if the data runs out).
print(len(unac_dataset))

## Create a synthetic ECG recording dataset

In [None]:
### Synthesise multi-lead ECGs for a range of heart rates
# One random integer heart rate in [60, 180) per synthetic recording, 100 recordings in total.
HR_p = np.random.randint(60, 180, 100)

# Same layout as the real datasets: running index -> {lead name: 1-D signal}.
synth_dataset = {}
for rec_idx in tqdm(range(len(HR_p))):
    simulated = nk.ecg_simulate(
        10,
        5000,
        sampling_rate=500,
        noise=0,
        heart_rate=HR_p[rec_idx],
        heart_rate_std=1,
        method="multileads",
    )
    lead_matrix = simulated.to_numpy()
    # Column `col` of the simulated frame is stored under the col-th lead name of the
    # reference recording.
    synth_dataset[rec_idx] = {
        lead: lead_matrix[:, col] for col, lead in enumerate(ECG_lead)
    }

## Get the mean and SD for all the datasets

In [None]:
# Run the TSD-versus-noise study on each of the three datasets (acceptable, unacceptable,
# synthetic) over the SNR grid, lead names and sampling frequency defined earlier.
# NOTE(review): judging by the variable names, each call returns the mean TSD and its
# standard deviation — confirm against shared_utils.utils_tsd_study.
# NOTE(review): `nokok_SDDmean` looks like a typo for `nok_SDDmean`; it is left unchanged
# because the plotting cell that follows refers to the same name.
ok_Dmean,ok_SDDmean = TSD.TSDvsNoiseLevel_100ECG(SNR_level,dataset,ECG_lead,fs)
nok_Dmean,nokok_SDDmean = TSD.TSDvsNoiseLevel_100ECG(SNR_level,unac_dataset,ECG_lead,fs)
synth_Dmean,synth_SDDmean = TSD.TSDvsNoiseLevel_100ECG(SNR_level,synth_dataset,ECG_lead,fs)

## Compare the TSD evolution between sets 

In [None]:
# Plot the synthetic / acceptable / unacceptable TSD statistics against the SNR grid.
# Argument order: the three means first, then the three matching spreads.
# NOTE(review): presumably one panel per lead — confirm against TSD.Comparative_lead_plot.
TSD.Comparative_lead_plot(synth_Dmean,ok_Dmean,nok_Dmean,synth_SDDmean,ok_SDDmean,nokok_SDDmean,SNR_level,ECG_lead)

## Check the TSD behavior when encountering pathological ECGs

In [None]:
### Comparison of the TSD of ECGs with different pathologies
path_formatted_pathos = "/workspaces/maitrise/data/20220902_data_physio_formatted_merged/merged/dataParquet"
# NOTE(review): this yields "file:////..." (four slashes) because the path is already
# absolute, whereas the first dataset uses f"file://{...}" — confirm which form is intended.
path_petastorm_pathos = f"file:///{path_formatted_pathos}"
path_csv_SNOMED_label = "/workspaces/maitrise/data/Dx_map.csv"

### Read the label map: column 0 is used as the pathology name, column 1 as its code.
pd_pathos_label = pd.read_csv(path_csv_SNOMED_label)
pathos_label = pd_pathos_label.to_numpy()
pathology_name = pathos_label[:, 0]
pathology_code = pathos_label[:, 1]

# Codes of the pathologies to collect. Alternative selection kept for reference:
#pathology_want = [426783006,164896001,426627000,164909002,164861001]
pathology_want = [427084000]

### Build {pathology name: {lead name: 1-D signal}} with the first matching recording
### found for every wanted code.
dico_all = {}
# `fs` is the sampling frequency of the last stored recording and stays 0 if none is found.
# NOTE(review): a single `fs` is kept even when several pathologies are collected.
fs = 0
codes_missing = list(pathology_want)
with make_reader(path_petastorm_pathos) as reader:
    for sample1 in reader:
        score_class = sample1.score_classes
        # Keep only recordings carrying exactly one class, and that class must be non-zero.
        if score_class.size != 1 or score_class[0] == 0:
            continue
        code = score_class[0]
        # Filter on the wanted codes before looking the name up, so a code that is absent
        # from the label map cannot raise an IndexError for a recording we do not want.
        if code not in pathology_want:
            continue
        matching_names = pathology_name[pathology_code == code]
        # Fall back to the code itself when the label map has no entry for it.
        name = matching_names[0] if len(matching_names) else str(code)
        if name in dico_all:
            continue

        dat = sample1.signal
        # Pair every lead name with its column instead of assuming exactly 12 leads.
        dico_all[name] = dict(zip(sample1.signal_names, dat.T))
        fs = sample1.sampling_frequency

        # Stop reading once every wanted code has a recording. The previous test compared
        # pathology names with numeric codes, so it never fired and the whole dataset was read.
        codes_missing = [c for c in codes_missing if c != code]
        if not codes_missing:
            break

if codes_missing:
    print(f"No recording found for pathology code(s): {codes_missing}")



## Check how the TSD behaves when encountering pathological ECGs with increasing levels of noise

In [None]:
##Plot TSD difference between sample without disease and one with disease. Done for each disease selectionned before
def add_observational_noise_segment(signal, SNR, start=1500, stop=3000):
    """Return a copy of ``signal`` with white Gaussian noise added to one segment.

    The noise power is the mean power of the *whole* signal divided by ``10**(SNR/10)``,
    i.e. ``SNR`` is the signal-to-noise ratio in dB. Noise is drawn for the full signal
    length from NumPy's global RNG, but only the samples in ``[start, stop)`` are added.

    Parameters
    ----------
    signal : array-like
        1-D signal. It is not modified.
    SNR : float
        Target signal-to-noise ratio in dB.
    start, stop : int, optional
        Bounds (slice semantics) of the segment that receives the noise.
        Defaults reproduce the historical 1500:3000 window.

    Returns
    -------
    numpy.ndarray
        Noisy copy of ``signal``. Integer (or boolean) inputs are returned as float64 so
        that the added noise is not truncated; floating/complex inputs keep their dtype.

    Raises
    ------
    ValueError
        If ``signal`` is empty.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        raise ValueError("signal must contain at least one sample")

    Power_sig = (1 / len(signal)) * np.sum(np.abs(signal) ** 2, dtype=np.float64)
    # Same quantity as 10**((10*log10(P) - SNR)/10), without taking log10(0) on a flat signal.
    noise_power = Power_sig / 10 ** (SNR / 10)
    noise = np.random.normal(0, np.sqrt(noise_power), len(signal))

    # Work on a floating copy: writing float noise into an integer array would truncate it.
    out_dtype = signal.dtype if np.issubdtype(signal.dtype, np.inexact) else np.float64
    sig_noisy = signal.astype(out_dtype)
    sig_noisy[start:stop] += noise[start:stop]
    return sig_noisy

# For every collected pathology, plot the sliding-window TSD of one lead above the raw lead.
index_lead = 0
lead_key = ECG_lead[index_lead]
lead_label = lead_key.decode('utf8')
# Window parameter: each window spans segment_length + 1 consecutive samples.
segment_length = 100

for patho_name, patho_leads in dico_all.items():
    # To study a noisy segment instead, wrap the lead with
    # add_observational_noise_segment(patho_leads[lead_key], 10).
    sig = patho_leads[lead_key]

    # One row per window, the windows starting at samples 0 .. len(sig) - segment_length - 2.
    X = np.array([sig[w - 1 : w + segment_length] for w in range(1, len(sig) - segment_length)])
    L1 = np.array([TSD.Lq_k(window, 1, fs) for window in X])
    L2 = np.array([TSD.Lq_k(window, 2, fs) for window in X])
    # Log-ratio of the two Lq_k values (k = 1 and k = 2), normalised by log(2).
    Ds = (np.log(L1) - np.log(L2)) / (np.log(2))

    # Exact sample times in seconds: Ds[k] belongs to the window starting at sample k.
    # (linspace(0, N, N) and int(len/fs) slightly stretched / truncated these axes.)
    t_tsd = np.arange(len(Ds)) / fs
    t_sig = np.arange(len(sig)) / fs

    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 15))
    ax[0].plot(t_tsd, Ds)
    ax[0].set_title(f"TSD time Evolution of Lead {lead_label} for {patho_name} ")
    ax[0].set_xlabel("lag")
    ax[0].set_ylabel("TSD value")
    ax[0].grid()
    ax[0].set_xlim([0,1])
    ax[1].plot(t_sig, sig, label=lead_key)
    ax[1].set_title(f"Lead {lead_label} for {patho_name}")
    ax[1].set_xlabel("Time (sec)")
    ax[1].set_ylabel("Voltage Amplitude")
    ax[1].set_xlim([0,1])
    ax[1].grid()
    plt.show()
        

In [None]:
### Same TSD / raw-signal figure for a clean synthetic ECG (heart rate 60, no noise)
# Sampling rate of the synthetic signal. It is used below instead of the global `fs`,
# which at this point holds the sampling frequency of the pathology dataset.
synth_fs = 500
ecg_synth = nk.ecg_simulate(10,5000,sampling_rate=synth_fs,noise = 0,heart_rate = 60,heart_rate_std = 1,method = "multileads")
np_ecg_synth = ecg_synth.to_numpy()
# Column `col` of the simulated frame is stored under the col-th reference lead name.
dico_synth = {lead: np_ecg_synth[:, col] for col, lead in enumerate(ECG_lead)}

# The first lead is both analysed and named in the titles (no dependency on the
# `index_lead` left behind by the previous cell).
synth_lead = ECG_lead[0]
synth_lead_label = synth_lead.decode('utf8')
sig_test = dico_synth[synth_lead]
segment_length = 10
Ds,_,_ = TSD.TSD_calculator(sig_test,segment_length,synth_fs)

# Time axes in seconds, built from the signal actually plotted (the previous version used
# the length of `sig` from the pathology cell, which fails when the lengths differ).
t_tsd = np.arange(len(Ds)) / synth_fs
t_sig = np.arange(len(sig_test)) / synth_fs

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 15))
ax[0].plot(t_tsd, Ds)
ax[0].set_title(f"TSD time Evolution of Lead {synth_lead_label} (synthetise) ")
ax[0].set_xlabel("lag")
ax[0].set_ylabel("TSD value")
ax[0].grid()
ax[0].set_xlim([0.6,1.50])
ax[1].plot(t_sig, sig_test, label=synth_lead)
ax[1].set_title(f"Lead {synth_lead_label} (synthetise lead)")
ax[1].set_xlabel("Time (sec)")
ax[1].set_ylabel("Voltage Amplitude")
ax[1].set_xlim([0.6,1.50])
ax[1].grid()
plt.show()