In [None]:
from petastorm import make_reader
import numpy as np
import matplotlib.pyplot as plt
import sys
import neurokit2 as nk
import pandas as pd
import os
sys.path.append(os.path.join(os.getcwd(), ".."))
from metrics.methods import tsd_metrics as TSD
from metrics.methods import hurst_exponent as Hurst
# Folder holding the set-a recordings that are read through petastorm below.
path_formatted_glasgow = "/workspaces/maitrise/data/20221006_physio_quality/set-a/dataParquet"
# URL passed to make_reader.
# NOTE(review): the path already starts with "/", so this expands to "file:////..." (four slashes) - confirm this is intended.
path_petastorm = f"file:///{path_formatted_glasgow}"
# Reference-label CSV for set-a. NOTE(review): not read by any of the cells visible in this notebook.
path_csv_ref_label = "/workspaces/maitrise/data/20221006_physio_quality/set-a/REFERENCE.csv"

In [None]:
# Walk the petastorm dataset and stop on the first record tagged "acceptable".
# If no record matches, `data` is simply the last record that was read.
with make_reader(path_petastorm) as reader:
    for sample in reader:
        data = sample
        if data.signal_quality == "acceptable".encode():
            break

print(data)
print(data.signal_quality)

# Pieces of the selected record that later cells rely on.
ECG_signal, ECG_lead, fs = data.signal, data.signal_names, data.sampling_frequency

# Map each lead name to its column of the signal matrix (at most the first 12 columns).
dico_ECG = {lead: ECG_signal[:, col] for lead, col in zip(ECG_lead, range(12))}

print(len(dico_ECG))

In [None]:
def add_observational_noise(sig,SNR,rng=None):
    """Return ``sig`` with additive white Gaussian noise at a requested SNR.

    The noise variance is chosen so that
    ``10*log10(mean(|sig|**2) / noise_variance) == SNR``.

    Parameters
    ----------
    sig : array-like, 1-D
        Clean signal. It is not modified.
    SNR : float
        Target signal-to-noise ratio in dB.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, which is the historical behaviour of this function.

    Returns
    -------
    numpy.ndarray
        ``sig + noise``, same length as ``sig`` (floating point).
    """
    sig = np.asarray(sig)
    if sig.size == 0:
        # Nothing to corrupt; also avoids the division by zero below.
        return sig.astype(np.float64)

    # Convert to float64 *before* squaring: with integer samples (e.g. int16)
    # the square wraps around silently and the power estimate is wrong.
    magnitude = np.abs(sig).astype(np.float64)
    Power_sig = (1/len(sig))*np.sum(magnitude**2,dtype = np.float64)

    P_db = 10*np.log10(Power_sig)
    noisedb = P_db - SNR
    noise_power = 10**(noisedb/10)

    sampler = np.random if rng is None else rng
    noise = sampler.normal(0,np.sqrt(noise_power),len(sig))
    return sig+noise

In [None]:
# SNR grid swept by the noise experiments: 50 evenly spaced values from -10 to 100.
# NOTE(review): SNR_level is reassigned with 10 points further down in this notebook.
SNR_level = np.linspace(-10,100,50)

def TSD_mean_calculator(signal,segment_length = 100,dt = 0.001):
    """Mean and standard deviation of a per-segment TSD value.

    ``signal`` is cut into consecutive, non-overlapping windows of
    ``segment_length`` samples; a trailing partial window is ignored. For each
    window the value ``(log(L1) - log(L2)) / log(2)`` is computed, with
    ``Lq = TSD.Lq_k(window, q, 1/dt)``.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-window values.

    NOTE(review): the other cells visible in this notebook call
    ``TSD.TSD_mean_calculator`` from the imported module, not this function.
    """
    per_segment = []
    w = 1
    while w*segment_length <= len(signal):
        lo = int((w-1)*segment_length)
        hi = int(w*segment_length)
        window = signal[lo:hi]
        length_q1 = TSD.Lq_k(window,1,1/dt)
        length_q2 = TSD.Lq_k(window,2,1/dt)
        per_segment.append((np.log(length_q1)-np.log(length_q2))/(np.log(2)))
        w += 1
    Ds = np.asarray(per_segment,dtype = np.float64).ravel()
    return np.mean(Ds),np.std(Ds)

def TSDvsNoiseLevel_array(noise_level,dico_signal,name_lead,fs):
    """Mean / SD of the TSD value of every lead for each noise level.

    Parameters
    ----------
    noise_level : iterable of float
        SNR values handed to ``add_observational_noise``.
    dico_signal : dict
        Maps each entry of ``name_lead`` (bytes) to its 1-D signal.
    name_lead : sequence of bytes
        Lead names; they are UTF-8 decoded to build the output keys.
    fs : float
        Sampling frequency forwarded to the ``TSD`` helpers.

    Returns
    -------
    Dmean : dict
        Decoded lead name -> array of mean TSD values, one per noise level.
    SD_D : dict
        Decoded lead name -> array of TSD standard deviations, one per noise level.
    the_mean_lead_calculator : numpy.ndarray
        For each noise level, the mean over leads of the mean TSD value.
    the_SDmean_lead_calculator : numpy.ndarray
        For each noise level, the mean over leads of the TSD standard deviation.
    """
    lead_keys = [name.decode('utf8') for name in name_lead]
    mean_by_lead = {key: [] for key in lead_keys}
    sd_by_lead = {key: [] for key in lead_keys}
    mean_over_leads = []
    sd_over_leads = []

    for snr in noise_level:
        # Per-lead results at this noise level. The original code appended to an
        # undefined `inter_Dmean` and never filled the SD accumulator.
        inter_mean = []
        inter_SD = []
        for name, key in zip(name_lead, lead_keys):
            noise_obs = add_observational_noise(dico_signal[name].copy(),snr)
            seg = TSD.Interval_calculator_lead(noise_obs,fs)
            Mean_TSD,SD_TSD = TSD.TSD_mean_calculator(noise_obs,seg,fs)
            inter_mean.append(Mean_TSD)
            inter_SD.append(SD_TSD)
            mean_by_lead[key].append(Mean_TSD)
            sd_by_lead[key].append(SD_TSD)
        mean_over_leads.append(np.mean(inter_mean))
        sd_over_leads.append(np.mean(inter_SD))

    Dmean = {key: np.array(values, dtype=np.float64) for key, values in mean_by_lead.items()}
    SD_D = {key: np.array(values, dtype=np.float64) for key, values in sd_by_lead.items()}
    the_mean_lead_calculator = np.array(mean_over_leads, dtype=np.float64)
    the_SDmean_lead_calculator = np.array(sd_over_leads, dtype=np.float64)
    return Dmean,SD_D,the_mean_lead_calculator,the_SDmean_lead_calculator


def plt_TSDvsNoise(noise_lev,dico_sig,name_l,fs):
    """Plot the mean TSD value against SNR, one curve per lead.

    Parameters
    ----------
    noise_lev : sequence of float
        SNR values used for the x axis and forwarded to ``TSDvsNoiseLevel_array``.
    dico_sig : dict
        Maps each entry of ``name_l`` (bytes) to its 1-D signal.
    name_l : sequence of bytes
        Lead names to plot.
    fs : float
        Sampling frequency forwarded to ``TSDvsNoiseLevel_array``.
    """
    # Only the per-lead means are drawn; the per-lead SDs and the lead-averaged
    # curves returned by the helper are not plotted.
    Great_mean,_,_,_ = TSDvsNoiseLevel_array(noise_lev,dico_sig,name_l,fs)

    labels = [lead.decode('utf8') for lead in name_l]
    plt.figure()
    # Size the colour cycle from the leads actually plotted, not from the
    # notebook-level ECG_lead variable.
    plt.gca().set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, len(name_l)))))
    for label in labels:
        plt.plot(noise_lev,Great_mean[label])
    plt.legend(labels, ncol=4, loc='best',
           columnspacing=1.0, labelspacing=0.0,
           handletextpad=0.0, handlelength=1.5,
           fancybox=True, shadow=True)
    plt.title("Mean TSD value evolution with SNR (db) for a set of acceptable lead")
    plt.xlabel("SNR (db)")
    plt.ylabel("mean TSD value")
    #plt.ylim([1.9,2.1])
    plt.grid()
    plt.show()

#plt_TSDvsNoise(SNR_level,dico_ECG,ECG_lead,fs)

In [None]:
### Collect 100 acceptable ECG recordings (all leads of each recording)

# Collect the first N_ACCEPTABLE recordings labelled "acceptable" in which
# every lead has non-zero mean power.
N_ACCEPTABLE = 100

dataset = {}
stop_cond  = 0
with make_reader(path_petastorm) as reader:
    for sample in reader:
        data = sample
        if data.signal_quality != "acceptable".encode():
            continue

        ECG_signal = data.signal
        # Use the lead names of *this* record rather than whatever ECG_lead
        # was left behind by a previous record or an earlier cell.
        lead_names = data.signal_names

        # Mean power per lead, computed in float64 so that integer samples
        # cannot wrap around when squared.
        lead_power = np.mean(np.abs(ECG_signal[:, :len(lead_names)]).astype(np.float64)**2, axis=0)
        if not lead_power.all():
            continue  # at least one lead has zero power

        ECG_lead = lead_names
        fs = data.sampling_frequency
        dataset[stop_cond] = {name: ECG_signal[:, col] for col, name in enumerate(lead_names)}
        stop_cond += 1
        if stop_cond >= N_ACCEPTABLE:
            break

print(len(dataset))

In [None]:
# Collect the first N_UNACCEPTABLE recordings labelled "unacceptable" in which
# every lead has non-zero mean power.
N_UNACCEPTABLE = 100

unac_dataset = {}
stop_cond  = 0
with make_reader(path_petastorm) as reader:
    for sample in reader:
        data = sample
        if data.signal_quality != "unacceptable".encode():
            continue

        ECG_signal = data.signal
        # Use the lead names of *this* record rather than whatever ECG_lead
        # was left behind by a previous record or an earlier cell.
        lead_names = data.signal_names

        # Mean power per lead, computed in float64 so that integer samples
        # cannot wrap around when squared.
        lead_power = np.mean(np.abs(ECG_signal[:, :len(lead_names)]).astype(np.float64)**2, axis=0)
        if not lead_power.all():
            continue  # at least one lead has zero power

        ECG_lead = lead_names
        fs = data.sampling_frequency
        unac_dataset[stop_cond] = {name: ECG_signal[:, col] for col, name in enumerate(lead_names)}
        stop_cond += 1
        if stop_cond >= N_UNACCEPTABLE:
            break

print(len(unac_dataset))

In [None]:
###Beware, strong "function for calculating mean TSD and error plot of 100 ecg (i.e. 1200 leads)" ahead:

def TSDvsNoiseLevel_100ECG(noise_level,theBIGdataset,name_lead,fs):
    """Mean TSD value across recordings, per lead and per noise level.

    Parameters
    ----------
    noise_level : sequence of float
        SNR values handed to ``add_observational_noise``.
    theBIGdataset : dict
        ``theBIGdataset[k][name]`` is the 1-D signal of lead ``name`` for
        recording ``k``, with ``k`` in ``range(len(theBIGdataset))``. The
        signals of one lead are stacked, so they must share the same length.
    name_lead : sequence
        Lead keys to process.
    fs : float
        Forwarded to ``TSD.TSD_mean_calculator`` (segment length fixed to 100).

    Returns
    -------
    Big_Dmean : dict
        Lead -> array with the mean over recordings of the per-recording mean
        TSD value, one entry per noise level.
    Big_SDmean : dict
        Lead -> array of shape ``(2, len(noise_level))``; row 0 is
        ``|mean - 25th percentile|`` and row 1 is ``|mean - 75th percentile|``.
    """
    n_records = len(theBIGdataset)
    Big_Dmean = {}
    Big_SDmean = {}
    for name in name_lead:
        spread = np.empty([2,len(noise_level)])
        clean = np.vstack([theBIGdataset[rec][name] for rec in range(n_records)])
        level_means = []
        for col, snr in enumerate(noise_level):
            # Corrupt every recording first, then evaluate each noisy row.
            noisy = np.vstack([add_observational_noise(clean[rec,:].copy(),snr) for rec in range(n_records)])
            tsd_values = []
            for row in noisy:
                mean_tsd,_ = TSD.TSD_mean_calculator(row.copy(),100,fs)
                tsd_values.append(mean_tsd)
            tsd_values = np.array(tsd_values, dtype=np.float64)

            centre = np.mean(tsd_values)
            q25 = np.percentile(tsd_values,25)
            q75 = np.percentile(tsd_values,75)
            level_means.append(centre)
            spread[:,col] = np.array([np.abs(centre-q25),np.abs(centre-q75)])
        Big_Dmean[name] = np.array(level_means, dtype=np.float64)
        Big_SDmean[name] = spread

    return Big_Dmean,Big_SDmean


def TSDvsObsNoise_plot_100ECG(noise_level,dergrossdataset,name_lead,fs):
    """Draw, for every lead, the mean TSD value versus SNR with error bars.

    The curves and error bars come from
    ``TSDvsNoiseLevel_100ECG(noise_level, dergrossdataset, name_lead, fs)``,
    which is evaluated on every call. ``name_lead`` holds bytes lead names;
    they are UTF-8 decoded for the legend.
    """
    mean_by_lead, err_by_lead = TSDvsNoiseLevel_100ECG(noise_level,dergrossdataset,name_lead,fs)

    plt.figure()
    palette = plt.cm.jet(np.linspace(0, 1, len(name_lead)))
    plt.gca().set_prop_cycle(plt.cycler('color', palette))

    labels = []
    for lead in name_lead:
        plt.errorbar(noise_level,mean_by_lead[lead],err_by_lead[lead])
        labels.append(lead.decode('utf8'))

    plt.legend(labels, ncol=4, loc='best',
           columnspacing=1.0, labelspacing=0.0,
           handletextpad=0.0, handlelength=1.5,
           fancybox=True, shadow=True)
    plt.xlabel("SNR (db)")
    plt.ylabel("mean TSD value")
    plt.title(f"TSD vs SNR (dB) for average TSD value for all lead, for 100 patients")
    plt.grid()
    plt.show()

# Coarser SNR grid (10 values from -10 to 100); replaces the 50-point grid defined in an earlier cell.
SNR_level = np.linspace(-10,100,10)


In [None]:
### Synthesise ECGs for a range of heart rates


# One noise-free multi-lead simulated ECG per target heart rate
# (100 heart rates evenly spaced between 60 and 180).
HR_p = np.linspace(60,180,100)
synth_dataset = {}
for i, target_hr in enumerate(HR_p):
    ecg_synth = nk.ecg_simulate(10,5000,sampling_rate=500,noise = 0,heart_rate = target_hr,heart_rate_std = 1,method = "multileads")
    np_ecg_synth = ecg_synth.to_numpy()
    # Columns are labelled with the lead names of the real dataset, in order.
    # `dico_synth` (last heart rate) is reused by a later cell.
    dico_synth = {lead: np_ecg_synth[:,col] for col, lead in enumerate(ECG_lead)}
    synth_dataset[i] = dico_synth



In [None]:
# Per-lead mean TSD vs SNR for a single synthetic ECG: `dico_synth` is the
# dictionary left over from the last iteration of the synthesis loop.
# NOTE(review): `fs` comes from the real recordings, while the synthetic ECG is
# simulated with sampling_rate=500 - confirm the two match.
plt_TSDvsNoise(SNR_level,dico_synth,ECG_lead,fs)

In [None]:
# Mean TSD value and percentile-based error bars per lead and SNR level for the
# acceptable, unacceptable and synthetic datasets. These results are passed to
# Comparative_lead_plot in a later cell.
# NOTE(review): `fs` is the value from the real recordings; the synthetic set is
# simulated with sampling_rate=500 - confirm they match.
ok_Dmean,ok_SDDmean = TSDvsNoiseLevel_100ECG(SNR_level,dataset,ECG_lead,fs)
nok_Dmean,nokok_SDDmean = TSDvsNoiseLevel_100ECG(SNR_level,unac_dataset,ECG_lead,fs)
synth_Dmean,synth_SDDmean = TSDvsNoiseLevel_100ECG(SNR_level,synth_dataset,ECG_lead,fs)

In [None]:
# All-lead error-bar plots for the acceptable, then the unacceptable, dataset.
# Each call runs TSDvsNoiseLevel_100ECG again (with fresh random noise) instead
# of reusing the results computed in the previous cell.
TSDvsObsNoise_plot_100ECG(SNR_level,dataset,ECG_lead,fs)
TSDvsObsNoise_plot_100ECG(SNR_level,unac_dataset,ECG_lead,fs)

In [None]:
### Comparative plot, for each lead, between acceptable, unacceptable and synthetic ECGs

def Comparative_lead_plot(Synth_data,Acc_data,Unacc_data,SD_synth,SD_acc,SD_unacc,S_level,name_lead,name = "TSD"):
    """Compare synthetic, acceptable and unacceptable curves for three leads.

    One panel is drawn for each of the first three entries of ``name_lead``.
    Every panel holds three error-bar curves read from the ``*_data`` (values)
    and ``SD_*`` (error bars) dictionaries, all keyed by lead name and plotted
    against ``S_level``. ``name`` is the metric name used in the axis label and
    the figure title. Only the curves of the first panel carry labels; they
    feed the single figure-level legend.
    """
    fig,ax = plt.subplots(nrows = 1,ncols = 3,figsize = (20,10))
    #plt.rcParams.update({'font.size':20})
    fig.tight_layout(h_pad=4)

    sources = (
        (Synth_data, SD_synth, " Synthethic lead "),
        (Acc_data, SD_acc, " Acceptable lead "),
        (Unacc_data, SD_unacc, " Unacceptable lead "),
    )
    for col, lead in enumerate(name_lead[:3]):
        panel = ax[col]
        # Resolve every series for this lead before drawing anything.
        curves = [(values[lead], errors[lead], text) for values, errors, text in sources]
        for values, errors, text in curves:
            label_kw = {"label": text} if col == 0 else {}
            panel.errorbar(S_level,values,errors,**label_kw)

        panel.set_xlabel("SNR (db)")
        panel.set_ylabel(f"mean {name} value")
        panel.set_title(f"Lead {lead.decode('utf8')}")
        panel.grid()

    handles, labels = ax[0].get_legend_handles_labels()
    plt.figlegend(handles, labels, loc = (0.84,0.7),labelspacing=1.0,
            handletextpad=0.0, handlelength=1.5,
            fancybox=True, shadow=True)
    fig.suptitle(f"{name} vs SNR (dB) for average {name} value for 100 patients", fontsize=20)
    fig.subplots_adjust(top=0.90)

In [None]:
# TSD comparison (synthetic vs acceptable vs unacceptable) for the first three leads.
Comparative_lead_plot(synth_Dmean,ok_Dmean,nok_Dmean,synth_SDDmean,ok_SDDmean,nokok_SDDmean,SNR_level,ECG_lead)

In [None]:
##For Hurst exponent:
def HurstDvsNoiseLevel_100ECG(noise_level,theBIGdataset,name_lead,fs):
    """Mean of ``2 - Hurst.genhurst(signal, 1)`` across recordings, per lead and noise level.

    Parameters
    ----------
    noise_level : sequence of float
        SNR values handed to ``add_observational_noise``.
    theBIGdataset : dict
        ``theBIGdataset[k][name]`` is the 1-D signal of lead ``name`` for
        recording ``k``, with ``k`` in ``range(len(theBIGdataset))``. The
        signals of one lead are stacked, so they must share the same length.
    name_lead : sequence
        Lead keys to process.
    fs : float
        Unused here; kept so the signature matches ``TSDvsNoiseLevel_100ECG``.

    Returns
    -------
    Big_Dmean : dict
        Lead -> array of the mean value over recordings, one entry per noise level.
    Big_SDmean : dict
        Lead -> array of shape ``(2, len(noise_level))``; row 0 is
        ``|mean - 25th percentile|`` and row 1 is ``|mean - 75th percentile|``.
    """
    Big_Dmean = {}
    Big_SDmean = {}
    record_ids = range(len(theBIGdataset))
    for name in name_lead:
        quartile_gap = np.empty([2,len(noise_level)])
        stacked = np.vstack([theBIGdataset[rec][name] for rec in record_ids])
        means_per_level = []
        for col, snr in enumerate(noise_level):
            # All rows are corrupted before any of them is analysed.
            stacked_noisy = np.vstack([add_observational_noise(stacked[rec,:].copy(),snr) for rec in record_ids])
            dims = np.asarray(
                [2-Hurst.genhurst(stacked_noisy[b,:].copy(),1) for b in range(stacked_noisy.shape[0])],
                dtype=np.float64,
            ).ravel()
            m = np.mean(dims)
            p25, p75 = np.percentile(dims,25), np.percentile(dims,75)
            means_per_level.append(m)
            quartile_gap[:,col] = np.array([np.abs(m-p25),np.abs(m-p75)])
        Big_Dmean[name] = np.array(means_per_level, dtype=np.float64)
        Big_SDmean[name] = quartile_gap

    return Big_Dmean,Big_SDmean


# def TSDvsObsNoise_plot_100ECG(noise_level,dergrossdataset,name_lead,fs):
#     BDM,BP = TSDvsNoiseLevel_100ECG(noise_level,dergrossdataset,name_lead,fs)
#     plt.figure()
#     colormap = plt.cm.gist_ncar
#     plt.gca().set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, len(name_lead)))))
#     labels = []
#     for i in name_lead:
#         plt.errorbar(noise_level,BDM[i],BP[i])
#         #plt.plot(noise_level,BDM[i])
#         labels.append(i.decode('utf8'))
#     plt.legend(labels, ncol=4, loc='best',  
#            columnspacing=1.0, labelspacing=0.0,
#            handletextpad=0.0, handlelength=1.5,
#            fancybox=True, shadow=True)
    
#     plt.xlabel("SNR (db)")
#     plt.ylabel("mean TSD value")
#     plt.title(f"TSD vs SNR (dB) for average TSD value for all lead, for 100 patients") 
#     plt.grid()
#     plt.show()

# SNR grid for the Hurst experiment: 10 values from -10 to 100 (same grid as assigned in the TSD cell above).
SNR_level = np.linspace(-10,100,10)

In [None]:
# Hurst-based value (2 - genhurst) and percentile-based error bars per lead and
# SNR level for the acceptable, unacceptable and synthetic datasets.
ok_HDmean,ok_SDHDmean = HurstDvsNoiseLevel_100ECG(SNR_level,dataset,ECG_lead,fs)
nok_HDmean,nokok_SDHDmean = HurstDvsNoiseLevel_100ECG(SNR_level,unac_dataset,ECG_lead,fs)
synth_HDmean,synth_SDHDmean = HurstDvsNoiseLevel_100ECG(SNR_level,synth_dataset,ECG_lead,fs)

In [None]:
# Same three-lead comparison as for TSD, using the Hurst-based results; the last argument sets the metric name shown in labels and title.
Comparative_lead_plot(synth_HDmean,ok_HDmean,nok_HDmean,synth_SDHDmean,ok_SDHDmean,nokok_SDHDmean,SNR_level,ECG_lead,"Fractal Dimension from Hurst")

In [None]:
###Comparison TSD from ECG with different pathologies:
path_formatted_pathos = "/workspaces/maitrise/data/20220902_data_physio_formatted_merged/merged/dataParquet"
path_petastorm_pathos = f"file:///{path_formatted_pathos}"
path_csv_SNOMED_label = "/workspaces/maitrise/data/Dx_map.csv"

### Read the label map: column 0 is used as the pathology name, column 1 as its code.
pd_pathos_label = pd.read_csv(path_csv_SNOMED_label)
pathos_label = pd_pathos_label.to_numpy()

pathology_name = pathos_label[:,0]
pathology_code = pathos_label[:,1]

#pathology_want = [426783006,164896001,426627000,164909002,164861001]
pathology_want = [427084000]

### Build a dict {pathology name: {lead name: signal}} holding one recording
### per wanted pathology code.
dico_all = {}
fs = 0
# Wanted codes that have not been found yet. Reading stops as soon as this is
# empty. (The previous exit test compared pathology *names* with *codes*, so it
# never fired and the whole dataset was always read.)
codes_missing = list(pathology_want)
with make_reader(path_petastorm_pathos) as reader:
    for sample1 in reader:
        if not codes_missing:
            break
        data1 = sample1
        score_class = data1.score_classes
        # Keep only recordings carrying exactly one non-zero class code.
        if score_class.size != 1 or score_class[0] == 0:
            continue
        code = score_class[0]
        # Filter on the wanted codes *before* looking the name up, so codes
        # absent from the map no longer raise for unrelated recordings.
        if code not in codes_missing:
            continue

        get_name_pathos = pathology_name[pathology_code == code]
        if len(get_name_pathos) == 0:
            raise KeyError(f"pathology code {code} is not listed in {path_csv_SNOMED_label}")

        codes_missing = [wanted for wanted in codes_missing if wanted != code]
        if get_name_pathos[0] in dico_all:
            continue

        # fs now always belongs to a recording that is actually stored.
        fs = data1.sampling_frequency
        dat = data1.signal
        dico_ECG = {}
        for i,j in zip(data1.signal_names,range(12)):
            dico_ECG[i] = dat[:,j]
        dico_all[get_name_pathos[0]] = dico_ECG



In [None]:
## Plot the TSD difference between a sample without disease and one with disease. Done for each disease selected above.
def add_observational_noise_segment(signal,SNR,start=1500,stop=3000,rng=None):
    """Add white Gaussian noise to one slice of ``signal`` only.

    The noise variance is derived from the mean power of the *whole* signal so
    that ``10*log10(mean(|signal|**2) / noise_variance) == SNR``; the noise is
    then added to ``signal[start:stop]`` and the rest is left untouched.

    Parameters
    ----------
    signal : array-like, 1-D
        Clean signal. It is not modified.
    SNR : float
        Target signal-to-noise ratio in dB.
    start, stop : int, optional
        Bounds of the corrupted slice (defaults: 1500 and 3000).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is used.

    Returns
    -------
    numpy.ndarray
        Floating-point copy of ``signal`` with noise added on ``[start:stop]``.
    """
    signal = np.asarray(signal)
    # Work on a floating-point copy: writing float noise back into an integer
    # array would silently truncate it.
    sig_noisy = signal.astype(np.result_type(signal.dtype, np.float64))
    if signal.size == 0:
        return sig_noisy

    # Square in float64 so integer samples cannot wrap around.
    magnitude = np.abs(signal).astype(np.float64)
    Power_sig = (1/len(signal))*np.sum(magnitude**2,dtype = np.float64)
    P_db = 10*np.log10(Power_sig)
    noisedb = P_db - SNR
    noise_power = 10**(noisedb/10)

    # Noise is drawn for the full length and then sliced, which keeps the
    # random stream identical to the previous implementation.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0,np.sqrt(noise_power),len(signal))
    sig_noisy[start:stop] += noise[start:stop]
    return sig_noisy

# One figure per selected pathology: sliding-window TSD value on top, the raw
# lead below. Both panels are zoomed on the x range [0, 1].
# NOTE(review): ECG_lead comes from an earlier cell (another dataset); this
# assumes the pathology recordings use the same lead names - confirm.
pathology_name = list(dico_all.keys())
index_lead = 0
for j in pathology_name:
    lead_key = ECG_lead[index_lead]
    lead_label = lead_key.decode('utf8')

    #sig = add_observational_noise_segment(dico_all[j][lead_key],10)
    sig = dico_all[j][lead_key]
    #segment_length = TSD.Interval_calculator_lead(sig,fs)
    segment_length = 100

    # Overlapping windows, one per start index, each handed to TSD.Lq_k.
    X = np.c_[[sig[int((w - 1)) : int((w) + segment_length)] for w in range(1, int(len(sig) - segment_length))]]
    L1 = np.array([TSD.Lq_k(X[row, :], 1, fs) for row in range(X.shape[0])])
    L2 = np.array([TSD.Lq_k(X[row, :], 2, fs) for row in range(X.shape[0])])
    Ds = (np.log(L1) - np.log(L2)) / (np.log(2))

    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 15))
    w_length =np.linspace(0,len(Ds),len(Ds))/fs
    ax[0].plot(w_length, Ds)
    ax[0].set_title(f"TSD time Evolution of Lead {lead_label} for {j} ")
    ax[0].set_xlabel("lag")
    ax[0].set_ylabel("TSD value")
    ax[0].grid()
    ax[0].set_xlim([0,1])
    # The label is the decoded lead name (a str) rather than the raw bytes object.
    ax[1].plot(np.linspace(0, int(len(sig) / fs), len(sig)), sig, label=lead_label)
    ax[1].set_title(f"Lead {lead_label} for {j}")
    ax[1].set_xlabel("Time (sec)")
    ax[1].set_ylabel("Voltage Amplitude")
    ax[1].set_xlim([0,1])
    ax[1].grid()
    plt.show()
        

In [None]:
###Using a normal correct ECG signal : 
# Simulate one noise-free multi-lead ECG at 60 bpm and plot the TSD time course
# of its first lead above the lead itself.
fs_synth = 500  # sampling rate of the simulated ECG; used instead of the `fs` left over from the real data
ecg_synth = nk.ecg_simulate(10,5000,sampling_rate=fs_synth,noise = 0,heart_rate = 60,heart_rate_std = 1,method = "multileads")
dico_synth = {}
np_ecg_synth = ecg_synth.to_numpy()
for n,j in zip(ECG_lead,range(len(ECG_lead))):
    dico_synth[n] = np_ecg_synth[:,j]

# The plotted lead and the lead named in the titles are now the same object,
# independent of an `index_lead` left behind by a previous cell.
lead_synth = ECG_lead[0]
lead_synth_label = lead_synth.decode('utf8')

sig_test = dico_synth[lead_synth]
segment_length = 10
Ds,_,_ = TSD.TSD_calculator(sig_test,segment_length,fs_synth)
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 15))
w_length =np.linspace(0,len(Ds),len(Ds))/fs_synth
ax[0].plot(w_length, Ds)
ax[0].set_title(f"TSD time Evolution of Lead {lead_synth_label} (synthetise) ")
ax[0].set_xlabel("lag")
ax[0].set_ylabel("TSD value")
ax[0].grid()
ax[0].set_xlim([0.6,1.50])
# The time axis is built from sig_test itself; it previously used len(sig),
# a variable belonging to the pathology cell.
ax[1].plot(np.linspace(0, int(len(sig_test) / fs_synth), len(sig_test)), sig_test, label=lead_synth_label)
ax[1].set_title(f"Lead {lead_synth_label} (synthetise lead)")
ax[1].set_xlabel("Time (sec)")
ax[1].set_ylabel("Voltage Amplitude")
ax[1].set_xlim([0.6,1.50])
ax[1].grid()
plt.show()