In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import heartpy as hp
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from random import shuffle
from scipy.interpolate import CubicSpline
from numba import jit



# remove the superfluous columns

In [None]:
Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)
Y_val

In [None]:
og_labels =  ['Sinusal','Regular','Monomorph','QS complex in V1-V2-V3','R complex in V5-V6','T wave inversion (inferior - II, III, aVF)','Left bundle branch block','RaVL > 11 mm','SV1 + RV5 or RV6 > 35 mm','T wave inversion (lateral -I, aVL, V5-V6)','T wave inversion (anterior - V3-V4)','Left axis deviation','Left ventricular hypertrophy','Bradycardia','Q wave (inferior - II, III, aVF)','Afib','Irregularly irregular','Atrial tachycardia (>= 100 BPM)','Nonspecific intraventricular conduction delay','Premature ventricular complex','Polymorph','T wave inversion (septal- V1-V2)','Right bundle branch block','Ventricular paced','ST elevation (anterior - V3-V4)','ST elevation (septal - V1-V2)','1st degree AV block','Premature atrial complex','Atrial flutter',"rSR' in V1-V2",'qRS in V5-V6-I, aVL','Left anterior fascicular block','Right axis deviation','2nd degree AV block - mobitz 1','ST depression (inferior - II, III, aVF)','Acute pericarditis','ST elevation (inferior - II, III, aVF)','Low voltage','Regularly irregular','Bifid','Junctional rhythm','Left atrial enlargement','ST elevation (lateral - I, aVL, V5-V6)','Atrial paced','Right ventricular hypertrophy','Delta wave','Wolff-Parkinson-White (Pre-excitation syndrome)','Prolonged QT','ST depression (anterior - V3-V4)','QRS complex negative in III','RaVL + SV3 > 28 mm (H) or 20 mm (F)','Q wave (lateral- I, aVL, V5-V6)','Hyperacute T wave (lateral, V5-V6)','Hyperacute T wave (septal, V1-V2)','Supraventricular tachycardia','ST downslopping','ST depression (lateral - I, avL, V5-V6)','2nd degree AV block - mobitz 2','U wave','ST depression et T inversion in V5 or V6','Large >0.08 s','R/S ratio in V1-V2 >1','RV1 + SV6\xa0> 11 mm','Left posterior fascicular block','Right atrial enlargement','ST depression (septal- V1-V2)','Q wave (septal- V1-V2)','Q wave (anterior - V3-V4)','Hyperacute T wave (anterior, V3-V4)','ST upslopping','Right superior axis','Auricular bigeminy','Ventricular tachycardia','ST elevation (posterior - V7-V8-V9)','Ectopic atrial rhythm (< 100 BPM)','Lead misplacement','Biphasic','Ventricular bigeminy','J wave','Tall >2.5 mm','Third Degree AV Block','Sinus Pause','Acute MI','Early repolarization','Q wave (posterior - V7-V9)','Bi-atrial enlargement','LV pacing','Dextrocardia','Brugada','Ventricular Rhythm','ST depression (posterior - V7-V8-V9)','no_qrs']
to_remove_labels = ['ST depression (posterior - V7-V8-V9)','Tall >2.5 mm', 'J wave', 'Auricular bigeminy', 'Ventricular bigeminy', 'Sinus Pause', 'Dextrocardia', 'Hyperacute T wave (lateral, V5-V6)', 'Hyperacute T wave (septal, V1-V2)', 'Hyperacute T wave (anterior, V3-V4)', 'Bifid', 'RaVL + SV3 > 28 mm (H) or 20 mm (F)', 'Large >0.08 s', 'Biphasic', 'ST depression et T inversion in V5 or V6']

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)
Y_val = np.delete(Y_val, pos_to_drop, axis=1)


In [None]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

In [None]:
Y_pred = np.load('/volume/my_name/resnet50_standardscaler_leads_42_v1.1/output_1.npy')

def sigmoid(x):
  return 1 / (1 + math.exp(-x))

sigmoid_v = np.vectorize(sigmoid)
Y_pred = sigmoid_v(Y_pred)
sigmoid_v_bin =  np.where(Y_pred > 0.5, 1, 0)
sigmoid_v_bin

In [None]:
#check shape, should be (149177, 77)
print(Y_val.shape)
print(sigmoid_v_bin.shape)

In [None]:
from sklearn.metrics import roc_auc_score, auc, accuracy_score
from sklearn.metrics import precision_recall_curve

dict_results = dict()
for pos, label in enumerate(tqdm(new_label_names)):
    ROC = roc_auc_score(Y_val[:,pos], sigmoid_v_bin[:,pos], average=None)
    precision, recall, thresholds = precision_recall_curve(Y_val[:,pos], sigmoid_v_bin[:,pos])
    PR = auc(recall, precision)

    acc = accuracy_score(Y_val[:,pos], sigmoid_v_bin[:,pos])

    dict_results.update({label:[ROC,PR,acc]})


In [None]:
df_out = pd.DataFrame.from_dict(dict_results).T
df_out.columns = ['ROC','PR','ACC']
df_out

# this is to get the label frequency from the train set

In [None]:
Y_val_ = np.load('/media/data1/muse_ge/Y_train_v1.1.npy').astype(np.int64)

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)

#print(Y_train.shape)
Y_val_ = np.delete(Y_val_, pos_to_drop, axis=1)


label_counts = np.sum(Y_val_, axis=0)
label_counts/Y_val_.shape[0]

In [None]:
#add the prevalence to the df
df_out['Prevalence'] = label_counts/Y_val_.shape[0]

In [None]:
#create a label for ease of use in matplotlib
df_out_ = df_out[['ROC','Prevalence']]
df_out_.index = [f'{i} ({"{:.3f}".format(j)})' for i,j in zip(df_out_.index,df_out_.ROC)]

#### Robert final approach (seems finicky with figsize)

In [None]:
import seaborn as sns
from matplotlib.pyplot import figure
sns.set_style("whitegrid")

figure(figsize=(12, 2), dpi=80)
df_out_.sort_values('ROC').plot( kind= 'bar' , secondary_y= 'Prevalence' )
plt.xticks(rotation=90, fontsize=8)
plt.title('Distribution of ROC avg 3 seed')
#plt.savefig('/volume/core_model/ROC.jpg', dpi=600, bbox_inches="tight")


#### Robert old approach

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots(figsize=(12, 2))
ax1=sns.barplot(y=df_out.sort_values('PR', ascending=True)['PR'], x=df_out.sort_values('PR', ascending=True).index)#, ax=ax1)
#ax2 = ax1.twinx()
sns.barplot(x=[f'{i} ({round(j,4)})' for i,j in zip(df_out.sort_values('PR', ascending=True).index,df_out.sort_values('PR', ascending=True)['PR'].values)]  , y=-1*df_out.sort_values('PR', ascending=True)['preponderance'].values,color='blue')#,ax=ax2, color='blue)
plt.xticks(rotation=90, fontsize=8)
plt.ylabel('proportion - PR')


### General performance histogram

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.histplot(df_out['ROC'])
plt.title("Score distribution")

# first approach without frenquency

In [None]:
from sklearn.metrics import roc_auc_score, auc, accuracy_score
from sklearn.metrics import precision_recall_curve
import seaborn as sns
import matplotlib.pyplot as plt

og_labels =  ['Sinusal','Regular','Monomorph','QS complex in V1-V2-V3','R complex in V5-V6','T wave inversion (inferior - II, III, aVF)','Left bundle branch block','RaVL > 11 mm','SV1 + RV5 or RV6 > 35 mm','T wave inversion (lateral -I, aVL, V5-V6)','T wave inversion (anterior - V3-V4)','Left axis deviation','Left ventricular hypertrophy','Bradycardia','Q wave (inferior - II, III, aVF)','Afib','Irregularly irregular','Atrial tachycardia (>= 100 BPM)','Nonspecific intraventricular conduction delay','Premature ventricular complex','Polymorph','T wave inversion (septal- V1-V2)','Right bundle branch block','Ventricular paced','ST elevation (anterior - V3-V4)','ST elevation (septal - V1-V2)','1st degree AV block','Premature atrial complex','Atrial flutter',"rSR' in V1-V2",'qRS in V5-V6-I, aVL','Left anterior fascicular block','Right axis deviation','2nd degree AV block - mobitz 1','ST depression (inferior - II, III, aVF)','Acute pericarditis','ST elevation (inferior - II, III, aVF)','Low voltage','Regularly irregular','Bifid','Junctional rhythm','Left atrial enlargement','ST elevation (lateral - I, aVL, V5-V6)','Atrial paced','Right ventricular hypertrophy','Delta wave','Wolff-Parkinson-White (Pre-excitation syndrome)','Prolonged QT','ST depression (anterior - V3-V4)','QRS complex negative in III','RaVL + SV3 > 28 mm (H) or 20 mm (F)','Q wave (lateral- I, aVL, V5-V6)','Hyperacute T wave (lateral, V5-V6)','Hyperacute T wave (septal, V1-V2)','Supraventricular tachycardia','ST downslopping','ST depression (lateral - I, avL, V5-V6)','2nd degree AV block - mobitz 2','U wave','ST depression et T inversion in V5 or V6','Large >0.08 s','R/S ratio in V1-V2 >1','RV1 + SV6\xa0> 11 mm','Left posterior fascicular block','Right atrial enlargement','ST depression (septal- V1-V2)','Q wave (septal- V1-V2)','Q wave (anterior - V3-V4)','Hyperacute T wave (anterior, V3-V4)','ST upslopping','Right superior axis','Auricular bigeminy','Ventricular tachycardia','ST elevation (posterior - V7-V8-V9)','Ectopic atrial rhythm (< 100 BPM)','Lead misplacement','Biphasic','Ventricular bigeminy','J wave','Tall >2.5 mm','Third Degree AV Block','Sinus Pause','Acute MI','Early repolarization','Q wave (posterior - V7-V9)','Bi-atrial enlargement','LV pacing','Dextrocardia','Brugada','Ventricular Rhythm','ST depression (posterior - V7-V8-V9)','no_qrs']
to_remove_labels = ['ST depression (posterior - V7-V8-V9)','Tall >2.5 mm', 'J wave', 'Auricular bigeminy', 'Ventricular bigeminy', 'Sinus Pause', 'Dextrocardia', 'Hyperacute T wave (lateral, V5-V6)', 'Hyperacute T wave (septal, V1-V2)', 'Hyperacute T wave (anterior, V3-V4)', 'Bifid', 'RaVL + SV3 > 28 mm (H) or 20 mm (F)', 'Large >0.08 s', 'Biphasic', 'ST depression et T inversion in V5 or V6']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)

#print(Y_train.shape)
Y_val = np.delete(Y_val, pos_to_drop, axis=1)

dict_results = dict(zip(new_label_names,[[0,0,0] for _ in range(len(new_label_names))]))
for i in [2023,1997,42]:
    Y_pred = np.load(f'/volume/my_name/resnet50_notscaled_{i}_v1.1/output_1.npy')
    Y_pred = sigmoid_v(Y_pred)
    sigmoid_v_bin =  np.where(Y_pred > 0.5, 1, 0)
    

    for pos, label in enumerate(new_label_names):
        try:
            ROC = roc_auc_score(Y_val[:,pos], sigmoid_v_bin[:,pos], average=None)
        except:
            ROC = 0.5
        precision, recall, thresholds = precision_recall_curve(Y_val[:,pos], sigmoid_v_bin[:,pos])
        PR = auc(recall, precision)

        acc = accuracy_score(Y_val[:,pos], sigmoid_v_bin[:,pos])

        dict_results[label][0] += ROC
        dict_results[label][1] += PR
        dict_results[label][2] += acc

for k,v in dict_results.items():
    v[0] = v[0]/3
    v[1] = v[1]/3
    v[2] = v[2]/3
df_out = pd.DataFrame.from_dict(dict_results).T
df_out.columns = ['ROC','PR','ACC']
df_out

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(rc={'figure.figsize':(15.7,2)})

sns.barplot(y=df_out.sort_values('ACC', ascending=True)['ACC'], x=df_out.sort_values('ACC', ascending=True).index)
plt.xticks(rotation=90, fontsize=8)
plt.title('Distribution of ACC avg 3 seed')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.histplot(df_out['ROC'])
plt.title("Score distribution")

# test the scaling's impace on the results

In [None]:
Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)
og_labels =  ['Sinusal','Regular','Monomorph','QS complex in V1-V2-V3','R complex in V5-V6','T wave inversion (inferior - II, III, aVF)','Left bundle branch block','RaVL > 11 mm','SV1 + RV5 or RV6 > 35 mm','T wave inversion (lateral -I, aVL, V5-V6)','T wave inversion (anterior - V3-V4)','Left axis deviation','Left ventricular hypertrophy','Bradycardia','Q wave (inferior - II, III, aVF)','Afib','Irregularly irregular','Atrial tachycardia (>= 100 BPM)','Nonspecific intraventricular conduction delay','Premature ventricular complex','Polymorph','T wave inversion (septal- V1-V2)','Right bundle branch block','Ventricular paced','ST elevation (anterior - V3-V4)','ST elevation (septal - V1-V2)','1st degree AV block','Premature atrial complex','Atrial flutter',"rSR' in V1-V2",'qRS in V5-V6-I, aVL','Left anterior fascicular block','Right axis deviation','2nd degree AV block - mobitz 1','ST depression (inferior - II, III, aVF)','Acute pericarditis','ST elevation (inferior - II, III, aVF)','Low voltage','Regularly irregular','Bifid','Junctional rhythm','Left atrial enlargement','ST elevation (lateral - I, aVL, V5-V6)','Atrial paced','Right ventricular hypertrophy','Delta wave','Wolff-Parkinson-White (Pre-excitation syndrome)','Prolonged QT','ST depression (anterior - V3-V4)','QRS complex negative in III','RaVL + SV3 > 28 mm (H) or 20 mm (F)','Q wave (lateral- I, aVL, V5-V6)','Hyperacute T wave (lateral, V5-V6)','Hyperacute T wave (septal, V1-V2)','Supraventricular tachycardia','ST downslopping','ST depression (lateral - I, avL, V5-V6)','2nd degree AV block - mobitz 2','U wave','ST depression et T inversion in V5 or V6','Large >0.08 s','R/S ratio in V1-V2 >1','RV1 + SV6\xa0> 11 mm','Left posterior fascicular block','Right atrial enlargement','ST depression (septal- V1-V2)','Q wave (septal- V1-V2)','Q wave (anterior - V3-V4)','Hyperacute T wave (anterior, V3-V4)','ST upslopping','Right superior axis','Auricular bigeminy','Ventricular tachycardia','ST elevation (posterior - V7-V8-V9)','Ectopic atrial rhythm (< 100 BPM)','Lead misplacement','Biphasic','Ventricular bigeminy','J wave','Tall >2.5 mm','Third Degree AV Block','Sinus Pause','Acute MI','Early repolarization','Q wave (posterior - V7-V9)','Bi-atrial enlargement','LV pacing','Dextrocardia','Brugada','Ventricular Rhythm','ST depression (posterior - V7-V8-V9)','no_qrs']
to_remove_labels = ['ST depression (posterior - V7-V8-V9)','Tall >2.5 mm', 'J wave', 'Auricular bigeminy', 'Ventricular bigeminy', 'Sinus Pause', 'Dextrocardia', 'Hyperacute T wave (lateral, V5-V6)', 'Hyperacute T wave (septal, V1-V2)', 'Hyperacute T wave (anterior, V3-V4)', 'Bifid', 'RaVL + SV3 > 28 mm (H) or 20 mm (F)', 'Large >0.08 s', 'Biphasic', 'ST depression et T inversion in V5 or V6']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)
Y_val = np.delete(Y_val, pos_to_drop, axis=1)
Y_val

In [None]:
Y_pred = np.load('/volume/my_name/resnet50_robustscaler_leads_1997_v1.1/output_1.npy')

def sigmoid(x):
  return 1 / (1 + math.exp(-x))

sigmoid_v = np.vectorize(sigmoid)
Y_pred = sigmoid_v(Y_pred)
sigmoid_v_bin =  np.where(Y_pred > 0.5, 1, 0)
sigmoid_v_bin

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import roc_auc_score
import scikit_posthocs as sp
from statistics import mean

In [None]:
def hamming_score(y_true: np.ndarray, y_pred: np.ndarray):
    numerator = (y_true & y_pred).sum(axis=1)
    denominator = (y_true | y_pred).sum(axis=1)

    return np.divide(numerator, denominator, out=np.ones_like(numerator, dtype=np.float_),
                        where=denominator != 0).mean()

In [None]:
list_condition = list()
list_inter = list()
list_score = list()

metric = 'avg_acc' #['cat_accuracy', 'hamming', 'avg_acc', 'roc_macro', 'roc_micro', 'pr_macro', 'pr_micro']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)
og_labels =  ['Sinusal','Regular','Monomorph','QS complex in V1-V2-V3','R complex in V5-V6','T wave inversion (inferior - II, III, aVF)','Left bundle branch block','RaVL > 11 mm','SV1 + RV5 or RV6 > 35 mm','T wave inversion (lateral -I, aVL, V5-V6)','T wave inversion (anterior - V3-V4)','Left axis deviation','Left ventricular hypertrophy','Bradycardia','Q wave (inferior - II, III, aVF)','Afib','Irregularly irregular','Atrial tachycardia (>= 100 BPM)','Nonspecific intraventricular conduction delay','Premature ventricular complex','Polymorph','T wave inversion (septal- V1-V2)','Right bundle branch block','Ventricular paced','ST elevation (anterior - V3-V4)','ST elevation (septal - V1-V2)','1st degree AV block','Premature atrial complex','Atrial flutter',"rSR' in V1-V2",'qRS in V5-V6-I, aVL','Left anterior fascicular block','Right axis deviation','2nd degree AV block - mobitz 1','ST depression (inferior - II, III, aVF)','Acute pericarditis','ST elevation (inferior - II, III, aVF)','Low voltage','Regularly irregular','Bifid','Junctional rhythm','Left atrial enlargement','ST elevation (lateral - I, aVL, V5-V6)','Atrial paced','Right ventricular hypertrophy','Delta wave','Wolff-Parkinson-White (Pre-excitation syndrome)','Prolonged QT','ST depression (anterior - V3-V4)','QRS complex negative in III','RaVL + SV3 > 28 mm (H) or 20 mm (F)','Q wave (lateral- I, aVL, V5-V6)','Hyperacute T wave (lateral, V5-V6)','Hyperacute T wave (septal, V1-V2)','Supraventricular tachycardia','ST downslopping','ST depression (lateral - I, avL, V5-V6)','2nd degree AV block - mobitz 2','U wave','ST depression et T inversion in V5 or V6','Large >0.08 s','R/S ratio in V1-V2 >1','RV1 + SV6\xa0> 11 mm','Left posterior fascicular block','Right atrial enlargement','ST depression (septal- V1-V2)','Q wave (septal- V1-V2)','Q wave (anterior - V3-V4)','Hyperacute T wave (anterior, V3-V4)','ST upslopping','Right superior axis','Auricular bigeminy','Ventricular tachycardia','ST elevation (posterior - V7-V8-V9)','Ectopic atrial rhythm (< 100 BPM)','Lead misplacement','Biphasic','Ventricular bigeminy','J wave','Tall >2.5 mm','Third Degree AV Block','Sinus Pause','Acute MI','Early repolarization','Q wave (posterior - V7-V9)','Bi-atrial enlargement','LV pacing','Dextrocardia','Brugada','Ventricular Rhythm','ST depression (posterior - V7-V8-V9)','no_qrs']
to_remove_labels = ['ST depression (posterior - V7-V8-V9)','Tall >2.5 mm', 'J wave', 'Auricular bigeminy', 'Ventricular bigeminy', 'Sinus Pause', 'Dextrocardia', 'Hyperacute T wave (lateral, V5-V6)', 'Hyperacute T wave (septal, V1-V2)', 'Hyperacute T wave (anterior, V3-V4)', 'Bifid', 'RaVL + SV3 > 28 mm (H) or 20 mm (F)', 'Large >0.08 s', 'Biphasic', 'ST depression et T inversion in V5 or V6']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)
Y_val = np.delete(Y_val, pos_to_drop, axis=1)

dict_eq = {42:0,1997:1,2023:2}

for condition in ['notscaled','standardscaler','minmaxscaler','maxabsscaler','robustscaler','quantiletransformeruniform','quantiletransformernormal']:
    for approach in [None,'leads']:
        if approach == 'leads' and condition == 'notscaled':
            continue
        for seed in [42,1997,2023]:
            if approach == None:
                output_matrix = np.load(f'/volume/my_name/resnet50_{condition}_{seed}_v1.1/output_1.npy')
            else:
                if condition != 'notscaled':
                    output_matrix = np.load(f'/volume/my_name/resnet50_{condition}_leads_{seed}_v1.1/output_1.npy')

            def sigmoid(x):
                return 1 / (1 + math.exp(-x))

            sigmoid_v = np.vectorize(sigmoid)
            Y_pred = sigmoid_v(output_matrix)
            sigmoid_v_bin =  np.where(Y_pred > 0.5, 1, 0)
            sigmoid_v_bin


            if approach != None:
                name = f'{condition}_{approach}'

            else:
                name = condition

            if metric == 'cat_accuracy':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(accuracy_score(Y_val, sigmoid_v_bin))

            if metric == 'hamming':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(accuracy_score(Y_val, sigmoid_v_bin))
            
            if metric == 'avg_acc':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(mean([accuracy_score(Y_val[:,i], sigmoid_v_bin[:,i]) for i in range(Y_val.shape[1])]))

            if metric == 'roc_macro':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(roc_auc_score(Y_val, sigmoid_v_bin, average='macro'))            
                                  
            if metric == 'roc_micro':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(roc_auc_score(Y_val, sigmoid_v_bin, average='micro'))          


            if metric == 'pr_macro':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(average_precision_score(Y_val, sigmoid_v_bin, average='macro'))          

            if metric == 'pr_micro':
                list_condition.append(name)
                list_inter.append(dict_eq[seed])
                list_score.append(average_precision_score(Y_val, sigmoid_v_bin, average='micro'))     


df_ = pd.DataFrame(zip(list_condition,list_inter,list_score), columns=['method','fold','score'])
avg_rank = df_.groupby('fold').score.rank(pct=True).groupby(df_.method).mean()
test_results = sp.posthoc_conover_friedman(
    df_,
    melted=True,
    block_col='fold',
    group_col='method',
    y_col='score',
)
#sp.sign_plot(test_results)
#plt.title("Conover test PR Macro")
sp.critical_difference_diagram(avg_rank, test_results)
plt.title("CDD for Acc")

# effect of filter filter

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import heartpy as hp
import gc


In [None]:
# generate the resutls
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import roc_auc_score
import scikit_posthocs as sp

list_condition = list()
list_inter = list()
list_score = list()

metric = 'pr_micro' #['cat_accuracy', 'hamming', 'avg_acc', 'roc_macro', 'roc_micro', 'pr_macro', 'pr_micro']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)
og_labels =  ['Sinusal','Regular','Monomorph','QS complex in V1-V2-V3','R complex in V5-V6','T wave inversion (inferior - II, III, aVF)','Left bundle branch block','RaVL > 11 mm','SV1 + RV5 or RV6 > 35 mm','T wave inversion (lateral -I, aVL, V5-V6)','T wave inversion (anterior - V3-V4)','Left axis deviation','Left ventricular hypertrophy','Bradycardia','Q wave (inferior - II, III, aVF)','Afib','Irregularly irregular','Atrial tachycardia (>= 100 BPM)','Nonspecific intraventricular conduction delay','Premature ventricular complex','Polymorph','T wave inversion (septal- V1-V2)','Right bundle branch block','Ventricular paced','ST elevation (anterior - V3-V4)','ST elevation (septal - V1-V2)','1st degree AV block','Premature atrial complex','Atrial flutter',"rSR' in V1-V2",'qRS in V5-V6-I, aVL','Left anterior fascicular block','Right axis deviation','2nd degree AV block - mobitz 1','ST depression (inferior - II, III, aVF)','Acute pericarditis','ST elevation (inferior - II, III, aVF)','Low voltage','Regularly irregular','Bifid','Junctional rhythm','Left atrial enlargement','ST elevation (lateral - I, aVL, V5-V6)','Atrial paced','Right ventricular hypertrophy','Delta wave','Wolff-Parkinson-White (Pre-excitation syndrome)','Prolonged QT','ST depression (anterior - V3-V4)','QRS complex negative in III','RaVL + SV3 > 28 mm (H) or 20 mm (F)','Q wave (lateral- I, aVL, V5-V6)','Hyperacute T wave (lateral, V5-V6)','Hyperacute T wave (septal, V1-V2)','Supraventricular tachycardia','ST downslopping','ST depression (lateral - I, avL, V5-V6)','2nd degree AV block - mobitz 2','U wave','ST depression et T inversion in V5 or V6','Large >0.08 s','R/S ratio in V1-V2 >1','RV1 + SV6\xa0> 11 mm','Left posterior fascicular block','Right atrial enlargement','ST depression (septal- V1-V2)','Q wave (septal- V1-V2)','Q wave (anterior - V3-V4)','Hyperacute T wave (anterior, V3-V4)','ST upslopping','Right superior axis','Auricular bigeminy','Ventricular tachycardia','ST elevation (posterior - V7-V8-V9)','Ectopic atrial rhythm (< 100 BPM)','Lead misplacement','Biphasic','Ventricular bigeminy','J wave','Tall >2.5 mm','Third Degree AV Block','Sinus Pause','Acute MI','Early repolarization','Q wave (posterior - V7-V9)','Bi-atrial enlargement','LV pacing','Dextrocardia','Brugada','Ventricular Rhythm','ST depression (posterior - V7-V8-V9)','no_qrs']
to_remove_labels = ['ST depression (posterior - V7-V8-V9)','Tall >2.5 mm', 'J wave', 'Auricular bigeminy', 'Ventricular bigeminy', 'Sinus Pause', 'Dextrocardia', 'Hyperacute T wave (lateral, V5-V6)', 'Hyperacute T wave (septal, V1-V2)', 'Hyperacute T wave (anterior, V3-V4)', 'Bifid', 'RaVL + SV3 > 28 mm (H) or 20 mm (F)', 'Large >0.08 s', 'Biphasic', 'ST depression et T inversion in V5 or V6']

Y_val = np.load('/media/data1/anolin/Y_val_v1.1.npy').astype(np.int64)

pos_to_drop = list()
new_label_names = list()
for pos, item in enumerate(og_labels):
    if item in to_remove_labels:
        pos_to_drop.append(pos)
    else:
        new_label_names.append(item)
Y_val = np.delete(Y_val, pos_to_drop, axis=1)

dict_eq = {42:0,1997:1,2023:2}

for low_cut in [1, 0.1, 0.01, -1]: 
    for high_cut in [100, 75, 50, -1]:
        if low_cut == high_cut == -1:
            continue

        for seed in [42, 1997, 2023]:

            output_matrix = np.load(f"/volume/my_name/resnet50_filtered_{low_cut}_{high_cut}_{seed}_v1.1/output_1.npy")


            sigmoid_v = np.vectorize(sigmoid)
            Y_pred = sigmoid_v(output_matrix)
            sigmoid_v_bin =  np.where(Y_pred > 0.5, 1, 0)
            sigmoid_v_bin

            if low_cut != -1 and high_cut != -1:
                name_condition = f'BP_{low_cut}_{high_cut}Hz'

            elif low_cut == -1 and high_cut != -1:
                name_condition = f'LP_{high_cut}Hz'

            elif low_cut != -1 and high_cut == -1:
                name_condition = f'HP_{low_cut}Hz'

            else:
                pass


            if metric == 'cat_accuracy':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(accuracy_score(Y_val, sigmoid_v_bin))

            if metric == 'hamming':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(accuracy_score(Y_val, sigmoid_v_bin))
            
            if metric == 'avg_acc':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(mean([accuracy_score(Y_val[:,i], sigmoid_v_bin[:,i]) for i in range(Y_val.shape[1])]))

            if metric == 'roc_macro':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(roc_auc_score(Y_val, sigmoid_v_bin, average='macro'))            
                                    
            if metric == 'roc_micro':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(roc_auc_score(Y_val, sigmoid_v_bin, average='micro'))          


            if metric == 'pr_macro':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(average_precision_score(Y_val, sigmoid_v_bin, average='macro'))          

            if metric == 'pr_micro':
                list_condition.append(name_condition)
                list_inter.append(dict_eq[seed])
                list_score.append(average_precision_score(Y_val, sigmoid_v_bin, average='micro'))     
    
df_ = pd.DataFrame(zip(list_condition,list_inter,list_score), columns=['method','fold','score'])
df_ = pd.concat([df_, df_first[df_first.method == 'notscaled']])
avg_rank = df_.groupby('fold').score.rank(pct=True).groupby(df_.method).mean()
test_results = sp.posthoc_conover_friedman(
    df_,
    melted=True,
    block_col='fold',
    group_col='method',
    y_col='score',
)
#sp.sign_plot(test_results)
#plt.title("Conover test PR Macro")
sp.critical_difference_diagram(avg_rank, test_results)
plt.title("CDD for PR Micro")

In [None]:
avg_rank = df_.groupby('fold').score.rank(pct=True).groupby(df_.method).mean()
test_results = sp.posthoc_conover_friedman(
    df_,
    melted=True,
    block_col='fold',
    group_col='method',
    y_col='score',
)
#sp.sign_plot(test_results)
#plt.title("Conover test PR Macro")
sp.critical_difference_diagram(avg_rank, test_results)
plt.title("CDD for Acc")

In [None]:
#generate the datasets
list_condition = list()
list_inter = list()
list_score = list()



for file in tqdm(['X_val'], desc='levels'):

    X_val = np.load(f'/media/data1/anolin/{file}_v1.1.npy').astype(np.float16)

    for low_cut in [1, 0.1, 0.01, -1]: 
        for high_cut in [100, 75, 50, -1]:

            print(f'low_cut: {low_cut}Hz')
            print(f'high_cut: {high_cut}Hz')

            if low_cut == -1 and high_cut == -1:
                continue

            # Bandpass filter parameters
            lowcut = low_cut  # Define your own lowcut frequency
            highcut = high_cut  # Define your own highcut frequency
            fs = 250  # Sampling frequency
            order = 2  # Filter order
            N = X_val.shape[0] # Replace with your actual N

            # Create bandpass filter coefficients
            if lowcut == -1:
                b, a = butter(order, highcut, btype='low', fs=fs)

            elif highcut == -1:
                b, a = butter(order, lowcut, btype='high', fs=fs)

            else:
                b, a = butter(order, [lowcut,highcut], btype='bandpass', fs=fs)

            def apply_filter(data_slice):
                """Applies the bandpass filter to a slice of the data."""
                filtered_slice = np.empty_like(data_slice).astype(np.float16)
                for i in range(data_slice.shape[0]):
                    for j in range(data_slice.shape[-1]):
                        filtered_slice[i, :, j] = filtfilt(b, a, data_slice[i, :, j])
                return filtered_slice

            # Divide data into chunks for parallel processing
            num_processes = cpu_count()
            chunk_size = N // num_processes
            chunks = [X_val[i:i + chunk_size].astype(np.float16) for i in range(0, N, chunk_size)]

            # Perform parallel processing with progress tracking
            with Pool(num_processes) as pool:
                results = list(tqdm(pool.imap(apply_filter, chunks), total=len(chunks)))

            # Reassemble the results
            filtered_data = np.concatenate(results, axis=0).astype(np.float16)

            np.save(f'/media/data1/anolin/{file}_filtered_{low_cut}_{high_cut}_v1.1.npy', filtered_data.astype(np.float16))
            gc.collect()
                        

In [None]:
import numpy as np
from scipy.signal import butter, filtfilt

# Bandpass filter parameters
lowcut = 0.01  # Define your own lowcut frequency
highcut = 100  # Define your own highcut frequency
fs = 250  # Sampling frequency
order = 2  # Filter order
N = X_val.shape[0] # Replace with your actual N

# Create bandpass filter coefficients
b, a = butter(order, lowcut, btype='highpass', fs=fs)

def apply_filter(data_slice):
    """Applies the bandpass filter to a slice of the data."""
    filtered_slice = np.empty_like(data_slice).astype(np.float16)
    for i in range(data_slice.shape[0]):
        for j in range(data_slice.shape[-1]):
            filtered_slice[i, :, j] = filtfilt(b, a, data_slice[i, :, j])
    return filtered_slice

# Divide data into chunks for parallel processing
num_processes = cpu_count()
chunk_size = N // num_processes
chunks = [X_val[i:i + chunk_size].astype(np.float16) for i in range(0, N, chunk_size)]

# Perform parallel processing with progress tracking
with Pool(num_processes) as pool:
    results = list(tqdm(pool.imap(apply_filter, chunks), total=len(chunks)))

# Reassemble the results
filtered_data = np.concatenate(results, axis=0)