In [25]:
from utils import utils
import numpy as np

sampling_frequency=100
datafolder='../data/ptbxl/'

task='normabnorm'
input_channels = 1 # number of leads
experiment = 'custom_exp'
modelname = f'fastai_xresnet1d101{"_"+str(input_channels)+"lead" if input_channels!=12 else ""}'

outputfolder='../output/'

# Load PTB-XL data
data, raw_labels = utils.load_dataset(datafolder, sampling_frequency)
# Preprocess label data
scp = raw_labels.scp_codes
counts = {}

for threshold in range(5, 105, 5):
    indices = [i for i, d in enumerate(scp) if 'NORM' in d and d['NORM'] == threshold]
    if len(indices) == 0:
        continue
    mean_other_scp_likelihoods = sum([sum([v for k,v in d.items() if k!="NORM"]) for i, d in enumerate(scp.iloc[indices])])/len(indices)

    meta = {"num": len(indices), "mean_other_scp_likelihoods": mean_other_scp_likelihoods}
    counts[threshold] = meta

print({k: v for k, v in counts.items() if v["num"] > 0})

print(raw_labels["scp_codes"].head(10))
labels = utils.compute_label_aggregations(raw_labels, datafolder,task)

if task == "all":
    print(labels["all_scp"].head(2))
    
elif task=="normabnorm":
    print(labels["normdiagnostic"].head(10))


# Select relevant data and convert to one-hot
data, labels, Y, mlb = utils.select_data(data, labels, task, min_samples=0, outputfolder=outputfolder)
for i in range(10):
    print(Y[i])


# Max input_channels is 12
# Extract the first input_channels leads from each sample in data
data = np.array([d[:,:input_channels] for d in data])

# 1-9 for training 
X_train = data[labels.strat_fold < 10]
y_train = Y[labels.strat_fold < 10]
# 10 for validation
X_val = data[labels.strat_fold == 10]
y_val = Y[labels.strat_fold == 10]

num_classes = 1 

input_shape = [1000, input_channels] # <=== shape of samples, [None, 12] in case of different lengths

X_train.shape, y_train.shape, X_val.shape, y_val.shape

{15: {'num': 38, 'mean_other_scp_likelihoods': 34.21052631578947}, 35: {'num': 37, 'mean_other_scp_likelihoods': 21.62162162162162}, 50: {'num': 505, 'mean_other_scp_likelihoods': 35.95049504950495}, 80: {'num': 1761, 'mean_other_scp_likelihoods': 10.724020442930152}, 100: {'num': 7172, 'mean_other_scp_likelihoods': 2.1730340211935304}}
ecg_id
1     {'NORM': 100.0, 'LVOLT': 0.0, 'SR': 0.0}
2                 {'NORM': 80.0, 'SBRAD': 0.0}
3                   {'NORM': 100.0, 'SR': 0.0}
4                   {'NORM': 100.0, 'SR': 0.0}
5                   {'NORM': 100.0, 'SR': 0.0}
6                   {'NORM': 100.0, 'SR': 0.0}
7                   {'NORM': 100.0, 'SR': 0.0}
8       {'IMI': 35.0, 'ABQRS': 0.0, 'SR': 0.0}
9                   {'NORM': 100.0, 'SR': 0.0}
10                  {'NORM': 100.0, 'SR': 0.0}
Name: scp_codes, dtype: object
ecg_id
1     [NORM]
2     [NORM]
3     [NORM]
4     [NORM]
5     [NORM]
6     [NORM]
7     [NORM]
8         []
9     [NORM]
10    [NORM]
Name: normdiagno

((19601, 1000, 1), (19601, 1), (2198, 1000, 1), (2198, 1))

# Load pretrained model

For loading a pretrained model:
   1. specify `modelname` which can be seen in `code/configs/` (e.g. `modelname='fastai_xresnet1d101'`)
   2. provide `experiment` to build the path `pretrainedfolder` (here: `custom_exp` refers to the experiment that only extracts wether the sample is normal or abnormal from the SCP-statements)
   
This returns the pretrained model where the classification is replaced by a random initialized head with the same number of outputs as the number of classes.

In [26]:
from models.fastai_model import fastai_model

pretrainedfolder = '../output/'+experiment+'/models/'+modelname+'/'
mpath = '../output/'+experiment+'/models/'+modelname+'/' # <=== path where the finetuned model will be stored
n_classes_pretrained = num_classes # <=== because we load the model from exp0, this should be fixed because this depends the experiment

model = fastai_model(
    modelname, 
    num_classes, 
    sampling_frequency, 
    mpath, 
    input_shape=input_shape, 
    input_channels=input_channels,
    pretrainedfolder=pretrainedfolder,
    n_classes_pretrained=n_classes_pretrained, 
    pretrained=True,
    epochs_finetuning=2,
)

model.input_channels

../output/custom_exp/models/fastai_xresnet1d101_1lead/


1

# Preprocess data with pretrained Standardizer

Since we standardize inputs to zero mean and unit variance, your custom data needs to be standardized with the respective mean and variance. This is also provided in the respective experiment folder `output/expX/data/standard_scaler.pkl`

In [27]:
import pickle

standard_scaler = pickle.load(open('../output/'+experiment+'/data/standard_scaler.pkl', "rb"))

X_train = utils.apply_standardizer(X_train, standard_scaler)
X_val = utils.apply_standardizer(X_val, standard_scaler)

# Save model to ONNX

In [28]:
X_train.shape

(19601, 1000, 1)

In [29]:
X_test = X_val[:]
y_test = y_val[:]
X_test.shape

(2198, 1000, 1)

In [30]:
onnx_file_path = mpath + 'models/model.onnx'
model.to_onnx(X_test, onnx_file_path)

In channels: 1, shape: [1000, 1]
model: fastai_xresnet1d101_1lead
HEIHEI
1
fastai_xresnet1d101_1lead
../output/custom_exp/models/fastai_xresnet1d101_1lead/models/fastai_xresnet1d101_1lead.pth
Model exported to ../output/custom_exp/models/fastai_xresnet1d101_1lead/models/model.onnx


# Evaluate model on validation data

In [31]:
# this function requires ONLY numpy and onnxruntime
from utils.onnx import onnx_predict
y_test_pred_onnx = onnx_predict(X_test, onnx_file_path)

Shape of chunks: (15386, 250, 1)
SHape before transpose: (15386, 250, 1)
SHape after transpose: (15386, 1, 250)
Shape of predictions before: (15386, 1)
Shape of predictions after sigmoid: (15386, 1)
aggregating predictions...


In [32]:
# y_test_pred = model.predict(X_test)

In [33]:
# find where y_test_pred_onnx is nan
nan_indices = np.argwhere(np.isnan(y_test_pred_onnx))
nan_indices

array([], shape=(0, 2), dtype=int64)

In [34]:
# set nan to 0
y_test_pred_onnx[nan_indices] = 0

In [35]:
# optimal_thresholds = utils.find_optimal_cutoff_thresholds(y_test, y_test_pred_onnx)
# optimal_thresholds

In [36]:
optimal_threshold = 0.75

for i in range(10):
    idxs = [0]#np.where(y_test[i] > optimal_threshold)[0]
    nan_indices = np.argwhere(np.isnan(y_test_pred_onnx[i]))
    print(f"i={i}, idxs={idxs}, \tactual={y_test[i][idxs]},  onnx= {y_test_pred_onnx[i][idxs]}")#\ttest_pred = {y_test_pred[i][idxs]}")

optimal_thresholds = [optimal_threshold] * num_classes
y_test_pred_onnx_binary = utils.apply_thresholds(y_test_pred_onnx, optimal_thresholds)

i=0, idxs=[0], 	actual=[1],  onnx= [0.889828]
i=1, idxs=[0], 	actual=[1],  onnx= [0.893311]
i=2, idxs=[0], 	actual=[1],  onnx= [0.461641]
i=3, idxs=[0], 	actual=[1],  onnx= [0.954705]
i=4, idxs=[0], 	actual=[1],  onnx= [0.892052]
i=5, idxs=[0], 	actual=[0],  onnx= [0.847784]
i=6, idxs=[0], 	actual=[0],  onnx= [0.944056]
i=7, idxs=[0], 	actual=[1],  onnx= [0.914132]
i=8, idxs=[0], 	actual=[1],  onnx= [0.865871]
i=9, idxs=[0], 	actual=[1],  onnx= [0.89439]


In [37]:
actual_classes = mlb.inverse_transform(y_test)

# y_test_pred_binary = utils.apply_thresholds(y_test_pred, optimal_thresholds)
# predicted_classes = mlb.inverse_transform(y_test_pred_binary)

predicted_classes_onnx = mlb.inverse_transform(y_test_pred_onnx_binary)
predicted_classes_onnx[:10]

[('NORM',),
 ('NORM',),
 (),
 ('NORM',),
 ('NORM',),
 ('NORM',),
 ('NORM',),
 ('NORM',),
 ('NORM',),
 ('NORM',)]

In [38]:
onnx_is_norm = ['NORM' in str(a) for a in predicted_classes_onnx]
# pred_is_norm = ['NORM' in str(a) for a in predicted_classes]
actual_is_norm = ['NORM' in str(a) for a in actual_classes]
print(f"% of NORM in actual_classes: {sum(actual_is_norm)/len(actual_is_norm)}")
print(f"% of NORM in onnx predicted classes: {sum(onnx_is_norm)/len(onnx_is_norm)}")
# print(f"% of NORM in predicted classes: {sum(pred_is_norm)/len(pred_is_norm)}")

% of NORM in actual_classes: 0.4381255686988171
% of NORM in onnx predicted classes: 0.41264786169244766


In [39]:
def calculate_norm_accuracy(predicted_is_norm, actual_is_norm):
    correct_predictions = [1 if a==b else 0 for a, b in zip(predicted_is_norm, actual_is_norm)]
    accuracy = sum(correct_predictions) / len(correct_predictions)
    return accuracy
onnx_accuracy_norm = calculate_norm_accuracy(onnx_is_norm, actual_is_norm)
# pred_accuracy_norm = calculate_norm_accuracy(pred_is_norm, actual_is_norm)
onnx_accuracy_norm#, pred_accuracy_norm

0.7952684258416742

In [40]:
def calculate_norm_specificity_and_sensitivity(predicted_is_norm, actual_is_norm):
    true_positives = sum([1 if a and b else 0 for a, b in zip(predicted_is_norm, actual_is_norm)])
    true_negatives = sum([1 if not a and not b else 0 for a, b in zip(predicted_is_norm, actual_is_norm)])
    false_positives = sum([1 if a and not b else 0 for a, b in zip(predicted_is_norm, actual_is_norm)])
    false_negatives = sum([1 if not a and b else 0 for a, b in zip(predicted_is_norm, actual_is_norm)])
    sensitivity = true_positives / (true_positives + false_negatives)
    specificity = true_negatives / (true_negatives + false_positives)
    return sensitivity, specificity

def calculate_f1_score(predicted_is_norm, actual_is_norm):
    sensitivity, specificity = calculate_norm_specificity_and_sensitivity(predicted_is_norm, actual_is_norm)
    f1_score = 2 * (sensitivity * specificity) / (sensitivity + specificity)
    return f1_score

In [41]:
specificity, sensitivity = calculate_norm_specificity_and_sensitivity(onnx_is_norm, actual_is_norm)
f1_score = calculate_f1_score(onnx_is_norm, actual_is_norm)

print(f"Accuracy: {onnx_accuracy_norm}")
f"Specificity: {specificity}, Sensitivity: {sensitivity}, F1-Score: {f1_score}"

Accuracy: 0.7952684258416742


'Specificity: 0.7372793354101765, Sensitivity: 0.8404858299595142, F1-Score: 0.7855070548335042'