In [None]:
from Scripts.essentials import *

# Training hyper-parameters used by the model fits further down
epochs, batch_size = 300, 256
lr = 5e-5

# Early-stopping callback configured on the validation loss: a patience of
# four epochs, with the best weights restored when training stops.
early_stop = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=4,
    monitor="val_loss",
    mode="auto",
    min_delta=0,
    baseline=None,
    verbose=0,
)

# Simple example of how applying preprocessing to data can help with the spectrum effect

In [None]:
from sklearn import metrics

plt.rcParams.update({'font.size': 40})
plt.rcParams["font.family"] = "Times New Roman"

# Load the raw training / validation spectra and their labels
p = "Data/"
train_x = np.load(p + "train_x.npy")
val_x = np.load(p + "val_x.npy")

train_y = np.load(p + "train_y.npy")
val_y = np.load(p + "val_y.npy")

# Class index of every training spectrum (argmax over the label axis)
train_ids = np.argmax(train_y, axis = 1)
HF_1887 = train_x[train_ids == 10]  # Get sample spectra
HF_2070 = train_x[train_ids == 17]  # Get the other sample

# For each sample: mean spectrum (dashed black) with a +/- one standard
# deviation band (red), saved to its own image.
for d, image_path in ((HF_1887, "Images/(MANUAL)SolveSpectrumEffect_1887.png"),
                      (HF_2070, "Images/(MANUAL)SolveSpectrumEffect_2070.png")):
    sd = np.std(d, axis = 0)
    mean = np.mean(d, axis = 0)

    # The x-axis is derived from the spectrum length instead of a hard-coded 1738
    plt.fill_between(np.arange(len(mean)), mean - sd, mean + sd, alpha = 1, color = "Red")
    plt.plot(mean, linestyle = "--", color = "black")
    plt.savefig(image_path, format="png", transparent = True,
                dpi = 300,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.show()

# Make a model and show how easy it is to learn the difference
reset_seed()
model = make_model(lr = lr, out_dim = 2, reg_param = 1e-7)
model.summary()

# Validation spectra of the same two samples
HF_1887_val = val_x[np.argmax(val_y, axis = 1) == 10]
HF_2070_val = val_x[np.argmax(val_y, axis = 1) == 17]

# Binary task: label 0 for HF_1887 spectra, label 1 for HF_2070 spectra
d = np.concatenate([HF_1887, HF_2070])
l = np.concatenate([np.zeros(len(HF_1887)), np.ones(len(HF_2070))])

d_val = np.concatenate([HF_1887_val, HF_2070_val])
l_val = np.concatenate([np.zeros(len(HF_1887_val)), np.ones(len(HF_2070_val))])

# Shuffle the training set so the two samples are interleaved
ix = np.arange(len(d))
np.random.shuffle(ix)

d = d[ix]
l = l[ix]

print(d.shape)
print("Sample ratio:", str(len(HF_1887)/(len(HF_2070))))

# validation_data is passed as a tuple, the form documented for Model.fit
hist1 = model.fit(d, l, epochs = epochs, batch_size = batch_size, validation_data = (d_val, l_val))

plt.rcParams.update({'font.size': 30})
# Evaluate the model on validation data
scores = model.predict(d_val)
preds = np.argmax(scores, axis = 1)
print("Balanced accuracy:", balanced_accuracy_score(l_val, preds))

# The ROC curve is traced from the continuous score of the positive class
# (output column 1); thresholded labels would collapse it to a single point.
fpr, tpr, thresholds = metrics.roc_curve(l_val, scores[:, 1])
roc_auc = metrics.auc(fpr, tpr)
display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='')
display.plot()
plt.savefig("Images/(MANUAL)SolveSpectrumEffect_unprep_auroc.png", format="png", transparent = True,
                    dpi = 300,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()

plt.rcParams.update({'font.size': 40})
# Plot training process
# NOTE(review): labels are assigned but no legend is drawn, so they do not
# appear in the saved figure.
plt.plot(hist1.history["loss"], label = "Training")
plt.plot(hist1.history["val_loss"], label = "Validation")
plt.ylim([-0.05, 1.25])
plt.savefig("Images/(MANUAL)SolveSpectrumEffect_unprep_loss.png", format="png", transparent = True,
                    dpi = 300,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()




# Load the manually preprocessed data
train_x = np.load(p + "train_x_MANUAL.npy")
val_x = np.load(p + "val_x_MANUAL.npy")

# Class index of every training spectrum (argmax over the label axis)
train_ids = np.argmax(train_y, axis = 1)
HF_1887 = train_x[train_ids == 10]
HF_2070 = train_x[train_ids == 17]

# For each sample: mean spectrum (dashed black) with a +/- one standard
# deviation band (red), saved to its own image.
for d, image_path in ((HF_1887, "Images/(MANUAL)SolveSpectrumEffect_1887(prep).png"),
                      (HF_2070, "Images/(MANUAL)SolveSpectrumEffect_2070(prep).png")):
    sd = np.std(d, axis = 0)
    mean = np.mean(d, axis = 0)

    # The x-axis is derived from the spectrum length instead of a hard-coded 1738
    plt.fill_between(np.arange(len(mean)), mean - sd, mean + sd, alpha = 1, color = "Red")
    plt.plot(mean, linestyle = "--", color = "black")

    plt.savefig(image_path, format="png", transparent = True,
                dpi = 300,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.show()
# Same two-sample experiment, now on the data currently held in
# train_x / val_x / HF_1887 / HF_2070
reset_seed()
model = make_model(lr = lr, out_dim = 2, reg_param = 1e-7)

# Validation spectra of the same two samples
HF_1887_val = val_x[np.argmax(val_y, axis = 1) == 10]
HF_2070_val = val_x[np.argmax(val_y, axis = 1) == 17]

# Binary task: label 0 for HF_1887 spectra, label 1 for HF_2070 spectra
d = np.concatenate([HF_1887, HF_2070])
l = np.concatenate([np.zeros(len(HF_1887)), np.ones(len(HF_2070))])

d_val = np.concatenate([HF_1887_val, HF_2070_val])
l_val = np.concatenate([np.zeros(len(HF_1887_val)), np.ones(len(HF_2070_val))])

# Shuffle the training set so the two samples are interleaved
ix = np.arange(len(d))
np.random.shuffle(ix)

d = d[ix]
l = l[ix]

# validation_data is passed as a tuple, the form documented for Model.fit
hist2 = model.fit(d, l, epochs = epochs, batch_size = batch_size, validation_data = (d_val, l_val))

# Evaluate the model on validation data
scores = model.predict(d_val)
preds = np.argmax(scores, axis = 1)
print("Balanced accuracy:", balanced_accuracy_score(l_val, preds))

# The ROC curve is traced from the continuous score of the positive class
# (output column 1); thresholded labels would collapse it to a single point.
fpr, tpr, thresholds = metrics.roc_curve(l_val, scores[:, 1])

roc_auc = metrics.auc(fpr, tpr)
plt.rcParams.update({'font.size': 30})
display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='')
display.plot()

plt.savefig("Images/(MANUAL)SolveSpectrumEffect_prep_auroc.png", format="png", transparent = True,
                    dpi = 300,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()

plt.rcParams.update({'font.size': 40})
# Plot training process
# NOTE(review): labels are assigned but no legend is drawn, so they do not
# appear in the saved figure.
plt.plot(hist2.history["loss"], label = "Training")
plt.plot(hist2.history["val_loss"], label = "Validation")
plt.ylim([-0.05, 1.25])
plt.savefig("Images/(MANUAL)SolveSpectrumEffect_prep_loss.png", format="png", transparent = True,
                    dpi = 300,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()

In [None]:
# Labels in which tumors are joined according to the mother tumor
train_y = np.load(p + "train_y_46.npy")


# Per-label weight: square root of the inverse of the label's frequency
# relative to the most frequent label
counts = np.bincount(np.argmax(train_y, axis = 1))
class_weights = np.sqrt((1/(counts/np.max(counts))))

# Label index -> weight lookup, echoed together with the label's spectrum count
cw = dict(enumerate(class_weights))

for i, weight in cw.items():
    print(i,":", weight, "(", counts[i], " spectra in training set)")


In [None]:
gc.collect()
p = "Data/"

# Raw spectra for the three splits
train_x = np.load(p + "train_x.npy")
test_x = np.load(p + "test_x.npy")
val_x = np.load(p + "val_x.npy")

# Sample labels ("_46" label files)
train_y = np.load(p + "train_y_46.npy")
test_y = np.load(p + "test_y_46.npy")
val_y = np.load(p + "val_y_46.npy")

# LGm labels
train_lgm = np.load(p + "train_lgm.npy")
test_lgm = np.load(p + "test_lgm.npy")
val_lgm = np.load(p + "val_lgm.npy")

# Binary encoding from lgm to mutant vs. wildtype:
# LGm class indices above 2 become binary class 0, all others class 1,
# and the binary label is then one-hot encoded again.
eye = np.eye(2)

train_lgm = eye[np.where(np.argmax(train_lgm, axis = 1) > 2, 0, 1)]
val_lgm = eye[np.where(np.argmax(val_lgm, axis = 1) > 2, 0, 1)]
test_lgm = eye[np.where(np.argmax(test_lgm, axis = 1) > 2, 0, 1)]

# Shuffle the training set with a fixed seed, keeping spectra and labels aligned
np.random.seed(0)
ix = np.arange(len(train_x))
np.random.shuffle(ix)
train_x = train_x[ix]
train_y = train_y[ix]
train_lgm = train_lgm[ix]

# Train and save the model
reset_seed()
sample_model = make_split_model(lr, out_dims = [len(train_y[0]), len(train_lgm[0])])
sample_model.summary()
hist = sample_model.fit(train_x, [train_y, train_lgm], epochs = epochs,
                        batch_size = batch_size, validation_data = (val_x, [val_y, val_lgm]), callbacks = [early_stop])
# Forward slash instead of "\R": avoids the invalid escape sequence and
# resolves to the same file on Windows.
sample_model.save_weights("Models/Raw_Bias_quantifier.h5")

# Save the training history to csv. The frame was previously never created,
# so the export raised a NameError.
hist_df = pd.DataFrame(hist.history)
hist_csv_file = 'Raw_bias_history.csv'
hist_df.to_csv(hist_csv_file)

# Show metrics on the datasets
# Each split is passed through the model once; element 0 of the prediction is
# scored against the sample labels and element 1 against the LGm labels.
eval_splits = [("Train", train_x, train_y, train_lgm),
               ("Validation", val_x, val_y, val_lgm),
               ("Test", test_x, test_y, test_lgm)]
split_outputs = {name: sample_model.predict(x) for name, x, _, _ in eval_splits}

for head, title in enumerate(["ID balanced accuracy", "LGM balanced accuracy"]):
    print(title)
    for name, _, y_split, lgm_split in eval_splits:
        truth = np.argmax((y_split, lgm_split)[head], axis = 1)
        preds = np.argmax(split_outputs[name][head], axis = 1)
        print(name + ":", np.round(balanced_accuracy_score(truth, preds), 2))
# Plot training process
plt.rcParams.update({'font.size': 40, 'font.family': "Times New Roman"})

# Export settings shared by both figures
save_kwargs = dict(format="png", transparent = True, dpi = 1000,
                   bbox_inches='tight', pad_inches=0.5)
curves = hist.history

# Loss
for key, curve_label in (("loss", "Training"), ("val_loss", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.title("Loss")
plt.legend(fontsize = 30)
plt.savefig("Images/Histories/NOPREPBiasLoss.png", **save_kwargs)
plt.show()

# Accuracies
for key, curve_label in (("t_id_out_accuracy", "Training"), ("val_t_id_out_accuracy", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.ylim([-0.1, 1.1])
plt.title("Accuracy")
plt.legend(fontsize = 30)
plt.savefig("Images/Histories/NOPREPBiasAcc.png", **save_kwargs)
plt.show()

# get performance on each sample
sample_dict = {}

# Sample index of every spectrum in each split (argmax over the label axis)
train_ids = np.argmax(train_y, axis = 1)
val_ids = np.argmax(val_y, axis = 1)
test_ids = np.argmax(test_y, axis = 1)

# Number of distinct samples in the training set. The argmax must run over the
# label axis (axis 1); axis 0 would return row positions, not sample indices.
num_patients = len(np.unique(train_ids))

# Train first, then validation and test (spectra not seen during training)
split_data = [(train_x, train_y, train_lgm, train_ids),
              (val_x, val_y, val_lgm, val_ids),
              (test_x, test_y, test_lgm, test_ids)]

for i in range(num_patients):
    row = []
    for x_all, y_all, lgm_all, ids in split_data:
        # Get data from class i of this split
        mask = ids == i
        d = np.squeeze(x_all[mask])
        y = np.squeeze(y_all[mask])
        lgm = np.squeeze(lgm_all[mask])

        # evaluate() yields five values here; the 4th and 5th are used as the
        # sample-ID accuracy and LGm accuracy — ordering depends on how
        # make_split_model compiles the model, TODO confirm.
        _, _, _, acc, acc_lgm = sample_model.evaluate(d, [y, lgm], batch_size = 256)
        row.extend([acc, acc_lgm, int(len(d))])

    # Layout: [train acc, train LGm acc, train size, val ..., test ...]
    sample_dict[i] = np.array(row)

# Save the metrics in a dataframe
header = ["Sample ID", "Train Accuracy", "Train LGm Accuracy", "Train Size",
          "Validation Accuracy", "Validation LGm Accuracy", "Validation Size",
          "Test Accuracy", "Test LGm Accuracy", "Test Size"]

# The dict keys (sample indices) become the index, so the "Sample ID" entry is skipped
df = pd.DataFrame.from_dict(sample_dict, columns = header[1:],
                                        orient = "index")
# Round them to two decimals
df = df.round(decimals = 2)

# Cast the number of spectra to integer, looks nicer than the decimal form with .0 after each number
df = df.astype({"Train Size": int, "Validation Size": int, "Test Size": int})

# Styling for conversion into latex format. `df.style` builds a new Styler on
# every access, so the styled object is kept and reused for the export;
# otherwise the table styles are silently dropped.
styler = df.style.set_table_styles([
    {'selector': 'toprule', 'props': ':hline;'},
    {'selector': 'midrule', 'props': ':hline;'},
    {'selector': 'bottomrule', 'props': ':hline;'},
], overwrite=False)

# One "l" column per data column plus one for the index
column_format = "|" + "l|" * (len(df.columns) + 1)
latex = styler.format(decimal=',', thousands='.', precision=2).to_latex(clines="all;data",  column_format=column_format)

# Print the latex table, can be copied into the editor
print(latex)

# Save metrics
np.save("Results/RADAREffectOnBias/NOPREPtestaccuracy.npy", df["Test Accuracy"].values)
np.save("Results/RADAREffectOnBias/NOPREPallaccuracies.npy", df.values)

# Drop the data arrays and the model used in this cell, then run the garbage collector
del train_x, train_y, val_x, val_y, test_x, test_y, sample_model
gc.collect()

In [None]:
# Redo experiment for the manually corrected data

gc.collect()
p = "Data/"

# Manually corrected spectra for the three splits
train_x = np.load(p + "train_x_MANUAL.npy")
test_x = np.load(p + "test_x_MANUAL.npy")
val_x = np.load(p + "val_x_MANUAL.npy")

# Sample labels ("_46" label files)
train_y = np.load(p + "train_y_46.npy")
test_y = np.load(p + "test_y_46.npy")
val_y = np.load(p + "val_y_46.npy")

# LGm labels
train_lgm = np.load(p + "train_lgm.npy")
test_lgm = np.load(p + "test_lgm.npy")
val_lgm = np.load(p + "val_lgm.npy")

# Binary encoding from lgm to mutant vs. wildtype:
# LGm class indices above 2 become binary class 0, all others class 1,
# and the binary label is then one-hot encoded again.
eye = np.eye(2)

train_lgm = eye[np.where(np.argmax(train_lgm, axis = 1) > 2, 0, 1)]
val_lgm = eye[np.where(np.argmax(val_lgm, axis = 1) > 2, 0, 1)]
test_lgm = eye[np.where(np.argmax(test_lgm, axis = 1) > 2, 0, 1)]

# Shuffle the training set with a fixed seed, keeping spectra and labels aligned
np.random.seed(0)
ix = np.arange(len(train_x))
np.random.shuffle(ix)
train_x = train_x[ix]
train_y = train_y[ix]
train_lgm = train_lgm[ix]

# Train and save the model
reset_seed()
sample_model = make_split_model(lr, out_dims = [len(train_y[0]), len(train_lgm[0])])
sample_model.summary()
hist = sample_model.fit(train_x, [train_y, train_lgm], epochs = epochs,
                        batch_size = batch_size, validation_data = (val_x, [val_y, val_lgm]), callbacks = [early_stop])
# Forward slash instead of "\M": avoids the invalid escape sequence and
# resolves to the same file on Windows.
sample_model.save_weights("Models/MANUAL_Bias_quantifier.h5")

# Save the training history to csv. The frame was previously never created,
# so the export raised a NameError.
hist_df = pd.DataFrame(hist.history)
hist_csv_file = 'MANUAL_bias_history.csv'
hist_df.to_csv(hist_csv_file)

# Show metrics on the datasets
# Each split is passed through the model once; element 0 of the prediction is
# scored against the sample labels and element 1 against the LGm labels.
eval_splits = [("Train", train_x, train_y, train_lgm),
               ("Validation", val_x, val_y, val_lgm),
               ("Test", test_x, test_y, test_lgm)]
split_outputs = {name: sample_model.predict(x) for name, x, _, _ in eval_splits}

for head, title in enumerate(["ID balanced accuracy", "LGM balanced accuracy"]):
    print(title)
    for name, _, y_split, lgm_split in eval_splits:
        truth = np.argmax((y_split, lgm_split)[head], axis = 1)
        preds = np.argmax(split_outputs[name][head], axis = 1)
        print(name + ":", np.round(balanced_accuracy_score(truth, preds), 2))


# Plot training process
plt.rcParams.update({'font.size': 40, 'font.family': "Times New Roman"})

# Export settings shared by both figures
save_kwargs = dict(format="png", transparent = True, dpi = 1000,
                   bbox_inches='tight', pad_inches=0.5)
curves = hist.history

# Loss
for key, curve_label in (("loss", "Training"), ("val_loss", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.title("Loss")
# Largest validation loss; not referenced again in the visible notebook
maxim = np.max(curves["val_loss"])

plt.legend(fontsize = 30)
plt.savefig("Images/Histories/MANUALBiasLoss.png", **save_kwargs)
plt.show()

# Accuracies
for key, curve_label in (("t_id_out_accuracy", "Training"), ("val_t_id_out_accuracy", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.ylim([-0.1, 1.1])
plt.title("Accuracy")
plt.legend(fontsize = 30)
plt.savefig("Images/Histories/MANUALBiasAcc.png", **save_kwargs)
plt.show()

# get performance on each sample
sample_dict = {}

# Sample index of every spectrum in each split (argmax over the label axis)
train_ids = np.argmax(train_y, axis = 1)
val_ids = np.argmax(val_y, axis = 1)
test_ids = np.argmax(test_y, axis = 1)

# Number of distinct samples in the training set. The argmax must run over the
# label axis (axis 1); axis 0 would return row positions, not sample indices.
num_patients = len(np.unique(train_ids))

# Train first, then validation and test (spectra not seen during training)
split_data = [(train_x, train_y, train_lgm, train_ids),
              (val_x, val_y, val_lgm, val_ids),
              (test_x, test_y, test_lgm, test_ids)]

for i in range(num_patients):
    row = []
    for x_all, y_all, lgm_all, ids in split_data:
        # Get data from class i of this split
        mask = ids == i
        d = np.squeeze(x_all[mask])
        y = np.squeeze(y_all[mask])
        lgm = np.squeeze(lgm_all[mask])

        # evaluate() yields five values here; the 4th and 5th are used as the
        # sample-ID accuracy and LGm accuracy — ordering depends on how
        # make_split_model compiles the model, TODO confirm.
        _, _, _, acc, acc_lgm = sample_model.evaluate(d, [y, lgm], batch_size = 256)
        row.extend([acc, acc_lgm, int(len(d))])

    # Layout: [train acc, train LGm acc, train size, val ..., test ...]
    sample_dict[i] = np.array(row)

# Save the metrics in a dataframe
header = ["Sample ID", "Train Accuracy", "Train LGm Accuracy", "Train Size",
          "Validation Accuracy", "Validation LGm Accuracy", "Validation Size",
          "Test Accuracy", "Test LGm Accuracy", "Test Size"]

# The dict keys (sample indices) become the index, so the "Sample ID" entry is skipped
df = pd.DataFrame.from_dict(sample_dict, columns = header[1:],
                                        orient = "index")
# Round them to two decimals
df = df.round(decimals = 2)

# Cast the number of spectra to integer, looks nicer than the decimal form with .0 after each number
df = df.astype({"Train Size": int, "Validation Size": int, "Test Size": int})

# Styling for conversion into latex format. `df.style` builds a new Styler on
# every access, so the styled object is kept and reused for the export;
# otherwise the table styles are silently dropped.
styler = df.style.set_table_styles([
    {'selector': 'toprule', 'props': ':hline;'},
    {'selector': 'midrule', 'props': ':hline;'},
    {'selector': 'bottomrule', 'props': ':hline;'},
], overwrite=False)

# One "l" column per data column plus one for the index
column_format = "|" + "l|" * (len(df.columns) + 1)
latex = styler.format(decimal=',', thousands='.', precision=2).to_latex(clines="all;data",  column_format=column_format)

# Print the latex table, can be copied into the editor
print(latex)

# Save metrics
np.save("Results/RADAREffectOnBias/MANUALtestaccuracy.npy", df["Test Accuracy"].values)
np.save("Results/RADAREffectOnBias/MANUALallaccuracies.npy", df.values)

# Drop the data arrays and the model used in this cell, then run the garbage collector
del train_x, train_y, val_x, val_y, test_x, test_y, sample_model
gc.collect()

In [None]:
gc.collect()
# Redo the experiment using RADAR processed data

p = "Data/"

# RADAR processed spectra for the three splits
train_x = np.load(p + "train_x_RADAR.npy")
test_x = np.load(p + "test_x_RADAR.npy")
val_x = np.load(p + "val_x_RADAR.npy")

# Sample labels ("_46" label files)
train_y = np.load(p + "train_y_46.npy")
test_y = np.load(p + "test_y_46.npy")
val_y = np.load(p + "val_y_46.npy")

# LGm labels
train_lgm = np.load(p + "train_lgm.npy")
test_lgm = np.load(p + "test_lgm.npy")
val_lgm = np.load(p + "val_lgm.npy")

# Binary encoding from lgm to mutant vs. wildtype:
# LGm class indices above 2 become binary class 0, all others class 1,
# and the binary label is then one-hot encoded again.
eye = np.eye(2)

train_lgm = eye[np.where(np.argmax(train_lgm, axis = 1) > 2, 0, 1)]
val_lgm = eye[np.where(np.argmax(val_lgm, axis = 1) > 2, 0, 1)]
test_lgm = eye[np.where(np.argmax(test_lgm, axis = 1) > 2, 0, 1)]

# Shuffle the training set with a fixed seed, keeping spectra and labels aligned
np.random.seed(0)
ix = np.arange(len(train_x))
np.random.shuffle(ix)
train_x = train_x[ix]
train_y = train_y[ix]
train_lgm = train_lgm[ix]

# Train and save the model
reset_seed()
sample_model = make_split_model(lr, out_dims = [len(train_y[0]), len(train_lgm[0])])
sample_model.summary()
hist = sample_model.fit(train_x, [train_y, train_lgm], epochs = epochs,
                        batch_size = batch_size, validation_data = (val_x, [val_y, val_lgm]), callbacks = [early_stop])
# Forward slash instead of "\R": avoids the invalid escape sequence and
# resolves to the same file on Windows.
sample_model.save_weights("Models/RADAR_Bias_quantifier.h5")

# Save the training history to csv. The frame was previously never created,
# so the export raised a NameError.
hist_df = pd.DataFrame(hist.history)
hist_csv_file = 'RADAR_bias_history.csv'
hist_df.to_csv(hist_csv_file)
    
# Show metrics on the datasets
# Each split is passed through the model once; element 0 of the prediction is
# scored against the sample labels and element 1 against the LGm labels.
eval_splits = [("Train", train_x, train_y, train_lgm),
               ("Validation", val_x, val_y, val_lgm),
               ("Test", test_x, test_y, test_lgm)]
split_outputs = {name: sample_model.predict(x) for name, x, _, _ in eval_splits}

for head, title in enumerate(["ID balanced accuracy", "LGM balanced accuracy"]):
    print(title)
    for name, _, y_split, lgm_split in eval_splits:
        truth = np.argmax((y_split, lgm_split)[head], axis = 1)
        preds = np.argmax(split_outputs[name][head], axis = 1)
        print(name + ":", np.round(balanced_accuracy_score(truth, preds), 2))


# Plot training process
plt.rcParams.update({'font.size': 40, 'font.family': "Times New Roman"})

# Export settings shared by both figures
save_kwargs = dict(format="png", transparent = True, dpi = 1000,
                   bbox_inches='tight', pad_inches=0.5)
curves = hist.history

# Loss curves
for key, curve_label in (("loss", "Training"), ("val_loss", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.title("Loss")
# Largest validation loss; not referenced again in the visible notebook
maxim = np.max(curves["val_loss"])

plt.legend(fontsize = 30)
plt.savefig("Images/Histories/RADARBiasLoss.png", **save_kwargs)
plt.show()

# Accuracy curves
for key, curve_label in (("t_id_out_accuracy", "Training"), ("val_t_id_out_accuracy", "Validation")):
    plt.plot(curves[key], label = curve_label)
plt.ylim([-0.1, 1.1])
plt.title("Accuracy")
plt.legend(fontsize = 30)
plt.savefig("Images/Histories/RADARBiasAcc.png", **save_kwargs)
plt.show()

# get performance on each sample
sample_dict = {}

# Sample index of every spectrum in each split (argmax over the label axis)
train_ids = np.argmax(train_y, axis = 1)
val_ids = np.argmax(val_y, axis = 1)
test_ids = np.argmax(test_y, axis = 1)

# Number of distinct samples in the training set. The argmax must run over the
# label axis (axis 1); axis 0 would return row positions, not sample indices.
num_patients = len(np.unique(train_ids))

# Train first, then validation and test (spectra not seen during training)
split_data = [(train_x, train_y, train_lgm, train_ids),
              (val_x, val_y, val_lgm, val_ids),
              (test_x, test_y, test_lgm, test_ids)]

for i in range(num_patients):
    row = []
    for x_all, y_all, lgm_all, ids in split_data:
        # Get data from class i of this split
        mask = ids == i
        d = np.squeeze(x_all[mask])
        y = np.squeeze(y_all[mask])
        lgm = np.squeeze(lgm_all[mask])

        # evaluate() yields five values here; the 4th and 5th are used as the
        # sample-ID accuracy and LGm accuracy — ordering depends on how
        # make_split_model compiles the model, TODO confirm.
        _, _, _, acc, acc_lgm = sample_model.evaluate(d, [y, lgm], batch_size = 256)
        row.extend([acc, acc_lgm, int(len(d))])

    # Layout: [train acc, train LGm acc, train size, val ..., test ...]
    sample_dict[i] = np.array(row)

# Save the metrics in a dataframe
header = ["Sample ID", "Train Accuracy", "Train LGm Accuracy", "Train Size",
          "Validation Accuracy", "Validation LGm Accuracy", "Validation Size",
          "Test Accuracy", "Test LGm Accuracy", "Test Size"]

# The dict keys (sample indices) become the index, so the "Sample ID" entry is skipped
df = pd.DataFrame.from_dict(sample_dict, columns = header[1:],
                                        orient = "index")

# Round the metrics to two decimals
df = df.round(decimals = 2)

# Store the spectrum counts as integers so they print without a trailing .0
df = df.astype({"Train Size": int, "Validation Size": int, "Test Size": int})

# Styling for conversion into latex format. `df.style` builds a new Styler on
# every access, so the styled object is kept and reused for the export;
# otherwise the table styles are silently dropped.
styler = df.style.set_table_styles([
    {'selector': 'toprule', 'props': ':hline;'},
    {'selector': 'midrule', 'props': ':hline;'},
    {'selector': 'bottomrule', 'props': ':hline;'},
], overwrite=False)

# One "l" column per data column plus one for the index
column_format = "|" + "l|" * (len(df.columns) + 1)
latex = styler.format(decimal=',', thousands='.', precision=2).to_latex(clines="all;data",  column_format=column_format)
print(latex)

# Save metrics
np.save("Results/RADAREffectOnBias/RADARtestaccuracy.npy", df["Test Accuracy"].values)
np.save("Results/RADAREffectOnBias/RADARallaccuracies.npy", df.values)

# Drop the data arrays and the model used in this cell, then run the garbage collector
del train_x, train_y, val_x, val_y, test_x, test_y, sample_model
gc.collect()

In [None]:
# Load the saved metrics
raw_test_acc = np.load("Results/RADAREffectOnBias/NOPREPtestaccuracy.npy")
prep_test_acc = np.load("Results/RADAREffectOnBias/RADARtestaccuracy.npy")
manual_test_acc = np.load("Results/RADAREffectOnBias/MANUALtestaccuracy.npy")

# Set image parameters
plt.rcParams.update({'font.size': 35})
plt.rcParams["font.family"] = "Times New Roman"

# Sort the samples according to the accuracy on the manually processed data
sorting = np.argsort(manual_test_acc)

plt.figure(figsize = (10, 5))

# Improvement is reported as processed minus raw, so a positive value means
# the preprocessing increased the test accuracy.
mean_diff = np.mean(np.array(manual_test_acc) - np.array(raw_test_acc))
print("Mean improvement between the raw data and manually processed data:", np.round(mean_diff, 2))

mean_diff = np.mean(np.array(prep_test_acc) - np.array(raw_test_acc))
print("Mean improvement between the raw data and RADAR processed data:", np.round(mean_diff, 2))

# One x position per saved per-sample accuracy (derived from the data
# instead of a hard-coded 46)
num_unique_labels = len(raw_test_acc)
x_positions = np.arange(num_unique_labels)
plt.scatter(x_positions, np.array(raw_test_acc)[sorting], label = "Raw data:" + str(np.round(np.mean(raw_test_acc), 2)))
plt.scatter(x_positions, np.array(manual_test_acc)[sorting], label = "Manually processed:" + str(np.round(np.mean(manual_test_acc), 2)))
plt.scatter(x_positions, np.array(prep_test_acc)[sorting], label = "RADAR:" + str(np.round(np.mean(prep_test_acc), 2)))
plt.ylim([-0.1, 1.1])
plt.legend(fontsize = 15)

plt.savefig("Images/Histories/TestACCComparison_rawVprep.png", format="png", transparent = True,
                    dpi = 1000,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()

In [None]:
from sklearn.metrics import PrecisionRecallDisplay, auc, precision_recall_curve


def _per_class_pr_auc(weights_path, spectra_path, labels_path):
    """Return the one-vs-rest precision-recall AUC of every class in a test set.

    A fresh split model is built, the weights at ``weights_path`` are loaded,
    and the spectra at ``spectra_path`` are predicted. For each class index
    present in the labels at ``labels_path`` the precision-recall curve is
    computed from the first model output's score column for that class.

    Parameters
    ----------
    weights_path : str
        File with the saved model weights.
    spectra_path : str
        ``.npy`` file with the test spectra.
    labels_path : str
        ``.npy`` file with the test labels (argmax over axis 1 gives the class).

    Returns
    -------
    list of float
        One AUC per class, ordered by ascending class index.
    """
    model = make_split_model(lr, out_dims = [46, 2])
    model.load_weights(weights_path)

    spectra = np.load(spectra_path)
    true_ids = np.argmax(np.load(labels_path), axis = 1)

    # First model output holds the per-class scores for the sample ID
    id_scores = model.predict(spectra, batch_size = 128)[0]

    aucs = []
    for n in np.unique(true_ids):
        is_class_n = np.where(true_ids == n, 1, 0)
        # Continuous scores are used so the curve has more than one operating
        # point; precision_recall_curve also avoids opening a figure per class.
        precision, recall, _ = precision_recall_curve(is_class_n, id_scores[:, n])
        aucs.append(auc(recall, precision))
    return aucs


# load the models and get their per-spectrum predictions
p = "Data/"

# Raw (file name matches the case used when the weights were saved)
RAW_auc = _per_class_pr_auc("Models/Raw_Bias_quantifier.h5", p + "test_x.npy", p + "test_y_46.npy")
np.save("Results/RADAREffectOnBias/NOPREPtestauc.npy", RAW_auc)
gc.collect()

# Manual
MANUAL_auc = _per_class_pr_auc("Models/MANUAL_Bias_quantifier.h5", p + "test_x_MANUAL.npy", p + "test_y_46.npy")
np.save("Results/RADAREffectOnBias/MANUALtestauc.npy", MANUAL_auc)
gc.collect()

# RADAR
RADAR_auc = _per_class_pr_auc("Models/RADAR_Bias_quantifier.h5", p + "test_x_RADAR.npy", p + "test_y_46.npy")
np.save("Results/RADAREffectOnBias/RADARtestauc.npy", RADAR_auc)
gc.collect()

# Set image parameters
plt.rcParams.update({'font.size': 35})
plt.rcParams["font.family"] = "Times New Roman"

# Sort the samples according to MANUAL performance
sorting = np.argsort(MANUAL_auc)
raw_sorted = np.array(RAW_auc)[sorting]
manual_sorted = np.array(MANUAL_auc)[sorting]
radar_sorted = np.array(RADAR_auc)[sorting]

plt.figure(figsize = (10, 5))

# Improvement is reported as processed minus raw, so a positive value means
# the preprocessing increased the AUC.
mean_diff = np.mean(np.array(MANUAL_auc) - np.array(RAW_auc))
print("Mean improvement between the raw data and manually processed data:", np.round(mean_diff, 2))

mean_diff = np.mean(np.array(RADAR_auc) - np.array(RAW_auc))
print("Mean improvement between the raw data and RADAR processed data:", np.round(mean_diff, 2))

# One x position per evaluated class (derived from the data instead of a hard-coded 46)
num_unique_labels = len(RAW_auc)
x_positions = np.arange(num_unique_labels)

# Vertical connector between the raw and manual value of each class:
# red when raw is at least as good, green when manual processing is better.
for i in range(num_unique_labels):
    connector_color = "red" if raw_sorted[i] >= manual_sorted[i] else "green"
    plt.plot([i, i], [raw_sorted[i], manual_sorted[i]], color = connector_color)

plt.scatter(x_positions, raw_sorted, label = "Raw data:" + str(np.round(np.mean(RAW_auc), 2)))
plt.scatter(x_positions, manual_sorted, label = "Manually processed:" + str(np.round(np.mean(MANUAL_auc), 2)))
plt.scatter(x_positions, radar_sorted, label = "RADAR:" + str(np.round(np.mean(RADAR_auc), 2)))
plt.ylim([-0.1, 1.1])
plt.legend(fontsize = 15)

plt.savefig("Images/Histories/TestAUCComparison_rawVprep.png", format="png", transparent = True,
                    dpi = 300,
                    bbox_inches='tight',
                    pad_inches=0.5)
plt.show()