In [None]:
from Scripts.essentials import *
from Scripts.vars import *

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, balanced_accuracy_score
from sklearn.metrics import precision_recall_curve, average_precision_score

In [None]:
# Wavenumber axis used by the spectrum plots: the "Wave" column of the frequency table.
x_axis = pd.read_excel("Data/RamanFreqTable.xlsx")["Wave"].values

# Echo the hyper-parameters in use.
# NOTE(review): n_folds, lr, batch_size and epochs are not defined in this notebook;
# presumably they come from the star import of Scripts.vars — confirm.
print("Using:\n\t n_folds = ", n_folds, "\n\t learning rate= ", lr, "\n\t batch size = ", batch_size, "\n\t epochs = ", epochs)

In [None]:
# Build the model returned by make_model() with its default arguments and print its summary.
m = make_model()
m.summary()

In [None]:
# Build the model returned by make_split_model() with its default arguments and print its summary.
m = make_split_model()
m.summary()

In [None]:
# Build the model returned by make_encoder() with its default arguments and print its summary.
m = make_encoder()
m.summary()

# Simple example of how applying preprocessing to data can help with the spectrum effect

In [None]:
from sklearn import metrics  # NOTE(review): not referenced in the visible notebook — confirm it is still needed

# ---------------------------------------------------------------------------
# Helpers shared by the raw-data and the manually-preprocessed-data experiments.
# Both experiments compare the spectra of two samples (label indices 10 and 17),
# then train a 2-class model on them with stratified k-fold cross-validation.
# ---------------------------------------------------------------------------

p = "Data/"


def _load_splits(suffix):
    """Concatenate the train, val and test arrays saved as Data/<split>_<suffix>.npy."""
    return np.concatenate([np.load(p + split + "_" + suffix + ".npy")
                           for split in ("train", "val", "test")])


def _apply_font_settings():
    """Set the font size and family used by every figure of this cell."""
    plt.rcParams.update({'font.size': 40})
    plt.rcParams["font.family"] = "Times New Roman"


def _label_wavenumber_axis():
    """Put three ticks on the wavenumber axis and label it."""
    # NOTE(review): 1737 is hard-coded; presumably it is len(x_axis) - 1 — confirm.
    plt.xticks(x_axis[[0, int(1737/2), 1737]])
    plt.xlabel(r"Wavenumber (cm$^{-1}$)")


def _save_and_show(path):
    """Write the current figure to `path` as a transparent 300-dpi PNG, then display it."""
    plt.savefig(path, format="png", transparent = True,
                dpi = 300,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.show()


def _plot_population(spectra, out_path):
    """Plot the per-wavenumber mean (dashed) and the mean ± 1 std band of `spectra`."""
    mean = np.mean(spectra, axis = 0)
    sd = np.std(spectra, axis = 0)

    _apply_font_settings()
    plt.fill_between(x_axis, mean - sd, mean + sd, alpha = 1, color = "Red")
    plt.plot(x_axis, mean, linestyle = "--", color = "Black")
    _label_wavenumber_axis()
    _save_and_show(out_path)


def _plot_population_comparison(first, second, out_path):
    """Overlay the mean ± 1 std bands of the HF-1887 (`first`) and HF-2070 (`second`) spectra."""
    mean1, sd1 = np.mean(first, axis = 0), np.std(first, axis = 0)
    mean2, sd2 = np.mean(second, axis = 0), np.std(second, axis = 0)

    _apply_font_settings()
    plt.fill_between(x_axis, mean1 - sd1, mean1 + sd1, alpha = 0.5, color = "Cyan", label = "HF-1887")
    plt.plot(x_axis, mean1, linestyle = "--", color = "blue", label = "Mean(HF-1887)")
    plt.fill_between(x_axis, mean2 - sd2, mean2 + sd2, alpha = 0.5, color = "Magenta", label = "HF-2070")
    plt.plot(x_axis, mean2, linestyle = "--", color = "Red", label = "Mean(HF-2070)")
    _label_wavenumber_axis()
    plt.legend(fontsize=20)
    _save_and_show(out_path)


def _cross_validated_roc(d, l, tag, auc_decimals):
    """
    Train a fresh 2-class model on every stratified fold and plot the ROC curves.

    Parameters
    ----------
    d, l : arrays of spectra and their 0/1 labels.
    tag : str used in the output names ("Results/InitialTest_<tag>_hist_<fold>.npy",
        "Models/InitialTest_<tag>_Model_<fold>.npy", "Images/(<tag>)FinalROC.png").
    auc_decimals : number of decimals of the mean AUC shown in the legend.

    Returns
    -------
    (tprs, aucs, model, hist1): the interpolated per-fold TPR curves, the per-fold AUCs,
    and the model / fit history of the last fold.
    """
    # Same shuffled, seeded partition for every data variant, so that the ROC curves
    # of the different preprocessing variants are computed on identical folds.
    folds = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42).split(d, l)

    mean_fpr = np.linspace(0, 1, 200)
    tprs = []
    aucs = []
    model = None
    hist1 = None

    plt.figure(figsize = (10, 10))
    for en, (train_ix, val_ix) in enumerate(folds):
        d_train = d[train_ix]
        l_train = l[train_ix]
        d_val = d[val_ix]
        l_val = l[val_ix]

        # Shuffle the training fold with NumPy's global RNG.
        ix = np.arange(len(d_train))
        np.random.shuffle(ix)
        d_train = d_train[ix]
        l_train = l_train[ix]

        # Create and train the model
        reset_seed()
        model = make_model(lr = lr, out_dim = 2)
        hist1 = model.fit(d_train, l_train, epochs = epochs, batch_size = batch_size,
                          validation_data = (d_val, l_val))

        np.save("Results/InitialTest_"+tag+"_hist_"+str(en)+".npy", hist1.history)
        # NOTE(review): the ".npy" suffix is kept so existing file names stay valid; the
        # multi-head cells save weights as ".h5" — confirm the installed Keras accepts both.
        model.save_weights("Models/InitialTest_"+tag+"_Model_"+str(en)+".npy")

        # Column 1 of the prediction is used as the score of label 1 (HF-2070).
        prob = model.predict(d_val)[:, 1]
        fpr, tpr, _ = roc_curve(l_val, prob)
        aucs.append(auc(fpr, tpr))

        # Resample on a common FPR grid so that the folds can be averaged.
        interp_tpr = np.interp(mean_fpr, fpr, tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)

        plt.plot(fpr, tpr, lw=1, alpha=0.3)

    # ---- Mean ROC ----
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)

    # ---- Band: mean ± 1.96 * std of the per-fold curves, clipped to [0, 1] ----
    std_tpr = np.std(tprs, axis=0)
    tpr_upper = np.minimum(mean_tpr + 1.96 * std_tpr, 1)
    tpr_lower = np.maximum(mean_tpr - 1.96 * std_tpr, 0)

    plt.plot(mean_fpr, mean_tpr, color="black", lw=2,
             label=f"Mean ROC (AUC={mean_auc:.{auc_decimals}f})")
    plt.fill_between(mean_fpr, tpr_lower, tpr_upper,
                     color="gray", alpha=0.25,
                     label="95% CI")

    # ---- Decorations ----
    plt.plot([0, 1], [0, 1], "--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(fontsize = 20)
    _save_and_show("Images/("+tag+")FinalROC.png")

    return tprs, aucs, model, hist1


########################
# Train on raw data
########################

X = _load_splits("x")
y = _load_splits("y")

# Get sample spectra: rows whose label argmax is 10 (HF-1887) or 17 (HF-2070)
HF_1887 = X[np.argmax(y, axis = 1) == 10]
HF_2070 = X[np.argmax(y, axis = 1) == 17]

_plot_population(HF_1887, "Images/(RAW)SolveSpectrumEffect_1887.png")
_plot_population(HF_2070, "Images/(RAW)SolveSpectrumEffect_2070.png")
_plot_population_comparison(HF_1887, HF_2070, "Images/(RAW)SolveSpectrumEffect_Comparison.png")

# Binary task: label 0 = HF-1887, label 1 = HF-2070
d = np.concatenate([HF_1887, HF_2070])
l = np.concatenate([np.zeros(len(HF_1887)), np.ones(len(HF_2070))])

print("Sample ratio:", str(len(HF_1887)/(len(HF_2070))))

# Show the architecture once; every fold builds its own fresh model.
make_model(lr = lr, out_dim = 2).summary()
tprs, aucs, model, hist1 = _cross_validated_roc(d, l, tag = "Raw", auc_decimals = 3)

########################
# Train on manual data
########################

fig, ax_spectra = plt.subplots(1)

X = _load_splits("x_MANUAL")
y = _load_splits("y")
HF_1887 = X[np.argmax(y, axis = 1) == 10]
HF_2070 = X[np.argmax(y, axis = 1) == 17]

_plot_population(HF_1887, "Images/(MANUAL)SolveSpectrumEffect_1887(prep).png")
_plot_population(HF_2070, "Images/(MANUAL)SolveSpectrumEffect_2070(prep).png")
_plot_population_comparison(HF_1887, HF_2070, "Images/(MANUAL)SolveSpectrumEffect_Comparison.png")

d = np.concatenate([HF_1887, HF_2070])
l = np.concatenate([np.zeros(len(HF_1887)), np.ones(len(HF_2070))])

print("Sample ratio:", str(len(HF_1887)/(len(HF_2070))))

tprs, aucs, model, hist1 = _cross_validated_roc(d, l, tag = "MANUAL", auc_decimals = 2)

In [None]:
def plot_training_comparison(key, title = "Raw", ylims = [-0.05, 4.05]):
    """
    Plot a training metric and its validation counterpart across all folds.

    Reads "Results/InitialTest_<title>_hist_<fold>.npy" for every fold in
    range(n_folds), draws each fold's curve faintly, the across-fold mean, and a
    mean ± 1 std band for both `key` and "val_" + `key`, then saves the figure to
    "Images/<title>_InitialLearning_<key>.png" and shows it.

    Parameters
    ----------
    key : str
        History key to plot (e.g. "loss" or "accuracy"); "val_" + key is plotted too.
    title : str
        Tag of the run whose history files are read; also used in the image name.
    ylims : sequence of two floats
        Lower and upper y-axis limits.
    """
    plt.rcParams.update({'font.size': 40})
    plt.rcParams["font.family"] = "Times New Roman"

    # Load each fold's history once. The files are pickled dicts written by this
    # notebook, hence allow_pickle=True; do not point this at untrusted files.
    histories = [
        np.load("Results/InitialTest_"+title+"_hist_"+str(en)+".npy", allow_pickle = True).item()
        for en in range(n_folds)
    ]

    # (history key, colour, line style of the mean, band label, prefix of the mean label)
    series = (
        (key, "blue", "-", "Train Mean ± Std", "Train, Final Mean:"),
        ("val_"+key, "red", "--", "Validation Mean ± Std", "Val., Final Mean:"),
    )

    for hist_key, color, mean_ls, band_label, mean_prefix in series:
        curves = np.array([hist[hist_key] for hist in histories])
        mean = np.mean(curves, axis = 0)
        std = np.std(curves, axis = 0)
        # Take the x range from the data so the band always matches the curve length.
        steps = np.arange(curves.shape[1])

        plt.plot(curves.T, alpha = 0.1, color = color)
        # `label` (not `title`) is the keyword that puts the band into the legend.
        plt.fill_between(steps, mean - std, mean + std, color = color, alpha = 0.2, label = band_label)
        plt.plot(mean, color = "black", ls = mean_ls, label = mean_prefix + str(np.round(mean[-1], 2)))

    plt.legend(fontsize = 20)
    plt.ylim(ylims)

    plt.savefig("Images/"+title+"_InitialLearning_"+key+".png", format="png", transparent = True,
                    dpi = 1000,
                    bbox_inches='tight',
                    pad_inches=0.5)
    plt.show()

In [None]:
# NOTE(review): `stats` and `conf_level` are not referenced anywhere in the visible
# notebook — confirm they are still needed.
import scipy.stats as stats
conf_level = 0.95


# Train vs. validation loss across folds, read from the "Raw" history files.
key = "loss"

plot_training_comparison(key, title = "Raw")




In [None]:
# Train vs. validation loss across folds, read from the "MANUAL" history files.
key = "loss"

plot_training_comparison(key, title = "MANUAL")

In [None]:

# Train vs. validation accuracy across folds ("Raw" history files); y axis limited to [-0.05, 1.05].
key = "accuracy"

plot_training_comparison(key, title = "Raw", ylims = [-0.05, 1.05])

In [None]:

# Train vs. validation accuracy across folds ("MANUAL" history files); y axis limited to [-0.05, 1.05].
key = "accuracy"

plot_training_comparison(key, title = "MANUAL", ylims = [-0.05, 1.05])

In [None]:
from sklearn.preprocessing import label_binarize

def TrainModel(train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title):
    """
    Train one two-output model ('ID' and 'LGm') on a fold and persist the results.

    Every training sample is weighted, per output, by the square root of the inverse
    relative frequency of its class. The fit history is written to
    "Results/FullTest_<title>_hist_<en>.npy" and the weights to
    "Models/FullTest_<title>_Model_<en>.h5".

    Parameters
    ----------
    train_x, train_y, train_lgm : training inputs, integer ID labels, integer LGm labels.
    val_x, val_y, val_lgm : the same for the validation fold.
    en : fold index used in the output file names.
    title : run tag used in the output file names.

    Returns
    -------
    The trained model.
    """

    def balanced_sample_weights(labels):
        # Class weight = sqrt(1 / (class count / largest class count)),
        # looked up once per sample to allow per-output balancing.
        frequency = np.bincount(labels)
        per_class = np.sqrt((1/(frequency/np.max(frequency))))
        lookup = dict(enumerate(per_class))
        return np.array([lookup[label] for label in labels])

    sw_id = balanced_sample_weights(train_y)
    sw_lgm = balanced_sample_weights(train_lgm)

    # One output unit per distinct label observed in the training fold.
    head_sizes = [len(np.unique(train_y)), len(np.unique(train_lgm))]

    # Build the model from a freshly reset seed, then train it.
    reset_seed()
    sample_model = make_split_model(lr, out_dims = head_sizes,
                                    losses = ["sparse_categorical_crossentropy"] * 2)
    sample_model.summary()

    targets = {'ID': train_y, 'LGm': train_lgm}
    weights = {'ID': sw_id, 'LGm': sw_lgm}
    hist = sample_model.fit(train_x, targets, sample_weight=weights,
                            epochs = epochs, batch_size = batch_size,
                            validation_data = (val_x, [val_y, val_lgm]))

    # Persist the training curves and the learned parameters.
    np.save("Results/FullTest_"+title+"_hist_"+str(en)+".npy", hist.history)
    sample_model.save_weights("Models/FullTest_"+title+"_Model_"+str(en)+".h5")

    return sample_model

def sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title, verbose = False):
    """
    Evaluate a trained two-output model separately on every sample ID.

    For each ID i in range(number of distinct training IDs) the model is evaluated on
    the training spectra and on the validation spectra of that ID. The ID accuracy,
    LGm accuracy and spectrum count of both splits are collected in a table, rounded
    to two decimals, and saved to
    "Results/<title>val_accuracy_<en>.npy" (validation ID accuracy per ID) and
    "Results/<title>all_accuracies_<en>.npy" (the whole table).

    Parameters
    ----------
    sample_model : model whose `evaluate(x, [y, lgm], batch_size=...)` returns five values.
    train_x, train_y, train_lgm : training inputs, integer ID labels, integer LGm labels.
    val_x, val_y, val_lgm : the same for the validation fold.
    en : fold index used in the output file names.
    title : run tag used in the output file names.
    verbose : if True, also print the table as LaTeX.
    """

    def _class_metrics(x, ids, lgm, i):
        """Return (ID accuracy, LGm accuracy, number of spectra) for the rows with ID i."""
        mask = ids == i
        # NOTE(review): squeeze would also drop the batch axis if an ID has exactly one
        # spectrum in this split — confirm that cannot happen with the data used.
        d = np.squeeze(x[mask])
        # Only the 4th and 5th values are used; presumably evaluate returns
        # [total loss, ID loss, LGm loss, ID accuracy, LGm accuracy] — confirm against
        # the metrics configured in make_split_model.
        _, _, _, acc, acc_lgm = sample_model.evaluate(
            d, [np.squeeze(ids[mask]), np.squeeze(lgm[mask])], batch_size = 256)
        return acc, acc_lgm, int(len(d))

    # get performance on each sample
    sample_dict = {}
    for i in range(len(np.unique(train_y))):
        # Spectra of ID i seen during training
        train_acc, train_acc_lgm, train_size = _class_metrics(train_x, train_y, train_lgm, i)
        # Spectra of ID i from the validation fold, not seen during training
        val_acc, val_acc_lgm, val_size = _class_metrics(val_x, val_y, val_lgm, i)

        sample_dict[i] = np.array([train_acc, train_acc_lgm, train_size, val_acc, val_acc_lgm, val_size])

    # Save the metrics in a dataframe (one row per ID)
    columns = ["Train Accuracy", "Train LGm Accuracy", "Train Size",
               "Validation Accuracy", "Validation LGm Accuracy", "Validation Size"]
    df = pd.DataFrame.from_dict(sample_dict, columns = columns, orient = "index")

    # Round them to two decimals
    df = df.round(decimals = 2)

    # Cast the number of spectra to integer, looks nicer than the decimal form with .0 after each number
    df["Train Size"] = df["Train Size"].astype(int)
    df["Validation Size"] = df["Validation Size"].astype(int)

    if verbose:
        # `df.style` returns a new Styler on every access, so the table styles must be
        # set on the same Styler object that is later converted to LaTeX.
        styler = df.style.set_table_styles([
            {'selector': 'toprule', 'props': ':hline;'},
            {'selector': 'midrule', 'props': ':hline;'},
            {'selector': 'bottomrule', 'props': ':hline;'},
        ], overwrite=False)

        # One "l" column per data column plus one for the index.
        column_format = "|" + "l|" * (len(df.columns) + 1)

        # Print the latex table, can be copied into the editor
        latex = styler.format(decimal=',', thousands='.', precision=2).to_latex(
            clines="all;data", column_format=column_format)
        print(latex)

    # Save metrics
    np.save("Results/"+title+"val_accuracy_"+str(en)+".npy", df["Validation Accuracy"].values)
    np.save("Results/"+title+"all_accuracies_"+str(en)+".npy", df.values)
    

def compute_multiclass_roc(true_labels, pred_probs):
    """
    Compute per-fold ROC curves and AUC values.

    Accepts, per fold, predictions shaped (N,), (N, 1) or (N, C). The set of classes
    is taken from the labels of the first fold.

    Returns
    -------
    Binary (two classes): ([fprs], [tprs], aucs, classes) where fprs / tprs hold one
    curve per fold and aucs is an array with one AUC per fold.
    Otherwise: (fpr_list, tpr_list, auc_scores, classes) where fpr_list[k] / tpr_list[k]
    hold the one-vs-rest curves of class k for every fold and auc_scores holds, per
    fold, the mean of the per-class AUCs.

    Raises
    ------
    ValueError
        In the multiclass case, if a fold's prediction matrix does not have one
        column per class.
    """
    # Bring every prediction array to 2-D
    fold_preds = [np.asarray(raw) for raw in pred_probs]
    fold_preds = [arr.reshape(-1,1) if arr.ndim == 1 else arr for arr in fold_preds]
    fold_labels = [np.asarray(lab) for lab in true_labels]

    # Number of classes comes from the TRUE labels of the first fold
    classes = np.unique(fold_labels[0])
    n_classes = len(classes)

    if n_classes == 2:
        # Binary: score with the positive-class column
        fprs, tprs, aucs = [], [], []
        for labels, probs in zip(fold_labels, fold_preds):
            if probs.shape[1] == 1:
                # single sigmoid column -> [1 - p, p]
                probs = np.hstack([1-probs, probs])
            fpr, tpr, _ = roc_curve(labels, probs[:,1])
            fprs.append(fpr)
            tprs.append(tpr)
            aucs.append(auc(fpr, tpr))
        return [fprs], [tprs], np.array(aucs), classes

    # Multiclass: one-vs-rest curve per class, per fold
    fpr_list = [[] for _ in range(n_classes)]
    tpr_list = [[] for _ in range(n_classes)]
    auc_scores = []

    for labels, probs in zip(fold_labels, fold_preds):
        if probs.shape[1] != n_classes:
            raise ValueError(f"Prediction columns {probs.shape[1]} != number of classes {n_classes}")
        onehot = label_binarize(labels, classes=classes)
        per_class_auc = []
        for k in range(n_classes):
            fpr, tpr, _ = roc_curve(onehot[:,k], probs[:,k])
            fpr_list[k].append(fpr)
            tpr_list[k].append(tpr)
            per_class_auc.append(auc(fpr, tpr))
        auc_scores.append(np.mean(per_class_auc))

    return fpr_list, tpr_list, np.array(auc_scores), classes


# ========= Plotting function =========
def plot_roc_with_folds_and_mean(true_labels, pred_probs, title, color='Black'):
    """
    Plot one ROC curve per fold plus the across-fold mean, and save the figure.

    Binary problems are scored with the positive-class column; multiclass problems
    are scored one-vs-rest and the per-class curves are averaged into one curve per
    fold. The shaded band is the 2.5th-97.5th percentile of the fold curves.

    Parameters
    ----------
    true_labels : sequence of array-like
        Class labels, one array per fold. The class set is taken from the first fold.
    pred_probs : sequence of array-like
        Predicted scores per fold, shaped (N,), (N, 1) or (N, C).
    title : str
        File stem; the figure is written to "Images/<title>.png".
    color : str
        Colour of the mean curve and of the band.

    Returns
    -------
    (mean_fpr, mean_tpr, lower_tpr, upper_tpr, mean_auc, auc_ci) where auc_ci is the
    (2.5th, 97.5th) percentile pair of the per-fold AUCs.

    Raises
    ------
    ValueError
        If a multiclass fold's prediction matrix does not have one column per class.
    """
    # Bring every prediction array to 2-D
    norm_preds = []
    for p in pred_probs:
        p = np.asarray(p)
        if p.ndim == 1:
            p = p.reshape(-1,1)
        norm_preds.append(p)

    true_labels = [np.asarray(y) for y in true_labels]
    classes = np.unique(true_labels[0])
    n_classes = len(classes)

    # Common FPR grid on which every curve is resampled before averaging
    mean_fpr = np.linspace(0,1,500)
    all_interp_tprs = []
    aucs = []

    plt.figure(figsize=(10,10))

    for y_fold, p_fold in zip(true_labels, norm_preds):
        # Binary
        if n_classes == 2:
            if p_fold.shape[1] == 1:
                p_fold = np.hstack([1-p_fold, p_fold])
            fpr, tpr, _ = roc_curve(y_fold, p_fold[:,1])
            plt.plot(fpr, tpr, alpha=0.2, lw=1)  # individual fold
            interp_tpr = np.interp(mean_fpr, fpr, tpr)
            interp_tpr[0] = 0.0
            all_interp_tprs.append(interp_tpr)
            aucs.append(auc(fpr, tpr))
        # Multiclass
        else:
            if p_fold.shape[1] != n_classes:
                raise ValueError(f"Prediction columns {p_fold.shape[1]} != number of classes {n_classes}")
            Y = label_binarize(y_fold, classes=classes)
            # Compute TPR for each class
            tprs_per_class = []
            aucs_per_class = []
            for k in range(n_classes):
                fpr, tpr, _ = roc_curve(Y[:,k], p_fold[:,k])
                interp_tpr = np.interp(mean_fpr, fpr, tpr)
                # Pin the curve to the origin, as the binary branch does
                interp_tpr[0] = 0.0
                tprs_per_class.append(interp_tpr)
                aucs_per_class.append(auc(fpr, tpr))
            # Average across classes -> one curve per fold
            mean_tpr_fold = np.mean(tprs_per_class, axis=0)
            plt.plot(mean_fpr, mean_tpr_fold, alpha=0.2, lw=1)  # individual fold
            all_interp_tprs.append(mean_tpr_fold)
            aucs.append(np.mean(aucs_per_class))

    # Compute mean TPR and percentile band
    all_interp_tprs = np.array(all_interp_tprs)
    mean_tpr = np.mean(all_interp_tprs, axis=0)
    lower_tpr = np.percentile(all_interp_tprs, 2.5, axis=0)
    upper_tpr = np.percentile(all_interp_tprs, 97.5, axis=0)
    mean_auc = np.mean(aucs)
    auc_ci = (np.percentile(aucs, 2.5), np.percentile(aucs, 97.5))
    # Largest one-sided distance from the mean AUC to its percentile interval
    delta = max(auc_ci[1]-mean_auc, mean_auc-auc_ci[0])

    # Plot mean and band. The legend text is attached to the mean curve itself, so the
    # legend handle shows the mean line instead of the first fold's faint line.
    plt.plot(mean_fpr, mean_tpr, color=color, lw=2,
             label=f"Mean AUC = {mean_auc:.2f} ± {delta:.2f}")
    plt.fill_between(mean_fpr, lower_tpr, upper_tpr, color=color, alpha=0.2)
    plt.plot([0,1],[0,1],'k--', lw=1)

    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc='lower right', fontsize = 32)

    plt.savefig("Images/"+title+".png", format="png", transparent = True,
                        dpi = 300,
                        bbox_inches='tight',
                        pad_inches=0.5)
    plt.show()

    return mean_fpr, mean_tpr, lower_tpr, upper_tpr, mean_auc, auc_ci

def plot_pr_with_folds_and_mean(true_labels, pred_probs, title, color='black'):
    """
    Plot one precision-recall curve per fold plus the across-fold mean, and save it.

    Binary problems are scored with the positive-class column; multiclass problems
    are scored one-vs-rest and the per-class curves are averaged into one curve per
    fold. The shaded band is the 2.5th-97.5th percentile of the fold curves. The
    reported "AUC-PR" is the trapezoidal area under the precision-recall curve
    (`auc(recall, precision)`), not scikit-learn's average precision.

    Parameters
    ----------
    true_labels : sequence of array-like
        Class labels, one array per fold. The class set is taken from the first fold.
    pred_probs : sequence of array-like
        Predicted scores per fold, shaped (N,), (N, 1) or (N, C).
    title : str
        File stem; the figure is written to "Images/<title>.png".
    color : str
        Colour of all curves and of the band.

    Returns
    -------
    (mean_recall, mean_prec, lower_prec, upper_prec, mean_auc, (ci_low, ci_high))
    where the last pair is the (2.5th, 97.5th) percentile of the per-fold areas.

    Raises
    ------
    ValueError
        If a multiclass fold's prediction matrix does not have one column per class.
    """

    # Normalize predictions (ensure 2D)
    norm_preds = []
    for p in pred_probs:
        p = np.asarray(p)
        if p.ndim == 1:
            p = p.reshape(-1,1)
        norm_preds.append(p)

    true_labels = [np.asarray(y) for y in true_labels]
    classes = np.unique(true_labels[0])
    n_classes = len(classes)

    # Common recall grid on which every curve is resampled before averaging
    mean_recall = np.linspace(0, 1, 500)
    fold_interp_precisions = []
    fold_auc_values = []

    plt.figure(figsize=(10, 10))

    for y_fold, p_fold in zip(true_labels, norm_preds):

        # ---------- BINARY ----------
        if n_classes == 2:
            if p_fold.shape[1] == 1:
                p_fold = np.hstack([1 - p_fold, p_fold])

            prec, rec, _ = precision_recall_curve(y_fold, p_fold[:, 1])
            pr_auc = auc(rec, prec)

            # Plot 1 line per fold
            plt.plot(rec, prec, alpha=0.25, lw=1, color=color)

            # The arrays are reversed before interpolation so that recall is
            # passed to np.interp in ascending order.
            interp_prec = np.interp(mean_recall, rec[::-1], prec[::-1])
            fold_interp_precisions.append(interp_prec)
            fold_auc_values.append(pr_auc)

        # ---------- MULTICLASS ----------
        else:
            if p_fold.shape[1] != n_classes:
                raise ValueError("Predict-prob dimensions don't match number of classes!")

            Y = label_binarize(y_fold, classes=classes)

            # Compute PR per class, average WITHIN fold
            class_interp = []
            class_aucs = []

            for k in range(n_classes):
                prec, rec, _ = precision_recall_curve(Y[:, k], p_fold[:, k])
                class_aucs.append(auc(rec, prec))

                interp_prec = np.interp(mean_recall, rec[::-1], prec[::-1])
                class_interp.append(interp_prec)

            # ---- AVERAGE classes -> ONE line per fold ----
            fold_mean_prec = np.mean(class_interp, axis=0)
            fold_mean_auc = np.mean(class_aucs)

            # Plot 1 line per fold (not per class)
            plt.plot(mean_recall, fold_mean_prec, alpha=0.25, lw=1, color=color)

            fold_interp_precisions.append(fold_mean_prec)
            fold_auc_values.append(fold_mean_auc)

    fold_interp_precisions = np.array(fold_interp_precisions)

    mean_prec = fold_interp_precisions.mean(axis=0)
    lower_prec = np.percentile(fold_interp_precisions, 2.5, axis=0)
    upper_prec = np.percentile(fold_interp_precisions, 97.5, axis=0)

    mean_auc = np.mean(fold_auc_values)
    ci_low, ci_high = np.percentile(fold_auc_values, [2.5, 97.5])
    # Largest one-sided distance from the mean area to its percentile interval
    delta = max(mean_auc - ci_low, ci_high - mean_auc)

    # The legend text is attached to the mean curve itself, so the legend handle
    # shows the mean line instead of the first fold's faint line.
    plt.plot(mean_recall, mean_prec, color=color, lw=2,
             label=f"AUC-PR = {mean_auc:.2f} ± {delta:.2f}")
    plt.fill_between(mean_recall, lower_prec, upper_prec, color=color, alpha=0.2)
    plt.xlabel("Recall")
    plt.ylabel("Precision")

    # Short legend with ± notation
    plt.legend(loc="lower left", fontsize = 32)
    plt.savefig("Images/"+title+".png", format="png", transparent = True,
                        dpi = 300,
                        bbox_inches='tight',
                        pad_inches=0.5)
    plt.show()

    return mean_recall, mean_prec, lower_prec, upper_prec, mean_auc, (ci_low, ci_high)

In [None]:
# Labels for the multi-head experiments: stack the static train/val/test label files.
p = "Data/"
lgm_all = np.concatenate([np.load(p + "train_lgm.npy"), np.load(p + "val_lgm.npy"), np.load(p + "test_lgm.npy")])
y_all = np.concatenate([np.load(p + "train_y_46.npy"), np.load(p + "val_y_46.npy"), np.load(p + "test_y_46.npy")])
# Reduce each label row to the index of its largest entry
# (presumably the rows are one-hot encoded — confirm).
lgm_all = np.argmax(lgm_all, axis = -1)
y_all = np.argmax(y_all, axis = -1)
# Binarise the LGm label: indices above 2 become 0, indices 0-2 become 1.
lgm_all = np.where(lgm_all > 2, 0, 1)


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Get and prepare the full dataset (join the static train-val-test splits for the full data)
p = "Data/"
title = "MANUAL"
X_all = np.concatenate([np.load(p + "train_x_"+title+".npy"), np.load(p + "val_x_"+title+".npy"), np.load(p + "test_x_"+title+".npy")])

# Get the folds, stratified on the ID labels (y_all / lgm_all come from the label-loading cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)

# Validation predictions and labels of both heads, one entry per fold.
# Initialised before the loop so the ROC/PR code below never sees stale lists
# from a previously executed cell.
pred_ID_all = []
pred_LGm_all = []
true_ID_all = []
true_LGm_all = []

for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle (NumPy global RNG; same permutation for inputs and both label sets)
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    sample_model = TrainModel(train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title)

    # Collect the validation predictions for the ROC / PR curves
    preds = sample_model.predict(val_x, batch_size=256)
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # get performance on each sample
    sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title, verbose = False)

    # Release the fold's arrays and model before the next fold is built
    del train_x
    del train_y
    del val_x
    del val_y
    del sample_model
    gc.collect()

# ----- Compute ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID, mean_auc_ID, auc_ci_ID = \
    plot_roc_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")ROC_ID", color='black')

mean_recall_ID, mean_precision_ID, lower_precision_ID, upper_precision_ID, \
    mean_auprc_ID, auprc_ci_ID = \
    plot_pr_with_folds_and_mean(true_ID_all, pred_ID_all,
                                title="("+title+")PR_ID", color='black')

# ---- LGm head ----
# NOTE(review): the ROC image of this head is named "ROC_IDH1" while the PR image is
# named "PR_LGm"; names are kept as they are — confirm which one is intended.
mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm, mean_auc_LGm, auc_ci_LGm = \
    plot_roc_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")ROC_IDH1", color='black')


mean_recall_LGm, mean_precision_LGm, lower_precision_LGm, upper_precision_LGm, \
    mean_auprc_LGm, auprc_ci_LGm = \
    plot_pr_with_folds_and_mean(true_LGm_all, pred_LGm_all,
                                title="("+title+")PR_LGm", color='black')


# ---- Save the mean AUC and its percentile interval for both heads ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Get and prepare the full dataset (join the static train-val-test splits for the full data)
p = "Data/"
title = "RADAR"
X_all = np.concatenate([np.load(p + "train_x_"+title+".npy"), np.load(p + "val_x_"+title+".npy"), np.load(p + "test_x_"+title+".npy")])

# Get the folds, stratified on the ID labels (y_all / lgm_all come from the label-loading cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)

# Validation predictions and labels of both heads, one entry per fold.
# Initialised before the loop so the ROC code below never sees stale lists
# from a previously executed cell.
pred_ID_all = []
pred_LGm_all = []
true_ID_all = []
true_LGm_all = []

for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle (NumPy global RNG; same permutation for inputs and both label sets)
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    sample_model = TrainModel(train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title)

    # Collect the validation predictions for the ROC curves
    preds = sample_model.predict(val_x, batch_size=256)
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # get performance on each sample
    sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title, verbose = False)

    # Release the fold's arrays and model before the next fold is built
    del train_x
    del train_y
    del val_x
    del val_y
    del sample_model
    gc.collect()

# ----- Compute ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID, mean_auc_ID, auc_ci_ID = \
    plot_roc_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")ROC_ID", color='black')

# ---- LGm head ----
mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm, mean_auc_LGm, auc_ci_LGm = \
    plot_roc_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")ROC_IDH1", color='black')

# ---- Save the mean AUC and its percentile interval for both heads ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Rebuild the complete RAW dataset by stacking the stored train / val / test splits
p = "Data/"
title = "RAW"
X_all = np.concatenate([np.load(p + part + "_x.npy") for part in ("train", "val", "test")])

plt.figure(figsize=(8, 8))

# Stratified folds on the ID labels (`y_all` / `lgm_all` come from an earlier cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
folds = organizer.split(X_all, y_all)

# Out-of-fold model outputs and the matching ground truth, one entry per fold
pred_ID_all, pred_LGm_all = [], []
true_ID_all, true_LGm_all = [], []

for en, fold in enumerate(folds):
    train_ix, val_ix = fold[0], fold[1]

    # Slice the fold into its training and validation parts
    train_x, train_y, train_lgm = X_all[train_ix], y_all[train_ix], lgm_all[train_ix]
    val_x, val_y, val_lgm = X_all[val_ix], y_all[val_ix], lgm_all[val_ix]

    # Shuffle the training part (same permutation for spectra and both label sets)
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x, train_y, train_lgm = train_x[ix], train_y[ix], train_lgm[ix]

    sample_model = TrainModel(train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title=title)

    # Collect the held-out predictions for the ROC curves drawn after the loop
    preds = sample_model.predict(val_x, batch_size=256)
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # Per-sample (per-class) performance of this fold's model
    sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en,
                      title=title, verbose=False)

    # Release the fold's arrays and model before the next fold
    del train_x, train_y, val_x, val_y, sample_model
    gc.collect()

# ----- ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
(mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID,
 mean_auc_ID, auc_ci_ID) = plot_roc_with_folds_and_mean(
    true_ID_all, pred_ID_all, title="("+title+")ROC_ID", color='black')

# ---- LGm head ----
(mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm,
 mean_auc_LGm, auc_ci_LGm) = plot_roc_with_folds_and_mean(
    true_LGm_all, pred_LGm_all, title="("+title+")ROC_IDH1", color='black')

# ---- Persist mean AUC and its confidence bounds ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Get and prepare the full dataset (join the static train-val-test splits for the full data)
p = "Data/"
title = "MANUAL"
X_all = np.concatenate([np.load(p + "train_x_"+title+".npy"), np.load(p + "val_x_"+title+".npy"), np.load(p + "test_x_"+title+".npy")])

# Stratified folds on the ID labels (`y_all` / `lgm_all` come from an earlier cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)

# Balanced accuracy of each head on the held-out part of every fold
global_ID_acc = []
global_IDH_acc = []

# Out-of-fold model outputs and ground truth, one entry per fold (input of the ROC / PR helpers)
pred_ID_all = []
pred_LGm_all = []
true_ID_all = []
true_LGm_all = []

# A single model instance is reused; only its weights are replaced for every fold
sample_model = make_split_model(lr, out_dims = [len(np.unique(y_all)), len(np.unique(lgm_all))],
                                losses = ["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"])
sample_model.summary()
for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set (only needed by the optional per-sample evaluation below)
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    # Stored weights of the model belonging to this fold
    sample_model.load_weights("Models/FullTest_"+title+"_Model_"+str(en)+".h5")

    preds = sample_model.predict(val_x)
    id_preds = np.argmax(preds[0], axis = 1)
    lgm_preds = np.argmax(preds[1], axis = 1)

    global_ID_acc.append(balanced_accuracy_score(val_y, id_preds))
    # The LGm head has to be scored against the LGm labels, not against the patient IDs
    global_IDH_acc.append(balanced_accuracy_score(val_lgm, lgm_preds))
    print(id_preds)
    print(lgm_preds)

    # Keep the raw head outputs for the ROC / PR curves
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # get performance on each sample
    # sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title, verbose = False)
    del train_x
    del train_y
    del val_x
    del val_y

    gc.collect()


print(global_ID_acc)
print(global_IDH_acc)
print(np.mean(global_ID_acc), np.mean(global_IDH_acc))

# ----- Compute ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID, mean_auc_ID, auc_ci_ID = \
    plot_roc_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")AUC_ID", color='black')

# The PR results get their own name so that the ROC AUC saved below is not overwritten
pr_results_ID = \
    plot_pr_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")PR_ID", color='black')

# ---- LGm head ----
mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm, mean_auc_LGm, auc_ci_LGm = \
    plot_roc_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")AUC_IDH1", color='black')

pr_results_LGm = \
    plot_pr_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")PR_IDH1", color='black')

# ---- Save the ROC AUCs (mean and confidence bounds) ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Get and prepare the full dataset (join the static train-val-test splits for the full data)
p = "Data/"
title = "RADAR"
X_all = np.concatenate([np.load(p + "train_x_"+title+".npy"), np.load(p + "val_x_"+title+".npy"), np.load(p + "test_x_"+title+".npy")])

# Stratified folds on the ID labels (`y_all` / `lgm_all` come from an earlier cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)

# A single model instance is reused; only its weights are replaced for every fold
sample_model = make_split_model(lr, out_dims = [len(np.unique(y_all)), len(np.unique(lgm_all))],
                                losses = ["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"])
sample_model.summary()

# Balanced accuracy of each head on the held-out part of every fold
global_ID_acc = []
global_IDH_acc = []

# Out-of-fold model outputs and ground truth, one entry per fold (input of the ROC / PR helpers)
pred_ID_all = []
pred_LGm_all = []
true_ID_all = []
true_LGm_all = []

for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set (only needed by the optional per-sample evaluation below)
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    # Stored weights of the model belonging to this fold
    sample_model.load_weights("Models/FullTest_"+title+"_Model_"+str(en)+".h5")

    preds = sample_model.predict(val_x)
    id_preds = np.argmax(preds[0], axis = 1)
    lgm_preds = np.argmax(preds[1], axis = 1)

    global_ID_acc.append(balanced_accuracy_score(val_y, id_preds))
    # The LGm head has to be scored against the LGm labels, not against the patient IDs
    global_IDH_acc.append(balanced_accuracy_score(val_lgm, lgm_preds))
    print(id_preds)
    print(lgm_preds)

    # Keep the raw head outputs for the ROC / PR curves
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # get performance on each sample
    # sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title, verbose = False)
    del train_x
    del train_y
    del val_x
    del val_y
    gc.collect()


print(global_ID_acc)
print(global_IDH_acc)
print(np.mean(global_ID_acc), np.mean(global_IDH_acc))

# ----- Compute ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID, mean_auc_ID, auc_ci_ID = \
    plot_roc_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")AUC_ID", color='black')

# The PR results get their own name so that the ROC AUC saved below is not overwritten
pr_results_ID = \
    plot_pr_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")PR_ID", color='black')

# ---- LGm head ----
mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm, mean_auc_LGm, auc_ci_LGm = \
    plot_roc_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")AUC_IDH1", color='black')

pr_results_LGm = \
    plot_pr_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")PR_IDH1", color='black')

# ---- Save the ROC AUCs (mean and confidence bounds) ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

gc.collect()

# Get and prepare the full dataset (join the static train-val-test splits for the full data)
p = "Data/"
title = "RAW"
X_all = np.concatenate([np.load(p + "train_x.npy"), np.load(p + "val_x.npy"), np.load(p + "test_x.npy")])


# Stratified folds on the ID labels (`y_all` / `lgm_all` come from an earlier cell)
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)

# A single model instance is reused; only its weights are replaced for every fold
sample_model = make_split_model(lr, out_dims = [len(np.unique(y_all)), len(np.unique(lgm_all))],
                                losses = ["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"])

# Balanced accuracy of each head on the held-out part of every fold
global_ID_acc = []
global_IDH_acc = []

# Out-of-fold model outputs and ground truth, one entry per fold (input of the ROC / PR helpers)
pred_ID_all = []
pred_LGm_all = []
true_ID_all = []
true_LGm_all = []

for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set (only needed by the optional per-sample evaluation below)
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    # Stored weights of the model belonging to this fold
    sample_model.load_weights("Models/FullTest_"+title+"_Model_"+str(en)+".h5")

    preds = sample_model.predict(val_x)
    id_preds = np.argmax(preds[0], axis = 1)
    lgm_preds = np.argmax(preds[1], axis = 1)

    global_ID_acc.append(balanced_accuracy_score(val_y, id_preds))
    # The LGm head has to be scored against the LGm labels, not against the patient IDs
    global_IDH_acc.append(balanced_accuracy_score(val_lgm, lgm_preds))
    print(id_preds)
    print(lgm_preds)

    # Keep the raw head outputs for the ROC / PR curves
    pred_ID_all.append(preds[0])       # model output for ID
    pred_LGm_all.append(preds[1])      # model output for LGm
    true_ID_all.append(val_y)
    true_LGm_all.append(val_lgm)

    # get performance on each sample
    # sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title = title, verbose = False)
    del train_x
    del train_y
    del val_x
    del val_y
    gc.collect()


print(global_ID_acc)
print(global_IDH_acc)
print(np.mean(global_ID_acc), np.mean(global_IDH_acc))

# ----- Compute ROC for both outputs -----
fpr_ID, tpr_ID, auc_ID, classes_ID = compute_multiclass_roc(true_ID_all, pred_ID_all)
fpr_LGm, tpr_LGm, auc_LGm, classes_LGm = compute_multiclass_roc(true_LGm_all, pred_LGm_all)

# ---- ID head ----
mean_fpr_ID, mean_tpr_ID, lower_tpr_ID, upper_tpr_ID, mean_auc_ID, auc_ci_ID = \
    plot_roc_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")AUC_ID", color='black')

# The PR results get their own name so that the ROC AUC saved below is not overwritten
pr_results_ID = \
    plot_pr_with_folds_and_mean(true_ID_all, pred_ID_all, title="("+title+")PR_ID", color='black')

# ---- LGm head ----
mean_fpr_LGm, mean_tpr_LGm, lower_tpr_LGm, upper_tpr_LGm, mean_auc_LGm, auc_ci_LGm = \
    plot_roc_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")AUC_IDH1", color='black')

pr_results_LGm = \
    plot_pr_with_folds_and_mean(true_LGm_all, pred_LGm_all, title="("+title+")PR_IDH1", color='black')

# ---- Save the ROC AUCs (mean and confidence bounds) ----
np.save("Results/"+title+"_AUC_ID.npy", [mean_auc_ID, auc_ci_ID[0], auc_ci_ID[1]])
np.save("Results/"+title+"_AUC_LGm.npy", [mean_auc_LGm, auc_ci_LGm[0], auc_ci_LGm[1]])


In [None]:
# Fold-averaged validation accuracies, keyed by preprocessing variant:
# one array entry per row of the stored accuracy tables.
id_dict, lgm_dict = {}, {}

for title in ("MANUAL", "RADAR", "RAW"):

    accs = []
    for en in range(n_folds):
        acc = np.load(f"Results/{title}all_accuracies_{en}.npy", allow_pickle=True)
        # Third- and second-to-last columns
        # (presumably validation ID / LGm accuracy -- confirm against the code that wrote the file)
        accs.append(acc[:, -3:-1])

    # Stack the folds and average over the fold axis
    accs = np.array(accs)
    mean = accs.mean(axis=0)

    id_dict[title], lgm_dict[title] = mean.T[0], mean.T[1]
    

In [None]:
import scipy.stats as stats
def _mean_ci(values, level):
    """Return the Student-t interval around the mean of `values` and its half-width rounded to 3 decimals."""
    centre = np.mean(values)
    bounds = stats.t.interval(level, df=len(values)-1, loc=centre,
                              scale=np.std(values, ddof=1) / np.sqrt(len(values)))
    return bounds, np.round(np.max(np.abs(centre - bounds)), 3)


def _legend_text(prefix, values, delta):
    """Build the legend entry: prefix, mean rounded to 2 decimals, CI half-width."""
    return prefix + str(np.round(np.mean(values), 2)) + ", CI: ± " + str(delta)


conf_level = 0.95

################
# ID accuracies
################

# All three series are ordered by the RAW accuracies so the points line up per sample
sorting = np.argsort(id_dict["RAW"])
x_range = np.arange(len(id_dict["RAW"]))

plt.figure(figsize=(10, 5))

RAW_cl, RAW_delta = _mean_ci(id_dict["RAW"], conf_level)
MANUAL_cl, MANUAL_delta = _mean_ci(id_dict["MANUAL"], conf_level)
RADAR_cl, RADAR_delta = _mean_ci(id_dict["RADAR"], conf_level)

for prefix, key, delta in (("Raw data:", "RAW", RAW_delta),
                           ("Manual data:", "MANUAL", MANUAL_delta),
                           ("RADAR data:", "RADAR", RADAR_delta)):
    plt.scatter(x_range, id_dict[key][sorting], label=_legend_text(prefix, id_dict[key], delta))

plt.ylim([-0.1, 1.1])
plt.legend(fontsize=25, loc="upper left")

plt.savefig("Images/Histories/ID_ACC_comparison.png", format="png", transparent=True,
            dpi=1000, bbox_inches='tight', pad_inches=0.5)
plt.show()


################
# LGm accuracies
################

sorting = np.argsort(lgm_dict["RAW"])

RAW_cl, RAW_delta = _mean_ci(lgm_dict["RAW"], conf_level)
MANUAL_cl, MANUAL_delta = _mean_ci(lgm_dict["MANUAL"], conf_level)
RADAR_cl, RADAR_delta = _mean_ci(lgm_dict["RADAR"], conf_level)

plt.figure(figsize=(10, 5))

for prefix, key, delta in (("Raw data:", "RAW", RAW_delta),
                           ("Manual data:", "MANUAL", MANUAL_delta),
                           ("RADAR data:", "RADAR", RADAR_delta)):
    plt.scatter(x_range, lgm_dict[key][sorting], label=_legend_text(prefix, lgm_dict[key], delta))

plt.ylim([-0.1, 1.1])
plt.legend(fontsize=25, loc="lower right")

plt.savefig("Images/Histories/LGM_ACC_comparison.png", format="png", transparent=True,
            dpi=1000, bbox_inches='tight', pad_inches=0.5)
plt.show()



# Adding the reduction script here to continue running

In [None]:
from Scripts.essentials import *
from matplotlib.animation import FuncAnimation
import io
from PIL import Image

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, average_precision_score

In [None]:

# Location of the .npy splits; set here so this cell does not rely on a `p`
# left over from whichever cell happened to run before it.
p = "Data/"
title = "MANUAL"

# Join the static train / val / test splits into one dataset for cross-validation
X_all = np.concatenate([np.load(p + "train_x_"+title+".npy"), np.load(p + "val_x_"+title+".npy"), np.load(p + "test_x_"+title+".npy")])
lgm_all = np.concatenate([np.load(p + "train_lgm.npy"), np.load(p + "val_lgm.npy"), np.load(p + "test_lgm.npy")])
y_all = np.concatenate([np.load(p + "train_y_46.npy"), np.load(p + "val_y_46.npy"), np.load(p + "test_y_46.npy")])

# Turn the per-row score vectors into integer class indices
lgm_all = np.argmax(lgm_all, axis = -1)
y_all = np.argmax(y_all, axis = -1)
# Binarise the LGm label: indices above 2 become 0, all others 1
# (presumably wildtype vs. mutant -- confirm against the label definition)
lgm_all = np.where(lgm_all > 2, 0, 1)

# Get the folds
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)
gc.collect()

In [None]:
# Initial learning rate of every fold and the fraction by which it shrinks after each repeat
start_lr = 0.001
decay_rate = 0.05


# Create animation of feature importance vector evolution
def plotImp(i, histories):
    """Draw frame `i` of the feature-importance animation on a fresh two-row figure.

    The top row shows the importance vector, the bottom row the encoder output for the
    mean training spectrum together with the stored lower/upper band. The figure is left
    as the current pyplot figure so the caller can save and close it.

    Reads the notebook-level lists `importances`, `transformations` and
    `all_transformations`, which the training loop fills.

    Parameters
    ----------
    i : int
        Index of the frame to draw.
    histories : sequence
        Per-repeat pairs `[validation ID accuracy, validation LGm accuracy]`.
    """
    # NOTE(review): `importances[i]` is recorded before repeat i+1 while `histories[i]` is
    # measured after it -- confirm that this pairing is the intended one.
    plt.clf()
    fig, ax = plt.subplots(2, figsize = (20, 10))

    # Use the vector's own size instead of a hard-coded feature count
    n_features = np.size(importances[i])

    ax[0].plot(importances[i], alpha =  0.1, linestyle = "--", color = "Blue")
    ax[0].scatter(np.arange(n_features), importances[i], color = "blue", alpha = 0.2, s = 5) # Scatter all features
    ax[0].set_ylim([0, 1.05])

    ax[1].fill_between(np.arange(len(transformations[i])), all_transformations[i][0], all_transformations[i][1])
    ax[1].plot(transformations[i], color = "red", alpha = 0.5)
    ax[1].set_ylim([0, 1.05])

    plt.title("Epoch: " + str(i) + "    Val ID Acc: " + str(np.round(histories[i][0], 2)) + "    Val LGm Acc: " + str(np.round(histories[i][1], 2)))
                
    
for en, fold in enumerate(folds):

    # Every fold starts again from the initial learning rate; it is decayed after each repeat
    lr = start_lr

    train_ix = fold[0]
    val_ix = fold[1]

    # Training set
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    # Keras history dicts of phase I and phase II, one entry per repeat
    hist_I = []
    hist_II = []

    # Compute the weights for each category type:
    # square root of (largest class count / class count)
    counts = np.bincount(train_y)
    class_weights = np.sqrt((1/(counts/np.max(counts))))
    cw_id = dict(enumerate(class_weights))

    counts = np.bincount(train_lgm)
    class_weights = np.sqrt((1/(counts/np.max(counts))))
    cw_lgm = dict(enumerate(class_weights))

    # Get sample wise weights to enable multi-output-balancing
    sw_id = np.array([cw_id[c] for c in train_y])
    sw_lgm = np.array([cw_lgm[c] for c in train_lgm])

    reset_seed()
    enc = make_encoder(feature_max_increment = (len(train_x)/batch_size) * epochs)
    bias_model = make_split_model(out_dims = [len(np.unique(train_y)), 2],
                                         losses = ["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"])

    # State before any training: importance vector and encoded mean spectrum
    importances = []
    imp = enc.get_layer("importance").importance.numpy()
    importances.append(imp)

    transformations = []

    transf = np.squeeze(enc.predict(np.expand_dims(np.mean(train_x, axis = 0), 0)))
    transformations.append(transf)

    fig, ax = plt.subplots(1, 2, figsize = (15, 7))
    ax[0].plot(imp)
    ax[0].set_ylim([0, 1])

    ax[1].plot(transf)

    del transf
    gc.collect()

    # Mean +/- one standard deviation of the encoded first 1024 training spectra
    all_transformations = []

    # NOTE(review): the 1024 spectra are wrapped in one extra leading axis before
    # predict(); confirm the encoder accepts this layout.
    transf = np.squeeze(enc.predict(np.expand_dims(train_x[:1024], 0)))
    std = np.std(transf, axis = 0)
    mean = np.mean(transf, axis = 0)
    all_transformations.append([mean - std, mean + std])

    ax[1].fill_between(np.arange(1738), mean - std, mean + std, alpha = 0.5, color = "black")
    ax[1].set_ylim([0, 1])

    plt.show()

    del transf, mean, std, imp

    gc.collect()
    # Balanced validation accuracies [ID, LGm] after each repeat
    histories = []

    for repeat in range(epochs):

        print("Repeat:", str(repeat+1), ", alpha:", str(lr))

        #################
        # I
        #################
        # Train the model with a locked feature importance layer positioned
        # immediately after the input
        enc.trainable = False
        bias_model.trainable = True
        reset_seed()

        split_model = make_combined_model(enc, bias_model,
                                             lr = lr,
                                             losses = ["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"])

        print("Train the id and MutWt models")
        hist_I.append(split_model.fit(train_x,
                         [train_y, train_lgm],                # list instead of dict
                        sample_weight=[sw_id, sw_lgm],
                        batch_size = batch_size,
                        epochs = 1,
                       validation_data=(val_x, [val_y, val_lgm])
                       ).history
        )

        # II
        # Train the Feature importance layer by unlocking it. Lock the other layers to
        # prevent the model from immediately adjusting to the learned feature transformations.
        enc.trainable = True
        bias_model.trainable = False
        reset_seed()
        split_model = make_combined_model(enc, bias_model,
                                             lr = lr,
                                             losses = [negative_CE, "sparse_categorical_crossentropy"],
                                             metrics = ["accuracy"])

        print("Train the encoder to decrease id accuracy and maintain MutWt accuracy")
        hist_II.append(split_model.fit(train_x,
                     [train_y, train_lgm],                # list instead of dict
                        sample_weight=[sw_id, sw_lgm],
                    batch_size = batch_size,
                    epochs = 1,
                    validation_data=(val_x, [val_y, val_lgm]) # Provide true patient ids to the validation to see how accuracy decreases on unseen data
                    ).history
                      )

        # Gather metrics and signals for plotting the gif later
        imp = enc.get_layer("importance").importance.numpy()
        importances.append(imp)

        transf = np.squeeze(enc.predict(np.expand_dims(np.mean(train_x, axis = 0), 0)))
        transformations.append(transf)

        fig, ax = plt.subplots(1, 2, figsize = (15, 7))
        ax[0].plot(imp)
        ax[0].set_ylim([0, 1])

        # Held-out predictions of both heads. A dedicated name is used so the
        # notebook-level data path prefix `p` is not overwritten.
        val_pred = split_model.predict(val_x)
        y_1 = np.argmax(val_pred[0], axis = 1)
        y_2 = np.argmax(val_pred[1], axis = 1)

        h1 = balanced_accuracy_score(val_y, y_1)
        h2 = balanced_accuracy_score(val_lgm, y_2)

        histories.append([h1, h2])
        ax[1].plot(transf)

        del transf

        transf = np.squeeze(enc.predict(np.expand_dims(train_x[:1024], 0)))
        std = np.std(transf, axis = 0)
        mean = np.mean(transf, axis = 0)
        all_transformations.append([mean - std, mean + std])

        ax[1].fill_between(np.arange(1738), mean - std, mean + std, alpha = 0.5, color = "black")
        ax[1].set_ylim([0, 1])

        plt.show()

        del split_model, val_pred
        del transf, mean, std, imp
        gc.collect()

        # Multiplicative learning-rate decay for the next repeat
        lr = lr - (lr * decay_rate)

        # Checkpoint the current importance layer and encoder weights after every repeat
        np.save("Results/Features/(MANUAL)MutantVsWildtype_importance"+str(en)+".npy", enc.get_layer("importance").importance.numpy())
        np.save("Results/Features/(MANUAL)MutantVsWildtype_maximum"+str(en)+".npy", enc.get_layer("importance").maximum.numpy())
        enc.save_weights("Models/data_encoders/(MANUAL)MutantVsWildtype_importance"+str(en)+".h5")


    plt.rcParams.update({'font.size': 20})

    # Render one frame per repeat into memory; plotImp leaves its figure current
    fig = plt.figure(figsize=(14, 7))
    frames = []
    for i in range(len(importances)-1):
        plotImp(i, histories)
        buf = io.BytesIO()
        plt.savefig(buf, format="png")
        plt.close()
        buf.seek(0)
        frames.append(Image.open(buf))

    # Create and save the animated GIF
    frames[0].save(
        "(MANUAL)FeatureImportanceEvolution_"+str(en)+".gif",
        save_all=True,
        append_images=frames[1:],
        duration=100,
        loop=0,
    )
    # Close the canvas figure as well; deleting the name alone keeps it registered in pyplot
    plt.close(fig)
    del frames, fig
    gc.collect()

In [None]:
# Learning rate for the classifier built in TrainModel below, which reads the global `lr`.
# This also replaces the decayed value left in `lr` by the encoder-training loop above.
lr = 1e-5

In [None]:
p = "Data/"
title = "MANUAL"

# The dataset is stored as three static splits; stack them back into one array each
splits = ("train", "val", "test")
X_all = np.concatenate([np.load(p + part + "_x_" + title + ".npy") for part in splits])
lgm_all = np.concatenate([np.load(p + part + "_lgm.npy") for part in splits])
y_all = np.concatenate([np.load(p + part + "_y_46.npy") for part in splits])

# Per-row score vectors -> integer class indices
y_all = np.argmax(y_all, axis=-1)
lgm_all = np.argmax(lgm_all, axis=-1)
# Binary LGm label: indices above 2 map to 0, everything else to 1
lgm_all = np.where(lgm_all > 2, 0, 1)



In [None]:
def TrainModel(train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title):
    """Train a fresh two-head model (ID and LGm) on one fold and store its history and weights.

    Both heads are balanced through per-sample weights: each class is weighted with the
    square root of (largest class count / own class count).

    Parameters
    ----------
    train_x, val_x : model inputs of the training / validation part.
    train_y, val_y : integer ID labels.
    train_lgm, val_lgm : integer LGm labels.
    en : fold index, used in the output file names.
    title : dataset tag, used in the output file names.

    Returns
    -------
    The fitted model. Reads the notebook-level `lr`, `epochs` and `batch_size`.
    """
    # Class weights of the ID head, looked up per training sample
    id_counts = np.bincount(train_y)
    id_weights = np.sqrt((1/(id_counts/np.max(id_counts))))
    sw_id = id_weights[train_y]

    # Class weights of the LGm head, looked up per training sample
    lgm_counts = np.bincount(train_lgm)
    lgm_weights = np.sqrt((1/(lgm_counts/np.max(lgm_counts))))
    sw_lgm = lgm_weights[train_lgm]

    # Build the model from a fixed seed, one output unit per class present in the fold
    reset_seed()
    n_ids = len(np.unique(train_y))
    n_lgm = len(np.unique(train_lgm))
    sample_model = make_split_model(
        lr,
        out_dims=[n_ids, n_lgm],
        losses=["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"],
    )
    sample_model.summary()

    hist = sample_model.fit(
        train_x,
        {'ID': train_y, 'LGm': train_lgm},
        sample_weight={'ID': sw_id, 'LGm': sw_lgm},
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(val_x, [val_y, val_lgm]),
    )

    # Persist the training curves and the trained weights of this fold
    fold_tag = title + "_hist_" + str(en)
    np.save("Results/BatchEffectReduction_" + fold_tag + ".npy", hist.history)
    sample_model.save_weights("Models/BatchEffectReduction_" + title + "_Model_" + str(en) + ".h5")

    return sample_model

def sample_evaluation(sample_model, train_x, train_y, train_lgm, val_x, val_y, val_lgm, en, title, verbose = False):
    """Evaluate the model separately on every ID class and save the per-class metrics.

    For each class index `i` in `range(len(np.unique(train_y)))` the training and the
    validation spectra of that class are passed to `sample_model.evaluate`, which must
    return five values (the 4th and 5th are taken as ID and LGm accuracy).

    Parameters
    ----------
    sample_model : object with an `evaluate(x, [y, lgm], batch_size=...)` method.
    train_x, val_x : array-like spectra; the first axis indexes the samples.
    train_y, val_y : 1-D integer ID labels.
    train_lgm, val_lgm : 1-D integer LGm labels.
    en : fold index, used in the output file names.
    title : dataset tag, used in the output file names.
    verbose : when True, additionally print the table as LaTeX.

    Side effects
    ------------
    Writes `Results/<title>_BatchEffectReduction_val_accuracy_<en>.npy` and
    `Results/<title>_BatchEffectReduction_all_accuracies_<en>.npy`.
    """

    def _class_subset(x, y, lgm, cls):
        # Select one class and bring it to the (samples, ..., 1) layout used for evaluate().
        # Only singleton feature axes are squeezed -- never the sample axis -- so a class
        # with a single spectrum stays a batch of one instead of collapsing.
        mask = y == cls
        d = x[mask]
        unit_axes = tuple(ax for ax in range(1, d.ndim) if d.shape[ax] == 1)
        if unit_axes:
            d = np.squeeze(d, axis = unit_axes)
        d = np.expand_dims(d, axis = -1)
        return d, np.atleast_1d(np.squeeze(y[mask])), np.atleast_1d(np.squeeze(lgm[mask]))

    # get performance on each sample
    sample_dict = {}

    num_patients = len(np.unique(train_y))
    for i in range(num_patients):

        # Get data from class i of the training set
        d, y, lgm = _class_subset(train_x, train_y, train_lgm, i)
        _, train_loss, _, train_acc, train_acc_lgm  = sample_model.evaluate(d, [y, lgm], batch_size = 256)
        train_size = int(len(d))

        # Get data from class i of the validation set, these spectra have not been seen before
        d, y, lgm = _class_subset(val_x, val_y, val_lgm, i)
        _, val_loss, _, val_acc, val_acc_lgm  = sample_model.evaluate(d, [y, lgm], batch_size = 256)
        val_size = int(len(d))

        sample_dict[i] = np.array([train_acc, train_acc_lgm, train_size, val_acc, val_acc_lgm, val_size])

    # Save the metrics in a dataframe (the class index becomes the row index)
    columns = ["Train Accuracy", "Train LGm Accuracy", "Train Size",
               "Validation Accuracy", "Validation LGm Accuracy", "Validation Size"]

    df = pd.DataFrame.from_dict(sample_dict, columns = columns, orient = "index")
    # Round them to two decimals
    df = df.round(decimals = 2)

    # Cast the number of spectra to integer, looks nicer than the decimal form with .0 after each number
    df["Train Size"] = df["Train Size"].astype(int)
    df["Validation Size"] = df["Validation Size"].astype(int)

    if verbose:
        # Styling for conversion into latex format. The styled object has to be the one
        # that is exported, otherwise the hline rules are lost.
        styler = df.style.set_table_styles([
            {'selector': 'toprule', 'props': ':hline;'},
            {'selector': 'midrule', 'props': ':hline;'},
            {'selector': 'bottomrule', 'props': ':hline;'},
        ], overwrite=False)

        # One column spec for the index plus one per data column
        column_format = "|" + "l|" * (df.shape[1] + 1)

        # Print the latex table, can be copied into the editor
        latex = styler.format(decimal=',', thousands='.', precision=2).to_latex(clines="all;data",  column_format=column_format)
        print(latex)

    # Save metrics
    np.save("Results/"+title+"_BatchEffectReduction_val_accuracy_"+str(en)+".npy", df["Validation Accuracy"].values)
    np.save("Results/"+title+"_BatchEffectReduction_all_accuracies_"+str(en)+".npy", df.values)
    

In [None]:
# Get the folds
organizer = StratifiedKFold(n_splits=n_folds, shuffle = True, random_state = 42)
folds = organizer.split(X_all, y_all)
gc.collect()

for en, fold in enumerate(folds):
    train_ix = fold[0]
    val_ix = fold[1]

    # Training set
    train_x = X_all[train_ix]
    train_y = y_all[train_ix]
    train_lgm = lgm_all[train_ix]

    # Validation set
    val_x = X_all[val_ix]
    val_y = y_all[val_ix]
    val_lgm = lgm_all[val_ix]

    # Data shuffle
    ix = np.arange(len(train_x))
    np.random.shuffle(ix)
    train_x = train_x[ix]
    train_y = train_y[ix]
    train_lgm = train_lgm[ix]

    # Restore the encoder of this fold. The file name has to match the one used when the
    # encoder weights were saved: "Models/data_encoders/(<title>)MutantVsWildtype_importance<fold>.h5"
    enc = make_encoder()
    enc.summary()
    path = "Models/data_encoders/("+title+")MutantVsWildtype_importance"+str(en)+".h5"
    enc.load_weights(path, by_name = False)

    # Pass both parts through the encoder and train the classifier on the encoded spectra
    X = enc.predict(train_x)
    X_v = enc.predict(val_x)
    sample_model = TrainModel(X, train_y, train_lgm, X_v, val_y, val_lgm, en, title = title)


    # get performance on each sample
    sample_evaluation(sample_model, X, train_y, train_lgm, X_v, val_y, val_lgm, en, title = title, verbose = False)


    del train_x
    del train_y
    del val_x
    del val_y
    # `X` and `X_v` are deliberately kept: the next cell inspects `X_v`
    del sample_model
    gc.collect()

In [None]:
# Shape check of the encoded validation set left over from the last fold of the loop above
X_v.shape

In [None]:
# Fold-averaged validation accuracies per variant, before the encoder is applied ...
id_dict, lgm_dict = {}, {}

for title in ("MANUAL", "RAW"):

    accs = []
    for en in range(n_folds):
        acc = np.load(f"Results/{title}all_accuracies_{en}.npy", allow_pickle=True)
        # Third- and second-to-last columns of the stored accuracy table
        accs.append(acc[:, -3:-1])

    accs = np.array(accs)
    mean = accs.mean(axis=0)      # average over the folds

    id_dict[title], lgm_dict[title] = mean.T[0], mean.T[1]

# ... and for MANUAL after batch-effect reduction (tables written by sample_evaluation)
title = "MANUAL"
accs = []
for en in range(n_folds):
    acc = np.load(f"Results/{title}_BatchEffectReduction_all_accuracies_{en}.npy")
    accs.append(acc[:, -3:-1])

accs = np.array(accs)
mean = accs.mean(axis=0)

id_dict[title+"_After"], lgm_dict[title+"_After"] = mean.T[0], mean.T[1]

In [None]:
import scipy.stats as stats

plt.rcParams.update({'font.size': 40})
plt.rcParams["font.family"] = "Times New Roman"

conf_level = 0.95

# (dictionary key, legend prefix) for every plotted series, in drawing order.
_SERIES = (
    ("RAW", "Raw data:"),
    ("MANUAL", "Manual Before:"),
    ("MANUAL_After", "Manual After:"),
)


def _ci_half_width(values, confidence):
    """Half-width of the Student-t confidence interval for the mean of `values`.

    The interval is centred on the sample mean and scaled by the standard
    error (sample standard deviation with ddof=1 divided by sqrt(n)).
    The result is rounded to 3 decimals.
    """
    n = len(values)
    center = np.mean(values)
    # The confidence level is passed positionally because its keyword name
    # differs between SciPy releases.
    bounds = stats.t.interval(confidence, df=n - 1, loc=center,
                              scale=np.std(values, ddof=1) / np.sqrt(n))
    return np.round(np.max(np.abs(center - bounds)), 3)


def _plot_accuracy_comparison(acc_by_title, order, positions, confidence, legend_loc, out_path):
    """Scatter every series in `_SERIES`, reordered by `order`, and save the figure.

    acc_by_title : dict mapping series key -> 1-D array of accuracies
    order        : index array applied to every series before plotting
    positions    : x coordinates shared by all series
    confidence   : confidence level used for the "CI: ±" part of the legend
    legend_loc   : matplotlib legend location string
    out_path     : path of the PNG file to write
    """
    plt.figure(figsize = (10, 5))

    for key, prefix in _SERIES:
        values = acc_by_title[key]
        label = (prefix + str(np.round(np.mean(values), 2))
                 + ", CI: ± " + str(_ci_half_width(values, confidence)))
        plt.scatter(positions, values[order], label = label)

    plt.ylim([-0.1, 1.1])
    plt.legend(fontsize = 20, loc = legend_loc)

    plt.savefig(out_path, format="png", transparent = True,
                dpi = 1000,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.show()


x_range = np.arange(len(id_dict["RAW"]))

################
# ID accuracies
################

# Every series is ordered by the "MANUAL_After" values of the same metric.
sorting = np.argsort(id_dict["MANUAL_After"])
_plot_accuracy_comparison(id_dict, sorting, x_range, conf_level,
                          "upper left", "Images/Histories/Corrected_ID_ACC_comparison.png")

################
# LGm accuracies
################

sorting = np.argsort(lgm_dict["MANUAL_After"])
_plot_accuracy_comparison(lgm_dict, sorting, x_range, conf_level,
                          "lower right", "Images/Histories/Corrected_LGM_ACC_comparison.png")

