In [None]:
import matplotlib.colors
from Scripts.essentials import *

## Load the glioma samples

In [None]:
# Load every sample file found in the LGm-1 ... LGm-6 folders under data_path.
data_path = "Data/RawData/"

# Folder names that are read; the trailing digit of the name becomes the integer label
lgm_folders = ["LGm-1", "LGm-2", "LGm-3", "LGm-4", "LGm-5", "LGm-6"]

data = {}  # file name -> array loaded from that file
LGm = {}   # file name -> integer label taken from the folder the file was found in

# os.listdir returns entries in arbitrary order. Everything downstream (row order of the
# flattened arrays and of the label arrays) follows the insertion order of `data`, so the
# listings are sorted to make that order reproducible between runs and machines.
for lgm in sorted(os.listdir(data_path)):
    if lgm not in lgm_folders:
        continue

    sample_path = data_path + lgm + "/"
    print(sample_path)

    # NOTE(review): the dictionaries are keyed by file name only, so two files with the
    # same name in different LGm folders would overwrite each other -- confirm names are unique.
    for sample in sorted(os.listdir(sample_path)):
        data[sample] = np.load(sample_path + sample)
        LGm[sample] = int(lgm[-1])
        print(sample, ":", data[sample].shape)


In [None]:
## Build the flat dataset for the entire cohort: one min-max normalized spectrum per row,
## the ensemble-model output for each of those spectra, and the matching per-row labels

# Get the fine-tuned ensemble model
ensemble_model = make_ensemble()
ensemble_model.load_weights(filepath = "Models/ensembleModelRes.h5")

# Number of points per spectrum (length of the last axis after flattening)
n_points = 1738

# Per-sample pieces. They are collected in lists and joined once after the loop:
# calling np.concatenate on a growing array in every iteration re-copies everything
# accumulated so far, which is quadratic in the number of samples.
spectra_chunks = []   # normalized spectra
radar_chunks = []     # normalized ensemble-model outputs
id_chunks = []        # sample key repeated once per spectrum
label_chunks = []     # LGm label repeated once per spectrum

for key in data.keys():

    # Get the sample
    sample = data[key]

    # Flatten to one spectrum per row
    norm_data = sample.reshape((-1, n_points))

    # Min-max normalization of every spectrum
    _min = np.expand_dims(np.min(norm_data, axis = 1), -1)
    _max = np.expand_dims(np.max(norm_data, axis = 1), -1)
    span = _max - _min
    # A constant spectrum has zero span and would turn into NaN (0 / 0);
    # dividing by 1 instead maps it to an all-zero row.
    span[span == 0] = 1
    norm_data = (norm_data - _min) / span

    n_spectra = len(norm_data)

    # Keep the spectra, the last element returned by the model's predict call
    # (passed through the project's normalize helper), and the per-row labels
    spectra_chunks.append(norm_data)
    radar_chunks.append(normalize(ensemble_model.predict(norm_data, batch_size = 256)[-1]))
    id_chunks.append(np.array([key for i in range(n_spectra)]))
    label_chunks.append(np.array([LGm[key] for i in range(n_spectra)]))

# Join everything once. The empty leading arrays are the same seeds the arrays were
# previously grown from; keeping them preserves the resulting dtypes and still yields
# empty, correctly shaped arrays when no sample was loaded.

# Spectral data
new_flat_data = np.concatenate([np.empty((0, n_points))] + spectra_chunks)
flat_data_RADAR = np.concatenate([np.empty((0, n_points))] + radar_chunks)

# Labels for the spectra
patient_id = np.concatenate([np.empty((0))] + id_chunks)
lgm_labels = np.concatenate([np.empty((0))] + label_chunks)


In [None]:
# Persist the flattened arrays so that other experiments can load them from disk
arrays_to_save = {
    "Data/FlatData.npy": new_flat_data,
    "Data/FlatDataRADAR.npy": flat_data_RADAR,
    "Data/patient_id.npy": patient_id,
    "Data/lgm_labels.npy": lgm_labels,
}

for target, array in arrays_to_save.items():
    np.save(target, array)

In [None]:
# Reference correction for comparison: run the Kazemzadeh et al. model on the same spectra

cascade = tf.keras.models.load_model("Models/unet_three.23-64.96.h5")

new_flat_data = np.load("Data/FlatData.npy")

# The spectra are fed to the model as two windows of `window` points each:
# the first `window` points and the last `window` points of every spectrum.
window = 1024
n_points = 1738


def _cascade_denoise(segment):
    """Scale a window by 1000, run the cascade on it and undo the scaling.

    The last element of what `cascade.predict` returns is used, with
    singleton axes squeezed away.
    """
    scaled = np.expand_dims(segment, -1) * 1000
    prediction = cascade.predict(scaled, batch_size = 256)
    return np.squeeze(prediction[-1] / 1000)


# First window; written to disk so it survives an OOM exception later in the cell
denoised_1 = _cascade_denoise(new_flat_data[:, :window])
np.save("Data/denoised1.npy", denoised_1)

# Second window
denoised_2 = _cascade_denoise(new_flat_data[:, -window:])
np.save("Data/denoised2.npy", denoised_2)

# Join the predictions in the middle: the first window is kept whole and the second
# window only contributes the columns that lie beyond the end of the first one
denoised_1 = np.load("Data/denoised1.npy")
overlap = window - (n_points - window)
denoised = np.hstack((denoised_1, denoised_2[:, overlap:]))

np.save("Data/FlatDataKAZEM.npy", normalize(denoised))

In [None]:
# Ad-hoc solution, manually prepare the data based on the peak locations
new_flat_data = np.load("Data/FlatData.npy")

# Peak locations as [first, last] index pairs along the spectral axis (both ends inclusive)
ranges = [[240, 350], [360, 410], [430, 610], [635, 655], [705, 745], [1175, 1260], [1370, 1400], [1430, 1550]]

# Curated sample: zero everywhere except inside the peak ranges
curated = np.zeros_like(new_flat_data)

# Get the peaks
for r in ranges:
    window = slice(r[0], r[1] + 1)

    # Basic slicing returns a view of new_flat_data, so this must only be read from
    segment = new_flat_data[:, window]

    # End points of the baseline: medians of the first and last 10 points of the range
    start = np.median(segment[:, 0:10], axis = 1)
    end = np.median(segment[:, -10:], axis = 1)

    # The baseline at peak location is a linear line between the medians of the left and right ends
    bl = np.linspace(start, end, r[1] - r[0] + 1, axis = -1)

    # Remove the baseline out of place. An in-place `-=` on the view would silently
    # overwrite the loaded spectra in new_flat_data as well.
    peak = segment - bl

    # Insert the peak
    curated[:, window] = peak

# Clip the values to the interval [0, 1] (negative values become 0) and normalize the spectra
curated = np.clip(curated, 0, 1)
curated = normalize(curated)

# Save the data
# NOTE(review): normalize is applied a second time here; kept as-is because its
# definition is not visible in this notebook -- confirm whether one call is enough.
np.save("Data/FlatDataMANUAL.npy", normalize(curated))