# Notebook 4
Copy and downsample the TIFF files obtained from the registration process, for use in the sample app.

### Load important modules

In [1]:
# Standard modules
import numpy as np
import matplotlib.pyplot as plt
import shutil
import os
from skimage import io
from xtiff import to_tiff

# multithreading
from threadpoolctl import threadpool_limits

# Work from two directory levels up so that later relative paths ("data/...",
# "data_sample/...", "notebooks/...") resolve from there
os.chdir("../../")

# Maximum number of threads handed to threadpoolctl
n_threads_max = 16
threadpool_limits(limits=n_threads_max)

<threadpoolctl.threadpool_limits at 0x7fe6a91e3f70>

### Copy the original slice images, keeping only the last channel

In [2]:
# Input directory (full dataset) and output directory (sample dataset)
path_input = "data/tiff_files/original_data/"
path_output = "data_sample/tiff_files/original_data/"

# Create the output directory from the very variable used to build the output file
# names, so that the two can never drift apart
os.makedirs(path_output, exist_ok=True)

# The number of slices is the number of directory entries whose name contains "slice_"
n_slices = len([x for x in os.listdir(path_input) if "slice_" in x])

# Slice files are numbered starting from 1
for i in range(n_slices):

    # Select channel index 2 *before* casting, so that only the channel that is kept
    # gets converted to int32 (same values as casting first, smaller temporary array)
    slice_array = np.asarray(
        io.imread(path_input + "slice_" + str(i + 1) + ".tiff")[:, :, 2], dtype=np.int32
    )

    # Record the single-channel image as tiff in the sample directory
    to_tiff(
        slice_array,
        path_output + "slice_" + str(i + 1) + ".tiff",
        image_name="slice_" + str(i + 1),
        pixel_size=25,
    )




### Copy the original coordinates files as float16 arrays

In [5]:
# Input and output directories for the original coordinates files
path_input = "data/tiff_files/coordinates_original_data/"
path_output = "data_sample/tiff_files/coordinates_original_data/"

# Create directory for sampled coordinates files
os.makedirs(path_output, exist_ok=True)

# List the input directory once: it is not modified by the loop below
l_input_files = os.listdir(path_input)

# Loop over slices (n_slices comes from the cell copying the original slice images)
for i in range(n_slices):

    # Find the file whose name reads "...slice_<i+1>.tiff...". Entries without
    # "slice_" in their name are ignored instead of raising an IndexError.
    l_matches = [
        x
        for x in l_input_files
        if "slice_" in x and str(i + 1) == x.split("slice_")[1].split(".tiff")[0]
    ]
    if not l_matches:
        raise FileNotFoundError(
            "No coordinates file found for slice " + str(i + 1) + " in " + path_input
        )
    appendix = l_matches[0]

    # Float 16 to gain space
    original_coor = np.array(io.imread(path_input + appendix), dtype=np.float16)

    # Save as a numpy file named "<stem>.npy". Stripping the last three characters of a
    # ".tiff" name used to leave "<stem>.t", which np.save turned into "<stem>.tnpy.npy".
    # NOTE(review): output file names change with this fix; check any loader that relied
    # on the previous names.
    np.save(path_output + os.path.splitext(appendix)[0] + ".npy", original_coor)




### Downsample and copy the warped data as a numpy array


In [18]:
# Load the warped coordinates and cast them to float16
array_coordinates_warped_data = np.array(
    io.imread("data/tiff_files/coordinates_warped_data.tif"), dtype=np.float16
)

# Report the size (in MiB) and the shape of the array subsampled by a factor 2
# along its second and third axes
array_subsampled = array_coordinates_warped_data[:, ::2, ::2, :]
size_mib = array_subsampled.nbytes / 1024 / 1024
print(round(size_mib, 2))
print(array_subsampled.shape)

# NOTE(review): the array written to disk is the full-resolution one, not the subsampled
# view reported above -- confirm which of the two is meant to be stored.
np.savez_compressed("data_sample/tiff_files/coordinates_warped_data.npz", array_coordinates_warped_data)

55.13
(32, 459, 656, 3)


### Keep only the most prevalent lipids

In [None]:
# Concatenate the columns 'name', 'structure' and 'cation' (in that order) into a
# single 'full_name_lipid' column, then drop the three source columns
l_columns_to_merge = ["name", "structure", "cation"]
full_name = df_annotation[l_columns_to_merge[0]]
for column in l_columns_to_merge[1:]:
    full_name = full_name + df_annotation[column]
df_annotation["full_name_lipid"] = full_name
df_annotation = df_annotation.drop(columns=l_columns_to_merge)


In [None]:
# Count how many annotation rows carry each lipid name and keep the 60 names with the
# highest counts (value_counts sorts by decreasing count)
ser_lipid_counts = df_annotation["full_name_lipid"].value_counts()
l_most_prevalent_lipids = list(ser_lipid_counts.index[:60])

# Discard the annotations of every other lipid
mask_prevalent = df_annotation["full_name_lipid"].isin(l_most_prevalent_lipids)
df_annotation = df_annotation[mask_prevalent]
df_annotation


### Get the corresponding images

In [None]:
# Dictionary mapping each slice index to a list of [image, lipid name] pairs, one pair
# per entry of l_most_prevalent_lipids and in the same order
dic_l_images = {}
data = MaldiData()


def compute_annotation_image(slice_index, annotation):
    """Compute the image of one annotation row for the given slice.

    The "min" and "max" fields of `annotation` are passed as the first two arguments of
    compute_normalized_image_per_lipid, followed by the per-slice objects returned by
    the getters of `data`.
    """
    return compute_normalized_image_per_lipid(
        annotation["min"],
        annotation["max"],
        data.get_array_spectra(slice_index),
        data.get_array_lookup_pixels(slice_index),
        data.get_image_shape(slice_index),
        data.get_array_lookup_mz(slice_index),
        data.get_array_cumulated_lookup_mz_image(slice_index),
        data.get_divider_lookup(slice_index),
        data.get_array_peaks_transformed_lipids(slice_index),
        data.get_array_corrective_factors(slice_index),
        apply_transform=False,
        percentile_normalization=99,
        RGB_channel_format=True,
    )


# Loop over slices
for slice_index in data.get_slice_list(indices="brain_2"):
    print(slice_index, " started.")

    # Annotations belonging to the current slice
    df_currentSlice = df_annotation[df_annotation["slice"] == slice_index]

    l_images = []
    for lipid in l_most_prevalent_lipids:
        df_lipid = df_currentSlice[df_currentSlice["full_name_lipid"] == lipid]

        # Several annotation rows can share the same lipid name: their images are
        # summed, NaNs being treated as zero. Without any matching row the sum is a
        # 0-d value, which the following cells skip by testing the shape.
        l_annotation_images = [
            compute_annotation_image(slice_index, annotation)
            for _, annotation in df_lipid.iterrows()
        ]
        im_lipid = np.nansum(l_annotation_images, axis=0)
        l_images.append([im_lipid, lipid])

    dic_l_images[slice_index] = l_images


### Display results and choose the lipids that represent the brain best

In [None]:
# Save an overview of the main lipids as a jpg file: lipids as rows, slices as columns
l_slice_indexs = data.get_slice_list(indices="brain_2")

# At most 99 lipids are displayed, plus one leading row for their sum. The extra row
# must be counted in the grid: with fewer than 100 lipids the last lipid would otherwise
# fall on a row that does not exist.
n_lipids_displayed = min(len(l_most_prevalent_lipids), 99)
n_rows = n_lipids_displayed + 1

# Build a huge canvas; squeeze=False keeps ll_ax two-dimensional even with a single
# row or a single slice
fig, ll_ax = plt.subplots(
    n_rows,
    len(l_slice_indexs),
    figsize=(160, n_rows * 4),
    squeeze=False,
)

# Loop over slices and add the lipid images in each column
for i, index in enumerate(l_slice_indexs):

    # First row: sum of all lipid images of the slice (labelled TIC), ignoring the
    # 0-d placeholders of lipids without annotation in this slice
    ll_ax[0, i].imshow(
        np.sum(
            [image[0] for image in dic_l_images[index] if np.ndim(image[0]) > 0],
            axis=0,
        )
    )
    ll_ax[0, i].axis("off")
    ll_ax[0, i].set_title("slice: " + str(index) + ", TIC ")

    # Individual lipids for the remaining rows; (j) in the title is the position of the
    # lipid in dic_l_images[index], i.e. the index to use when selecting lipids
    for j, image in enumerate(dic_l_images[index][:n_lipids_displayed]):
        ax = ll_ax[j + 1, i]

        # A 0-d image means the lipid has no annotation in this slice: leave the panel
        # blank but still titled
        if np.ndim(image[0]) > 0:
            try:
                ax.imshow(image[0])
            except (TypeError, ValueError) as err:
                # Best effort: report the image that cannot be drawn and carry on
                print("Could not display lipid", image[1], "for slice", index, ":", err)
        ax.axis("off")
        ax.set_title("slice: " + str(index) + ", lipid: " + image[1] + " (" + str(j) + ")")

plt.tight_layout()
plt.savefig("notebooks/data_processing/data/images/all_slices.jpg", dpi=72)

# Release the (very large) figure
plt.close(fig)


In [None]:
# Define the lipid indices we want to keep for registration, one list per RGB channel.
# An index is the position of the lipid in l_most_prevalent_lipids, i.e. the number
# shown in parentheses in the titles of the all_slices.jpg overview.
# NB: this must be redone everytime!
ll_index_to_keep = [
    [4, 9, 10, 14, 43],  # Red, full brain
    [29, 34, 12, 21, 24],  # Green, specific structures
    # Blue, TIC: every lipid position. The keys of dic_l_images are slice indices, not
    # lipid positions, so they cannot be used as the selection here.
    list(range(len(l_most_prevalent_lipids))),
]


In [None]:
# Preview, for one slice out of four, the three channels produced by the current lipid
# selection (one row per channel, one column per slice)
l_slice_indexs = data.get_slice_list(indices="brain_2")[::4]
l_cmap = ["Reds", "Greens", "Blues"]
fig, ll_ax = plt.subplots(3, len(l_slice_indexs), figsize=(30, 10))

for i, index in enumerate(l_slice_indexs):
    for j, l_index_to_keep in enumerate(ll_index_to_keep):

        # Gather the images of the selected lipids, skipping 0-d entries
        l_selected_images = []
        for index_lipid, entry in enumerate(dic_l_images[index]):
            if index_lipid in l_index_to_keep and len(entry[0].shape) > 0:
                l_selected_images.append(entry[0])

        # Sum them and rescale so that the maximum is 255
        im_channel = np.sum(l_selected_images, axis=0)
        im_channel_norm = im_channel / np.max(im_channel) * 255

        ax = ll_ax[j, i]
        ax.imshow(im_channel_norm, cmap=l_cmap[j])
        ax.axis("off")
        ax.set_title("slice: " + str(index))

plt.savefig("notebooks/data_processing/data/images/lipidschannels.png")
plt.show()


### Record the lipid selection as an RGB image in TIFF format for each slice

In [None]:
# Write one tiff per slice, holding one image per lipid selection of ll_index_to_keep
for slice_index in data.get_slice_list(indices="brain_2"):
    l_channels = []

    for l_index_to_keep in ll_index_to_keep:
        # Images of the selected lipids, skipping 0-d entries
        l_selected_images = [
            entry[0]
            for index_lipid, entry in enumerate(dic_l_images[slice_index])
            if index_lipid in l_index_to_keep and len(entry[0].shape) > 0
        ]

        # Sum them and rescale so that the maximum is 255
        im_channel = np.sum(l_selected_images, axis=0)
        l_channels.append(im_channel / np.max(im_channel) * 255)

    # Export to tiff
    to_tiff(
        l_channels,
        "notebooks/data_processing/data/images/tiff/slice_" + str(slice_index) + ".tiff",
        image_name="slice_" + str(slice_index),
        pixel_size=25,
    )
