# Notebook 1
Raw data export into `np.memmap` files.

### Load important modules

In [1]:
# Standard modules
import numpy as np
import os
import lzma
import pickle

# Move to the repository root for easier module handling.
# The relative chdir is guarded so that re-running this cell (without restarting
# the kernel) does not climb two more directories and break the imports below:
# the root is recognised by the presence of the "modules" package directory.
if not os.path.isdir("modules"):
    os.chdir("../..")
print(os.listdir("."))
from modules.tools import maldi_conversion 
from modules.tools import lookup_tables
from modules.tools.misc import delete_all_files_in_folder

# multithreading/multiprocessing
from multiprocessing import Pool
from threadpoolctl import threadpool_limits

# Set the thread limit to 16 through threadpoolctl (presumably this caps the
# native BLAS/OpenMP pools used by numpy — see the threadpoolctl documentation).
threadpool_limits(16)

# Define if the app uses only MAIA-transformed lipids.
# When SAMPLE_APP is enabled, the lookup-table divider is overridden to 600
# (NOTE(review): the meaning of DIVIDER_LOOKUP is defined in modules.tools.lookup_tables).
maldi_conversion.SAMPLE_APP = False
if maldi_conversion.SAMPLE_APP:
    lookup_tables.DIVIDER_LOOKUP = 600

['.git', '.gitattributes', '.vscode', 'LICENSE.md', 'TODO.py', 'app.py', 'assets', 'config.py', 'data_sample', 'documentation', 'index.py', 'js', 'main.py', 'modules', 'notebooks', 'pages', 'readme', 'requirements.txt', 'style', 'data', '__pycache__', 'nohup.out', 'README.md', '.ipynb_checkpoints', '.gitignore']


### Create a list of raw data filenames

In [2]:
# Locations of the raw acquisitions and of the temporary (processed) files, per brain
path_brain_1 = "/data/lipidatlas/data/data_raw/BRAIN1/"
path_brain_2 = "/data/lipidatlas/data/data_raw/BRAIN2/"
path_brain_1_temp = "/data/lipidatlas/data/app/data/temp/brain_1"
path_brain_2_temp = "/data/lipidatlas/data/app/data/temp/brain_2"

# Token that immediately precedes the slice number in the acquisition folder names
split_value_1 = "MouseBrainCMC_S"
split_value_2 = "MouseBrain2_S"


def _slice_number(name, split_value):
    """Return the integer slice number embedded in an acquisition folder name.

    The number is whatever follows `split_value`, cut at the first "_", then at
    the first "A", then at the first "(" (e.g. "...MouseBrainCMC_S10(brain2_20)..." -> 10).
    """
    token = name.split(split_value)[1]
    for separator in ("_", "A", "("):
        token = token.split(separator)[0]
    return int(token)


# One list of [slice number, path prefix] entries per brain, in the order brain 1, brain 2
ll_t_names = []
brain_settings = [
    (path_brain_1, path_brain_1_temp, split_value_1),
    (path_brain_2, path_brain_2_temp, split_value_2),
]
for path_brain, path_brain_temp, split_value in brain_settings:
    # Collect the acquisitions found in the raw folder, ordered by slice number
    l_t_names = []
    for folder_name in os.listdir(path_brain):
        if "MouseBrain" not in folder_name:
            continue
        l_t_names.append(
            [
                _slice_number(folder_name, split_value),
                path_brain + folder_name + "/" + folder_name,
            ]
        )
    l_t_names.sort()

    # Flag duplicates: an entry sharing its slice number with the previous one gets a "bis" marker
    for t_names_1, t_names_2 in zip(l_t_names, l_t_names[1:]):
        if t_names_1[0] == t_names_2[0]:
            t_names_2.append("bis")
            print("WARNING: duplicate for slice " + str(t_names_1[0]))

    # Optionally drop the slices for which a "raw" file already exists in the temp folder
    os.makedirs(path_brain_temp, exist_ok=True)
    remove_already_loaded = False
    if remove_already_loaded:
        # The slice number is the second "_"-separated token minus its last 7 characters
        # (presumably a "raw.npz"-like suffix — TODO confirm against the extraction output names)
        existing_names = [
            int(temp_name.split("_")[1][:-7])
            for temp_name in os.listdir(path_brain_temp)
            if "raw" in temp_name
        ]
        l_t_names = [entry for entry in l_t_names if entry[0] not in existing_names]

    # Show the slices retained for this brain
    for t_names in l_t_names:
        print(t_names[0], t_names[1].rsplit("/", 1)[-1])

    ll_t_names.append(l_t_names)


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454

In [3]:
# Choose which brain the following cells work on:
# ll_t_names holds the brain 1 list first and the brain 2 list second.
brain_1 = False
l_t_names = ll_t_names[0] if brain_1 else ll_t_names[1]

# Show the slices selected for processing
for t_names in l_t_names:
    print(t_names[0], t_names[1].rsplit("/", 1)[-1])


11 20211201_MouseBrain2_S11_306x248_Att30_25um
12 20211202_MouseBrain2_S12_332x246_Att30_25um
13 20211203_MouseBrain2_S13_319x262_Att30_25um
14 20211206_MouseBrain2_S14_354x299_Att30_25um
15 20211209_MouseBrain2_S15_359x281_Att30_25um
16 20220127_MouseBrain2_S16_398x303_Att30_25um
17 20220105_MouseBrain2_S17_395x294_Att32_25um
18 20220106_MouseBrain2_S18_393x309_Att32_25um
19 20211210_MouseBrain2_S19_423x320_Att32_25um
20 20220321_MouseBrain2_S20_Duplicate_443x313_Att30_25um
21 20211213_MouseBrain2_S21_412x360_Att30_25um
22 20220107_MouseBrain2_S22_417x310_Att32_25um
23 20220224_MouseBrain2_S23_427x319_Att30_25um
24 20220115_MouseBrain2_S24_427x322_Att30_25um
25 20220311_MouseBrain2_S25_duplicate_476x336_Att30_25um
26 20220120_MouseBrain2_S26_415x315_Att30_25um
27 20220122_MouseBrain2_S27_443x301_Att30_25um
28 20220125_MouseBrain2_S28_312x431_Att30_25um
29 20220126_MouseBrain2_S29_410x290_Att30_25um
30 20220111_MouseBrain2_S30_400x248_Att32_25um
31 20220124_MouseBrain2_S31_394x261_Att3


### Extract raw data into numpy arrays with multiprocessing

In [4]:
# Extraction is disabled by default; switch the guard to True to (re)run it.
if False:
    multiprocessing = True
    if not multiprocessing:
        # Sequential fallback; list() only forces the lazy map to run, results are discarded
        list(map(maldi_conversion.extract_raw_data, l_t_names))
    else:
        # Three worker processes, slices handled in whatever order they complete
        with Pool(processes=3) as pool:
            list(pool.imap_unordered(maldi_conversion.extract_raw_data, l_t_names))


### Remove slices already processed

In [5]:
# Disabled by default; switch the guard to True to skip slices that already
# have a processed (non-"raw") file in the temporary folder of the selected brain.
if False:
    path_brain_temp = (
        "/data/lipidatlas/data/app/data/temp/brain_1"
        if brain_1
        else "/data/lipidatlas/data/app/data/temp/brain_2"
    )
    # Slice numbers already processed, parsed from names of the form "<prefix>_<n>.npz".
    # Entries containing "checkpoints" (e.g. ".ipynb_checkpoints") are skipped, as in the
    # recording cell further down: their name cannot be parsed as a slice number.
    existing_names = {
        int(name.split("_")[1][:-4])
        for name in os.listdir(path_brain_temp)
        if "raw" not in name and "checkpoints" not in name
    }
    l_t_names = [x for x in l_t_names if x[0] not in existing_names]
    # Print the final list of names
    for t_names in l_t_names:
        print(t_names[0], t_names[1].split("/")[-1])


### Process raw data into numpy arrays with multiprocessing

In [6]:
# Toggle between a process pool and a plain sequential run
multiprocessing = True
if not multiprocessing:
    # Sequential fallback; list() only forces the lazy map to run, results are discarded
    list(map(maldi_conversion.process_raw_data, l_t_names))
else:
    # Twelve worker processes, slices handled in whatever order they complete
    with Pool(processes=12) as pool:
        list(pool.imap_unordered(maldi_conversion.process_raw_data, l_t_names))


Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Prepare data for standardization
Prepare data for standardization
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Prepare data for standardization
Prepare data for standardization
Filtering out noise and matrix peaks
Standardize data
Prepare data for standardization
Prepare data for standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Standardize data
Sorting by m/z value for averaging


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Filtering out noise and matrix peaks
Build the low-resolution averaged array from the high resolution averaged array
Standardize data
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging after standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Build the low-resolution averaged array from the high resolution averaged array
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Compute and normalize pixels values according to TIC
Getting spectrums array averaged accross pixels
Standardize data
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging
Sorting by m/z value for averaging after standardization
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20211201_MouseBrain2_S11_306x248_Att30_25um/20211201_MouseBrain2_S11_306x248_Att30_25um
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20211203_MouseBrain2_S13_319x262_Att30_25um/20211203_MouseBrain2_S13_319x262_Att30_25um
Filtering out noise and matrix peaks
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220127_MouseBr

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging after standardization
Sorting by m/z value for averaging
Compute and normalize pixels values according to TIC
Getting spectrums array averaged accross pixels
Prepare data for standardization
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Build the low-resolution averaged array from the high resolution averaged array
Compute and normalize pixels values according to TIC
Sorting by m/z value for averaging after standardization
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Filtering out noise and matrix peaks
Sorting by m/z value for averaging
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220105_MouseBrain2_S17_395x294_Att32_25um/20220105_MouseBrain2_S17_395x294_Att32_25um
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220321_MouseBrain2_S20_Duplicate_443x313_Att30_25um/20220321_MouseBrain2_S20_Duplicate_443x313_Att30_25um
Filtering out noise and matrix peaks
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Compute and normalize pixels values according to TIC
Build the low-resolution averaged array from the high resolution averaged array
Standardize data
Sorting by m/z value for averaging after standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting corresponding spectra arrays
Prepare data for standardization
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220106_MouseBrain2_S18_393x309_Att32_25um/20220106_MouseBrain2_S18_393x309_Att32_25um
Prepare data for standardization
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Filtering out noise and matrix peaks
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20211209_MouseBrain2_S15_359x281_Att30_25um/20211209_MouseBrain2_S15_359x281_Att30_25um
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging after standardization
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging after standardization
Double sorting according to pixel and mz high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220107_MouseBrain2_S22_417x310_Att32_25um/20220107_MouseBrain2_S22_417x310_Att32_25um
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20211206_MouseBrain2_S14_354x299_Att30_25um/20211206_MouseBrain2_S14_354x299_Att30_25um
Getting spectrums array averaged accross pi

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Prepare data for standardization
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Prepare data for standardization
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Double sorting according to pixel and mz high-res array
Filtering out noise and matrix peaks
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging
Double sorting according to pixel and mz high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20211213_MouseBrain2_S21_412x360_Att30_25um/20211213_MouseBrain2_S21_412x360_Att30_25um
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220224_MouseBrain2_S23_427x319_Att30_25um/20220224_MouseBrain2_S23_427x319_Att30_25um
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220111_MouseBrain2_S30_400x248_Att32_25um/20220111_MouseBrain2_S30_400x248_Att32_25um
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Compute and normalize pixels values according to TIC
Sorting by m/z value for averaging
Filtering out noise and matrix peaks
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Standardize data
Getting spectrums array averaged accross pixels


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220115_MouseBrain2_S24_427x322_Att30_25um/20220115_MouseBrain2_S24_427x322_Att30_25um
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220124_MouseBrain2_S31_394x261_Att30_25um/20220124_MouseBrain2_S31_394x261_Att30_25um
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values accordi

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Filtering out noise and matrix peaks
Sorting by m/z value for averaging
Prepare data for standardization
Prepare data for standardization
Getting spectrums array averaged accross pixels
Standardize data
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Compute and normalize pixels values according to TIC


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Standardize data
Getting spectrums array averaged accross pixels


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Filtering out noise and matrix peaks
Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting spectrums array averaged accross pixels
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220125_MouseBrain2_S28_312x431_Att30_25um/20220125_MouseBrain2_S28_312x431_Att30_25um
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220120_MouseBrain2_S26_415x315_Att30_25um/20220120_MouseBrain2_S26_415x315_Att30_25um
Compute and normalize pixels values according to TIC
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Filtering out noise and matrix peaks
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220311

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Prepare data for standardization
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220130_MouseBrain2_S32_370x325_Att30_25um/20220130_MouseBrain2_S32_370x325_Att30_25um
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220122_MouseBrain2_S27_443x301_Att30_25um/20220122_MouseBrain2_S27_443x301_Att30_25um
Compute and normalize pixels values according to TIC
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Filtering out noise and matrix peaks
Standardize data


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo), dtype=np.float16


Sorting by m/z value for averaging
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220201_MouseBrain2_S33_359x314_Att30_25um/20220201_MouseBrain2_S33_359x314_Att30_25um
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Compute and normalize pixels values according to TIC
Standardize data
Sorting by m/z value for averaging
Filtering out noise and matrix peaks
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN2/20220203_MouseBrain2_S34_377x322_Att30_25um/20220203_MouseBr

### Build lookup tables

In [7]:
# Toggle between a process pool and a plain sequential run
multiprocessing = False
if not multiprocessing:
    # Sequential run; list() only forces the lazy map to run, results are discarded
    list(map(lookup_tables.process_lookup_tables, l_t_names))
else:
    # Twelve worker processes; pool.map blocks until every slice is done
    with Pool(processes=12) as pool:
        pool.map(lookup_tables.process_lookup_tables, l_t_names)


Size (in mb) of lookup_table_spectra_high_res:  578.98
Shape of lookup_table_spectra_high_res:  (2000, 75888)
Size (in mb) of cumulated_image_lookup_table_high_res:  578.98
Shape of cumulated_image_lookup_table_high_res:  (2000, 248, 306)
Size (in mb) of lookup_table_averaged_spectrum_high_res:  0.01
Shape of lookup_table_averaged_spectrum_high_res:  (2000,)
Saving...
Size (in mb) of lookup_table_spectra_high_res:  623.11
Shape of lookup_table_spectra_high_res:  (2000, 81672)
Size (in mb) of cumulated_image_lookup_table_high_res:  623.11
Shape of cumulated_image_lookup_table_high_res:  (2000, 246, 332)
Size (in mb) of lookup_table_averaged_spectrum_high_res:  0.01
Shape of lookup_table_averaged_spectrum_high_res:  (2000,)
Saving...
Size (in mb) of lookup_table_spectra_high_res:  637.65
Shape of lookup_table_spectra_high_res:  (2000, 83578)
Size (in mb) of cumulated_image_lookup_table_high_res:  637.65
Shape of cumulated_image_lookup_table_high_res:  (2000, 262, 319)
Size (in mb) of loo

### Record everything and clean 

Record everything in memmap files and a pickled dictionary.

In [9]:
# Destination of the final dataset (sample app and full app use different folders)
if maldi_conversion.SAMPLE_APP:
    output_folder = "data_sample/whole_dataset/"
else:
    output_folder = "data/whole_dataset/"

os.makedirs(output_folder, exist_ok=True)

# Offset added to the slice indices of brain 2 when registering them
# (presumably so they cannot collide with the brain 1 indices — TODO confirm the value).
BRAIN_2_SLICE_OFFSET = 22

# Lightweight arrays, always stored in the pickled dictionary: (dictionary key, npz key)
LIGHT_ARRAYS = [
    ("image_shape", "image_shape"),
    ("divider_lookup", "divider_lookup"),
    ("array_avg_spectrum_downsampled", "array_averaged_mz_intensity_low_res"),
    ("array_lookup_pixels", "array_pixel_indexes_high_res"),
    ("array_lookup_mz_avg", "lookup_table_averaged_spectrum_high_res"),
    ("array_peaks_transformed_lipids", "array_peaks_corrected"),
]

# Heavier arrays: (dictionary key, npz key, memmap dtype).
# Full app: written to "<output_folder><dictionary key>_<slice index>.mmap", and only the
# shape is kept in the dictionary under "<dictionary key>_shape".
# Sample app: stored directly in the dictionary under "<dictionary key>".
HEAVY_ARRAYS = [
    ("array_spectra", "array_spectra_high_res", "float32"),
    ("array_avg_spectrum", "array_averaged_mz_intensity_high_res", "float32"),
    (
        "array_avg_spectrum_after_standardization",
        "array_averaged_mz_intensity_high_res_after_standardization",
        "float32",
    ),
    ("array_lookup_mz", "lookup_table_spectra_high_res", "int32"),
    ("array_cumulated_lookup_mz_image", "cumulated_image_lookup_table_high_res", "float32"),
    ("array_corrective_factors", "array_corrective_factors", "float32"),
]

# Arrays whose size is printed for each slice: (npz key, label suffix).
# The labels are kept as they were (note that array_corrective_factors is labelled "dic"
# although it is written to a memmap in the full app).
REPORTED_ARRAYS = [
    ("array_pixel_indexes_high_res", "dic"),
    ("array_spectra_high_res", "mmap"),
    ("array_averaged_mz_intensity_low_res", "dic"),
    ("array_averaged_mz_intensity_high_res", "mmap"),
    ("array_averaged_mz_intensity_high_res_after_standardization", "mmap"),
    ("lookup_table_spectra_high_res", "mmap"),
    ("cumulated_image_lookup_table_high_res", "mmap"),
    ("lookup_table_averaged_spectrum_high_res", "dic"),
    ("array_peaks_corrected", "dic"),
    ("array_corrective_factors", "dic"),
]


def _load_slice_arrays(path):
    """Read all the arrays needed from a processed-slice .npz archive.

    The archive is opened with a context manager so that its file handle is
    closed once the arrays have been read, instead of leaking one open file
    per slice.

    Returns:
        dict mapping each npz key listed in LIGHT_ARRAYS / HEAVY_ARRAYS to its array.
    """
    npz_keys = [npz_key for _, npz_key in LIGHT_ARRAYS]
    npz_keys += [npz_key for _, npz_key, _ in HEAVY_ARRAYS]
    with np.load(path) as npzfile:
        return {npz_key: npzfile[npz_key] for npz_key in npz_keys}


def _write_memmap(array, path, dtype):
    """Copy `array` into a new memmap file at `path` with the given dtype and return its shape."""
    fp = np.memmap(path, dtype=dtype, mode="w+", shape=array.shape)
    fp[:] = array[:]
    fp.flush()
    return array.shape


dic_slices = {}
# Loop over input folders
for brain_1, input_folder in zip(
    [True, False],
    [
        "/data/lipidatlas/data/app/data/temp/brain_1/",
        "/data/lipidatlas/data/app/data/temp/brain_2/",
    ],
):

    # Loop over slice files
    for slice_name in os.listdir(input_folder):
        if "raw" in slice_name or "checkpoints" in slice_name:
            continue

        # Extract slice index from a name of the form "<prefix>_<n>.npz"
        slice_index = int(slice_name.split("_")[1][:-4])

        # Load slice arrays
        arrays = _load_slice_arrays(input_folder + slice_name)

        # Print size used by each array in mb
        for npz_key, storage in REPORTED_ARRAYS:
            print(npz_key + ", " + storage, round(arrays[npz_key].nbytes / 1024 / 1024, 2))

        # Update slice index for brain 2
        if not brain_1:
            slice_index += BRAIN_2_SLICE_OFFSET

        print(slice_name)

        # Register the lightweight arrays in the dictionary that gets pickled
        dic_slices[slice_index] = {dic_key: arrays[npz_key] for dic_key, npz_key in LIGHT_ARRAYS}

        if maldi_conversion.SAMPLE_APP:
            # Sample app: the heavier arrays go in the pickled dictionary as well
            for dic_key, npz_key, dtype in HEAVY_ARRAYS:
                dic_slices[slice_index][dic_key] = arrays[npz_key]
            continue

        try:
            # Build a memmap for each of the heavier arrays to save RAM, and save the
            # corresponding shape in the pickled dictionary once the file is written
            for dic_key, npz_key, dtype in HEAVY_ARRAYS:
                dic_slices[slice_index][dic_key + "_shape"] = _write_memmap(
                    arrays[npz_key],
                    output_folder + dic_key + "_" + str(slice_index) + ".mmap",
                    dtype,
                )
        except Exception as e:
            # Still best-effort (the remaining slices are processed), but say which slice
            # is incomplete: its dictionary entry lacks the shapes that were not written.
            print(
                "WARNING: memmap export failed for "
                + slice_name
                + " (slice index "
                + str(slice_index)
                + "): "
                + repr(e)
            )


if not maldi_conversion.SAMPLE_APP:
    # Pickle the dict of lightweight data
    with open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
else:
    # Sample app: the dictionary holds every array, so the pickle is lzma-compressed
    with lzma.open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
print("Done")


array_pixel_indexes_high_res, dic 0.57
array_spectra_high_res, mmap 148.1
array_averaged_mz_intensity_low_res, dic 0.05
array_averaged_mz_intensity_high_res, mmap 0.98
array_averaged_mz_intensity_high_res_after_standardization, mmap 0.98
lookup_table_spectra_high_res, mmap 567.49
cumulated_image_lookup_table_high_res, mmap 567.49
lookup_table_averaged_spectrum_high_res, dic 0.01
array_peaks_corrected, dic 0.0
array_corrective_factors, dic 8.8
slice_1.npz
array_pixel_indexes_high_res, dic 0.55
array_spectra_high_res, mmap 71.81
array_averaged_mz_intensity_low_res, dic 0.05
array_averaged_mz_intensity_high_res, mmap 0.94
array_averaged_mz_intensity_high_res_after_standardization, mmap 0.94
lookup_table_spectra_high_res, mmap 551.95
cumulated_image_lookup_table_high_res, mmap 551.95
lookup_table_averaged_spectrum_high_res, dic 0.01
array_peaks_corrected, dic 0.0
array_corrective_factors, dic 8.56
slice_3.npz
array_pixel_indexes_high_res, dic 0.62
array_spectra_high_res, mmap 61.66
array_a

Clean temporary folder

In [None]:
# Optional cleanup of the temporary files, disabled by default.
# NOTE(review): `input_folder` is the loop variable left over from the recording cell,
# i.e. the last folder iterated there (".../temp/brain_2/"), so the brain_1 temporary
# folder is not cleaned by this call — confirm whether that is intended.
clean = False
if clean:
    delete_all_files_in_folder(input_folder)
