# Notebook 1
Raw data export into `np.memmap` files.

### Load important modules

In [1]:
# Standard modules
import numpy as np
import os
import lzma
import pickle

# Move to root directory for easier module handling
# NOTE: the path is relative, so every re-execution of this cell moves the working directory
# two levels further up; run it only once per kernel session.
os.chdir("../..")
# List the new working directory so the reader can check that it is the repository root
print(os.listdir("."))
# Project-local modules, imported only after the chdir above
from modules.tools import maldi_conversion 
from modules.tools import lookup_tables
from modules.tools.misc import delete_all_files_in_folder

# multithreading/multiprocessing
from multiprocessing import Pool
from threadpoolctl import threadpool_limits

# set thread limit
# (called as a plain function rather than in a `with` block)
threadpool_limits(16)

# Define if the app uses only MAIA-transformed lipids
maldi_conversion.SAMPLE_APP = False
# When building the sample app, the lookup-table divider is overridden to 600
if maldi_conversion.SAMPLE_APP:
    lookup_tables.DIVIDER_LOOKUP = 600

['.git', '.gitattributes', '.vscode', 'LICENSE.md', 'TODO.py', 'app.py', 'assets', 'config.py', 'data_sample', 'documentation', 'index.py', 'js', 'main.py', 'modules', 'notebooks', 'pages', 'readme', 'requirements.txt', 'style', 'data', '__pycache__', 'README.md', '.ipynb_checkpoints', '.gitignore', 'Dockerfile', 'nohup.out']


### Create a list of raw data filenames

In [2]:
# Folders holding the raw acquisitions (one sub-folder per slice) of each brain
path_brain_1 = "/data/lipidatlas/data/data_raw/BRAIN1/"
path_brain_2 = "/data/lipidatlas/data/data_raw/BRAIN2/"
# Temporary folders (created below if missing) holding the per-slice intermediate files
path_brain_1_temp = "/data/lipidatlas/data/app/data/temp/brain_1"
path_brain_2_temp = "/data/lipidatlas/data/app/data/temp/brain_2"
# Text that immediately precedes the slice number in the acquisition folder names
split_value_1 = "MouseBrainCMC_S"
split_value_2 = "MouseBrain2_S"

# One list of [slice index, path prefix] entries per brain
ll_t_names = []
brain_settings = [
    (path_brain_1, path_brain_1_temp, split_value_1),
    (path_brain_2, path_brain_2_temp, split_value_2),
]
for path_brain, path_brain_temp, split_value in brain_settings:
    # Load filenames: keep only acquisition folders and parse their slice number
    l_t_names = []
    for name in os.listdir(path_brain):
        if "MouseBrain" not in name:
            continue
        # The slice number is whatever follows the marker, up to the first "_", "A" or "("
        suffix = name.split(split_value)[1]
        for separator in ("_", "A", "("):
            suffix = suffix.split(separator)[0]
        l_t_names.append([int(suffix), path_brain + name + "/" + name])
    l_t_names.sort()

    # Correct for duplicates: tag an entry that shares its index with the entry just before it
    for position in range(1, len(l_t_names)):
        previous, current = l_t_names[position - 1], l_t_names[position]
        if current[0] == previous[0]:
            current.append("bis")
            print("WARNING: duplicate for slice " + str(previous[0]))

    # Remove slices that have already been processed (disabled by default)
    os.makedirs(path_brain_temp, exist_ok=True)
    remove_already_loaded = False
    if remove_already_loaded:
        existing_names = []
        for name in os.listdir(path_brain_temp):
            if "raw" in name:
                existing_names.append(int(name.split("_")[1][:-7]))
        l_t_names = [entry for entry in l_t_names if entry[0] not in existing_names]

    # Print the final list of names (slice index and acquisition name without its folder)
    for t_names in l_t_names:
        print(t_names[0], t_names[1].rsplit("/", 1)[-1])

    ll_t_names.append(l_t_names)


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454

In [8]:
# Choose which brain the following cells operate on: True -> first brain, False -> second
brain_1 = True
l_t_names = ll_t_names[0] if brain_1 else ll_t_names[1]

# Print the final list of names (slice index and acquisition name without its folder)
for t_names in l_t_names:
    print(t_names[0], t_names[1].rsplit("/", 1)[-1])


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454


### Extract raw data into numpy arrays with multiprocessing

In [9]:
# Disabled by default: flip the condition to (re)extract the raw data
if False:
    multiprocessing = True
    if not multiprocessing:
        # Normal (single-processed) map
        list(map(maldi_conversion.extract_raw_data, l_t_names))
    else:
        # Results are consumed as soon as any worker finishes; only the side effects matter
        with Pool(processes=3) as pool:
            list(pool.imap_unordered(maldi_conversion.extract_raw_data, l_t_names))


### Remove slices already processed

In [10]:
# Disabled by default: flip the condition to skip slices that already have a processed file
if False:
    if brain_1:
        path_brain_temp = "/data/lipidatlas/data/app/data/temp/brain_1"
    else:
        path_brain_temp = "/data/lipidatlas/data/app/data/temp/brain_2"

    # Indices of the slices that already have a non-raw file in the temporary folder
    existing_names = []
    for name in os.listdir(path_brain_temp):
        if "raw" not in name:
            existing_names.append(int(name.split("_")[1][:-4]))

    l_t_names = [entry for entry in l_t_names if entry[0] not in existing_names]

    # Print the final list of names
    for t_names in l_t_names:
        print(t_names[0], t_names[1].rsplit("/", 1)[-1])


### Process raw data into numpy arrays with multiprocessing

In [11]:
# Set to False to process the slices one after the other in the current process
multiprocessing = True
if not multiprocessing:
    # Normal (single-processed) map
    list(map(maldi_conversion.process_raw_data, l_t_names))
else:
    # Results are consumed as soon as any worker finishes; only the side effects matter
    with Pool(processes=12) as pool:
        list(pool.imap_unordered(maldi_conversion.process_raw_data, l_t_names))


Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according to TIC
Filtering out noise and matrix peaks
Filtering out noise and matrix peaks
Compute and normalize pixels values according to TIC
Compute and normalize pixels values according t

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Prepare data for standardization
72320 have been transformed, with an average of  0.00031803097345132743 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Prepare data for standardization
Prepare data for standardization
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Prepare data for standardization
Getting spectrums array averaged accross pixels
Standardize data
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Prepare data for standardization
Sorting by m/z value for averaging after standardization
74358 have been transformed, with an average of  0.0030259017187121763 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Standardize data
Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Standardize data
Standardize data
Double sorting according to pixel and mz high-res array
106594 have been transformed, with an average of  0.0014072086609002383 peaks transformed
Getting corresponding spectra arrays


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um/20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
Sorting by m/z value for averaging
There seems to be a problem with the computation of the integral
0.0014067147135542308 0.0006830153288319707
26 27
78699 have been transformed, with an average of  0.0031766604404122037 peaks transformed
Prepare data for standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


122491 have been transformed, with an average of  0.0010041554073360493 peaks transformed
Getting corresponding spectra arrays


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
Sorting by m/z value for averaging
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting spectrums array averaged accross pixels
Standardize data
Build the low-resolution averaged array from the high resolution averaged array
Standardize data
Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging after standardization
Build the low-resolution averaged array from the high resolution averaged array
There seems to be a problem with the computation of the integral
0.0009702225481845871 0.00046588905388489366
92 93
There seems to be a problem with the computation of the integral
0.0004100554520763327 0.00024888626649044454
230 2

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Standardize data
Sorting by m/z value for averaging
There seems to be a problem with the computation of the integral
0.0005677073310425075 0.0003695602936204523
192 193
There seems to be a problem with the computation of the integral
0.0008965548409780162 0.0007818630547262728
3 4
Getting spectrums array averaged accross pixels
119217 have been transformed, with an average of  0.003917226570036152 peaks transformed
Sorting by m/z value for averaging after standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging
Standardize data
Double sorting according to pixel and mz high-res array
There seems to be a problem with the computation of the integral
0.000482629534153792 0.00031197958742268384
218 219
Standardize data
Sorting by m/z value for averaging after standardization
There seems to be a problem with the computation of the integral
0.0002320161536097042 0.00010874131112359464
274 275
There seems to be a problem with the computation of the integral
0.0009376533236872066 0.0007922113873064518
4 5
112260 have been transformed, with an average of  0.00261892036344201 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
There seems to be a problem with the computation of the integral
0.00021201297890043633 0.00010817863221745938
185 186
Getting spectrums array averaged accross pixels
There seems to be a problem with the computation of the integral
0.0008154795320149392 0.0004454445152077824
128 129
There seems to be a problem with the computation of the integral
0.0012419158096483162 0.001141816726885736
5 6
109811 have been transformed, with an average of  0.002904991303239202 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging
124095 have been transformed, with an average of  0.0022160441597163464 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Getting spectrums array averaged accross pixels
Standardize data
Build the low-resolution averaged array from the high resolution averaged array
Double sorting according to pixel and mz high-res array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
Sorting by m/z valu

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging
Sorting by m/z value for averaging after standardization
Build the low-resolution averaged array from the high resolution averaged array
Getting corresponding spectra arrays
Sorting by m/z value for averaging after standardization
Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Sorting by m/z value for averaging after standardization
Double sorting according to pixel and mz high-res array
Double sorting according to pixel and mz high-res array
Compute and normalize pixels val

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Standardize data
Standardize data
Standardize data
93595 have been transformed, with an average of  0.00013889630856349163 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Standardize data
116556 have been transformed, with an average of  0.0022907443632245446 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Standardize data
Sorting by m/z value for averaging
128028 have been transformed, with an average of  0.0019761302215140436 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


There seems to be a problem with the computation of the integral
0.0003319448573548716 0.00016518424672540277
259 260
There seems to be a problem with the computation of the integral
0.00033357365107987934 0.00014835350157227367
88 89
There seems to be a problem with the computation of the integral
0.00031503307786420005 0.00014635373372584581
301 302
Sorting by m/z value for averaging
There seems to be a problem with the computation of the integral
0.00036749365023099284 0.00019300146959722042
260 261
84700 have been transformed, with an average of  0.000602125147579693 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
129127 have been transformed, with an average of  0.000782175687501452 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Getting spectrums array averaged accross pixels
Standardize data
Sorting by m/z value for averaging
Standardize data
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Standardize data
Standardize data
Getting spectrums array averaged accross pixels
There seems to be a problem with the computation of the integral
0.00026041499820462447 0.00013092148583382368
68 69
There seems to be a problem with the computation of the integral
0.00029050520286642704 0.00013435957953333855
167 168
There seems to be a problem with the computation of the integral
0.0017282997073673908 0.0014533302746713161
199 200
107253 have been transformed, with an average of  0.0003263311981949223 peaks transformed
There seems to be a problem with the computation of the integral
0.0011602637527470504 0.0009850653586909175
187 

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


188

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16



There seems to be a problem with the computation of the integral
0.0012617347315725038 0.0010951795848086476
190 191
There seems to be a problem with the computation of the integral
0.0011083994706603774 0.0009604693041183054
220 221
There seems to be a problem with the computation of the integral
0.00143104451647948 0.0012206952087581158
218 219
86100 have been transformed, with an average of  0.0007200929152148664 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


There seems to be a problem with the computation of the integral
0.0011178998577457563 0.0009261689847335219
210 211
There seems to be a problem with the computation of the integral
0.0012864431115271178 0.00105525110848248
210 211
There seems to be a problem with the computation of the integral
0.0011276171463563704 0.0009781810222193599
224 225
There seems to be a problem with the computation of the integral
0.0003609788875658284 0.00017006098642013967
149 150
There seems to be a problem with the computation of the integral
0.001693679403053901 0.0014377617044374347
187 188
There seems to be a problem with the computation of the integral
0.0010534174006205145 0.0009357414674013853
226 227
There seems to be a problem with the computation of the integral
0.0014542194909305584 0.0011966293677687645
210 211
There seems to be a problem with the computation of the integral
0.0008187353839487337 0.000704381091054529
224 225
Build the low-resolution averaged array from the high resolution av

  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Sorting by m/z value for averaging
Sorting by m/z value for averaging
105204 have been transformed, with an average of  0.0018915630584388427 peaks transformed


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Getting spectrums array averaged accross pixels
Standardize data
Sorting by m/z value for averaging
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Double sorting according to pixel and mz high-res array
Double sorting according to pixel and mz high-res array
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
131524 have been transformed, with an average of  0.0014217937410662693 peaks transformed
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization


  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16
  np.nan_to_num(arrays_after_transfo / arrays_before_transfo, nan = 1.), dtype=np.float16


Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Sorting by m/z value for averaging
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Build the low-resolution averaged array from the high resolution averaged array
Sorting by m/z value for averaging after standardization
Getting corresponding spectra arrays
Getting spectrums array averaged accross pixels
Getting spectrums array averaged accross pixels
Saving : /data/lipidatlas/data/data_raw/BRAIN1/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um
Build the low-resol

### Build lookup tables

In [12]:
# Set to False to build the lookup tables one slice at a time in the current process
multiprocessing = True
if not multiprocessing:
    # Normal (single-processed) map
    list(map(lookup_tables.process_lookup_tables, l_t_names))
else:
    # Multiprocessing
    with Pool(processes=12) as pool:
        list(pool.map(lookup_tables.process_lookup_tables, l_t_names))


Size (in mb) of lookup_table_spectra_high_res:  551.95
Shape of lookup_table_spectra_high_res:  (2000, 72345)
Size (in mb) of lookup_table_spectra_high_res:  615.78
Shape of lookup_table_spectra_high_res:  (2000, 80712)
Size (in mb) of lookup_table_spectra_high_res:  567.49
Shape of lookup_table_spectra_high_res:  (2000, 74382)
Size (in mb) of lookup_table_spectra_high_res:  604.1
Shape of lookup_table_spectra_high_res:  (2000, 79180)
Size (in mb) of lookup_table_spectra_high_res:  821.78
Shape of lookup_table_spectra_high_res:  (2000, 107712)
Size (in mb) of lookup_table_spectra_high_res:  797.56
Shape of lookup_table_spectra_high_res:  (2000, 104538)
Size (in mb) of lookup_table_spectra_high_res:  856.79
Shape of lookup_table_spectra_high_res:  (2000, 112301)
Size (in mb) of lookup_table_spectra_high_res:  847.69
Shape of lookup_table_spectra_high_res:  (2000, 111108)Size (in mb) of lookup_table_spectra_high_res: 
 934.98
Shape of lookup_table_spectra_high_res:  (2000, 122550)
Size (

### Record everything and clean 

Record everything in memmap files and a pickled dictionary.

In [13]:
if maldi_conversion.SAMPLE_APP:
    output_folder = "data_sample/whole_dataset/"
else:
    output_folder = "data/whole_dataset/"

os.makedirs(output_folder, exist_ok=True)

# Offset added to the slice index of brain-2 slices.
# NOTE(review): presumably the number of brain-1 slices. The processing log in this notebook
# mentions a BRAIN1 acquisition named S23, so confirm that brain-1 indices never exceed 22;
# otherwise brain-2 entries would overwrite brain-1 ones (a warning is printed if that happens).
BRAIN_2_INDEX_OFFSET = 22

# Lightweight arrays, always stored in the pickled dictionary:
# (key in the .npz file, key in dic_slices)
LIGHT_ARRAYS = [
    ("image_shape", "image_shape"),
    ("divider_lookup", "divider_lookup"),
    ("array_averaged_mz_intensity_low_res", "array_avg_spectrum_downsampled"),
    ("array_pixel_indexes_high_res", "array_lookup_pixels"),
    ("lookup_table_averaged_spectrum_high_res", "array_lookup_mz_avg"),
    ("array_peaks_corrected", "array_peaks_transformed_lipids"),
]

# Heavy arrays, written to one memmap file per slice (full app) or stored in the dictionary
# (sample app): (key in the .npz file, output name, dtype of the memmap)
HEAVY_ARRAYS = [
    ("array_spectra_high_res", "array_spectra", "float32"),
    ("array_averaged_mz_intensity_high_res", "array_avg_spectrum", "float32"),
    (
        "array_averaged_mz_intensity_high_res_after_standardization",
        "array_avg_spectrum_after_standardization",
        "float32",
    ),
    ("lookup_table_spectra_high_res", "array_lookup_mz", "int32"),
    ("cumulated_image_lookup_table_high_res", "array_cumulated_lookup_mz_image", "float32"),
    ("array_corrective_factors", "array_corrective_factors", "float32"),
]

# Order and storage label used when printing the size of each array.
# array_corrective_factors used to be labelled "dic" although it is written to a memmap.
SIZE_REPORT = [
    ("array_pixel_indexes_high_res", "dic"),
    ("array_spectra_high_res", "mmap"),
    ("array_averaged_mz_intensity_low_res", "dic"),
    ("array_averaged_mz_intensity_high_res", "mmap"),
    ("array_averaged_mz_intensity_high_res_after_standardization", "mmap"),
    ("lookup_table_spectra_high_res", "mmap"),
    ("cumulated_image_lookup_table_high_res", "mmap"),
    ("lookup_table_averaged_spectrum_high_res", "dic"),
    ("array_peaks_corrected", "dic"),
    ("array_corrective_factors", "mmap"),
]


def _write_memmap(path, array, dtype):
    """Copy `array` into a new memory-mapped file at `path` and return the shape of `array`.

    The values are cast to `dtype` on assignment. The shape is returned because a raw memmap
    file does not store it, so it has to be recorded elsewhere to read the file back.
    """
    fp = np.memmap(path, dtype=dtype, mode="w+", shape=array.shape)
    fp[:] = array[:]
    fp.flush()
    # Drop the reference so that the underlying mapping is released before the next slice
    del fp
    return array.shape


dic_slices = {}
# Loop over input folders
for brain_1, input_folder in zip(
    [True, False],
    [
        "/data/lipidatlas/data/app/data/temp/brain_1/",
        "/data/lipidatlas/data/app/data/temp/brain_2/",
    ],
):

    # Loop over slice files
    for slice_name in os.listdir(input_folder):
        if "raw" in slice_name or "checkpoints" in slice_name:
            continue

        # Extract slice index
        slice_index = int(slice_name.split("_")[1][:-4])

        # Load slice arrays; the context manager closes the archive once everything is in memory
        with np.load(input_folder + slice_name) as npzfile:
            arrays = {
                npz_key: npzfile[npz_key]
                for npz_key in [k for k, _ in LIGHT_ARRAYS] + [k for k, _, _ in HEAVY_ARRAYS]
            }

        # print size used by each array in mb
        for npz_key, storage in SIZE_REPORT:
            print(npz_key + ", " + storage, round(arrays[npz_key].nbytes / 1024 / 1024, 2))

        # Update slice index for brain 2
        if not brain_1:
            slice_index += BRAIN_2_INDEX_OFFSET

        print(slice_name)

        if slice_index in dic_slices:
            print(
                "WARNING: slice index "
                + str(slice_index)
                + " is already recorded and will be overwritten by "
                + slice_name
            )

        # Register the lightweight arrays in the dictionary that gets pickled
        entry = {dict_key: arrays[npz_key] for npz_key, dict_key in LIGHT_ARRAYS}
        entry["is_brain_1"] = brain_1
        dic_slices[slice_index] = entry

        if maldi_conversion.SAMPLE_APP:
            # Sample app: register the heavy arrays in the pickled dictionary as well
            for npz_key, out_name, _ in HEAVY_ARRAYS:
                entry[out_name] = arrays[npz_key]
        else:
            try:
                # Build a memmap for each of the heavier arrays to save RAM, and save the
                # corresponding shape in the pickled dictionary
                for npz_key, out_name, dtype in HEAVY_ARRAYS:
                    entry[out_name + "_shape"] = _write_memmap(
                        output_folder + out_name + "_" + str(slice_index) + ".mmap",
                        arrays[npz_key],
                        dtype,
                    )
            except Exception as e:
                # Best effort: keep going with the other slices, but say which one is incomplete
                # (its entry lacks the shapes of the memmaps that were not written)
                print("ERROR: could not write all memmaps for " + slice_name + ":", e)


if not maldi_conversion.SAMPLE_APP:
    # Pickle the dict of lightweight data
    with open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
else:
    # The sample app stores every array in the dictionary, so the pickle is lzma-compressed
    with lzma.open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
print("Done")


array_pixel_indexes_high_res, dic 0.57
array_spectra_high_res, mmap 148.1
array_averaged_mz_intensity_low_res, dic 0.05
array_averaged_mz_intensity_high_res, mmap 0.98
array_averaged_mz_intensity_high_res_after_standardization, mmap 0.98
lookup_table_spectra_high_res, mmap 567.49
cumulated_image_lookup_table_high_res, mmap 567.49
lookup_table_averaged_spectrum_high_res, dic 0.01
array_peaks_corrected, dic 0.0
array_corrective_factors, dic 8.8
slice_1.npz
array_pixel_indexes_high_res, dic 0.55
array_spectra_high_res, mmap 71.81
array_averaged_mz_intensity_low_res, dic 0.05
array_averaged_mz_intensity_high_res, mmap 0.94
array_averaged_mz_intensity_high_res_after_standardization, mmap 0.94
lookup_table_spectra_high_res, mmap 551.95
cumulated_image_lookup_table_high_res, mmap 551.95
lookup_table_averaged_spectrum_high_res, dic 0.01
array_peaks_corrected, dic 0.0
array_corrective_factors, dic 8.56
slice_3.npz
array_pixel_indexes_high_res, dic 0.62
array_spectra_high_res, mmap 61.66
array_a

Clean temporary folder

In [None]:
# Set to True to delete the content of the temporary folders once the export has been checked
clean = False
if clean:
    # List both folders explicitly: relying on the `input_folder` loop variable left over from
    # the export cell only cleaned the last folder visited (brain_2) and failed on a fresh kernel.
    for temp_folder in [
        "/data/lipidatlas/data/app/data/temp/brain_1/",
        "/data/lipidatlas/data/app/data/temp/brain_2/",
    ]:
        delete_all_files_in_folder(temp_folder)
