# Notebook 1
Export of the raw data into `np.memmap` files.

### Load important modules

In [1]:
# Standard modules
import numpy as np
import os
import lzma
import pickle

# Move to the repository root for easier module handling. The guard keeps this cell
# idempotent: once the working directory already contains the `modules` package,
# re-running the cell must not climb two more directories up.
if not os.path.isdir("modules"):
    os.chdir("../..")
print(os.listdir("."))
from modules.tools import maldi_conversion 
from modules.tools import lookup_tables
from modules.tools.misc import delete_all_files_in_folder

# multithreading/multiprocessing
from multiprocessing import Pool
from threadpoolctl import threadpool_limits

# Cap the thread pools managed by threadpoolctl at 16 threads
threadpool_limits(limits=16)

# Switch telling whether the app uses only MAIA-transformed lipids.
# When it is enabled, the lookup-table divider is overridden as well.
maldi_conversion.SAMPLE_APP = False
if maldi_conversion.SAMPLE_APP:
    lookup_tables.DIVIDER_LOOKUP = 600

['.git', '.gitattributes', '.gitignore', '.vscode', 'LICENSE.md', 'README.md', 'TODO.py', 'app.py', 'assets', 'config.py', 'data_sample', 'documentation', 'index.py', 'js', 'main.py', 'modules', 'notebooks', 'pages', 'readme', 'requirements.txt', 'style', 'data', '__pycache__']


### Create a list of raw data filenames

In [2]:
# Raw-data folders, temporary output folders and filename markers of both brains
path_brain_1 = "/data/lipidatlas/data/data_raw/BRAIN1/"
path_brain_2 = "/data/lipidatlas/data/data_raw/BRAIN2/"
path_brain_1_temp = "/data/lipidatlas/data/app/data/temp/brain_1"
path_brain_2_temp = "/data/lipidatlas/data/app/data/temp/brain_2"
split_value_1 = "MouseBrainCMC_S"
split_value_2 = "MouseBrain2_S"

# One list of [slice_index, acquisition_path] entries per brain
ll_t_names = []
for path_brain, path_brain_temp, split_value in (
    (path_brain_1, path_brain_1_temp, split_value_1),
    (path_brain_2, path_brain_2_temp, split_value_2),
):
    # Build one entry per acquisition folder found in the raw-data folder
    l_t_names = []
    for name in os.listdir(path_brain):
        if "MouseBrain" not in name:
            continue
        # The slice index is the integer that directly follows the split marker
        after_marker = name.split(split_value)[1]
        index_token = after_marker.split("_")[0].split("A")[0].split("(")[0]
        l_t_names.append([int(index_token), path_brain + name + "/" + name])
    l_t_names.sort()

    # Tag the second of two consecutive entries sharing the same slice index
    for previous, current in zip(l_t_names, l_t_names[1:]):
        if current[0] == previous[0]:
            current.append("bis")
            print("WARNING: duplicate for slice " + str(previous[0]))

    # Optionally skip the slices that already have a raw file in the temporary folder
    os.makedirs(path_brain_temp, exist_ok=True)
    remove_already_loaded = False
    if remove_already_loaded:
        existing_names = []
        for name in os.listdir(path_brain_temp):
            if "raw" in name:
                existing_names.append(int(name.split("_")[1][:-7]))
        l_t_names = [entry for entry in l_t_names if entry[0] not in existing_names]

    # Show the slices kept for this brain
    for t_names in l_t_names:
        print(t_names[0], t_names[1].rsplit("/", 1)[-1])

    ll_t_names.append(l_t_names)


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454

In [3]:
# Choose the brain the following cells work on (True: brain 1, False: brain 2)
brain_1 = True
l_t_names = ll_t_names[0] if brain_1 else ll_t_names[1]

# Show the slices that are going to be processed
for t_names in l_t_names:
    print(t_names[0], t_names[1].rsplit("/", 1)[-1])


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454


### Extract raw data into numpy arrays with multiprocessing

In [4]:
# Run the raw-data extraction on every selected slice.
# Set the flag to False to run everything in the current process instead.
multiprocessing = True
if not multiprocessing:
    # Normal (single-processed) map
    list(map(maldi_conversion.extract_raw_data, l_t_names))
else:
    # Pool of 16 worker processes; the results are only consumed to drive the iterator
    with Pool(processes=16) as pool:
        list(pool.imap_unordered(maldi_conversion.extract_raw_data, l_t_names))


Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25umLoading files : /data/lipidatlas/data/data_raw/BRAIN1/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25umLoading files : /data/lipidatlas/data/data_raw/BRAIN1/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25umLoading files : /data/lipidatlas/data/data_raw/BRAIN1/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25umLoading files : /data/lipidatlas/data/data_raw/BRAIN1/20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um/20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25umLoading files : /data/lipidatlas/data/data_raw/BRAIN1/20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um/2021021









Loading Sprectra at resolution 1e-05:   0%|          | 0/107712 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 0/129826 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 0/72345 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 0/124096 [00:00<?, ?it/s]







Loading Sprectra at resolution 1e-05:   0%|          | 0/119286 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 74382/74382 [00:52<00:00, 1408.94it/s]]
Loading Sprectra at resolution 1e-05: 100%|██████████| 72345/72345 [00:55<00:00, 1314.00it/s]it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 80712/80712 [00:57<00:00, 1411.65it/s]]t/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 79180/79180 [01:01<00:00, 1297.19it/s]it/s]]
Loading Sprectra at resolution 1e-05: 100%|██████████| 107712/107712 [01:14<00:00, 1449.34it/s]t/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 104538/104538 [01:23<00:00, 1258.27it/s]t/s]]
Loading Sprectra at resolution 1e-05: 100%|██████████| 122550/122550 [01:29<00:00, 1370.58it/s]it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 119286/119286 [01:31<00:00, 1306.24it/s]t/s]]
Loading Sprectra at resolution 1e-05: 100%|██████████| 111108/111108 [01:31<00:00, 1215.75it/s]t/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 112301/112301 [01:32<00:00, 1214.01it/s]t/s]]
Loa

Creating and sorting dataframes


100%|██████████| 79180/79180 [16:13<00:00, 81.30it/s]]
 53%|█████▎    | 59879/112301 [15:57<13:25, 65.07it/s]  

Creating and sorting dataframes


100%|██████████| 72345/72345 [18:13<00:00, 66.16it/s]]
 50%|█████     | 65439/129826 [16:49<16:26, 65.30it/s]

Creating and sorting dataframes


100%|██████████| 80712/80712 [19:33<00:00, 68.75it/s]]
 54%|█████▎    | 65761/122550 [18:43<16:47, 56.37it/s]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um


 61%|██████    | 68780/112301 [18:19<11:13, 64.65it/s]



 64%|██████▎   | 71476/112301 [19:01<10:23, 65.50it/s] 54210/129150 [00:41<00:41, 1797.30it/s]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05: 100%|██████████| 129150/129150 [01:46<00:00, 1217.16it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 129150/129150 [02:42<00:00, 792.86it/s] 
100%|██████████| 104538/104538 [26:47<00:00, 65.05it/s] 
 89%|████████▉ | 99934/112301 [26:25<03:25, 60.25it/s]] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210323_MouseBrainCMC_S18AR4_2Dpixelmode_474x291_Att30_25um/20210323_MouseBrainCMC_S18AR4_2Dpixelmode_474x291_Att30_25um


 89%|████████▉ | 99293/111108 [26:45<03:11, 61.84it/s]]



100%|██████████| 107712/107712 [27:45<00:00, 64.66it/s]23790/137934 [00:16<03:02, 627.02it/s] 
 92%|█████████▏| 103355/112301 [27:19<02:19, 64.30it/s]79389/137934 [00:51<00:27, 2105.89it/s]

Creating and sorting dataframes


 76%|███████▌  | 97061/128030 [27:08<08:12, 62.83it/s]]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05: 100%|██████████| 137934/137934 [01:44<00:00, 1318.70it/s]
100%|██████████| 111108/111108 [29:51<00:00, 62.01it/s]██▏ | 112339/137934 [01:21<00:21, 1189.91it/s]
100%|██████████| 112301/112301 [29:36<00:00, 63.20it/s]██▌ | 117398/137934 [01:26<00:17, 1161.00it/s]
 97%|█████████▋| 112851/116565 [29:57<00:57, 64.92it/s]███▏| 127186/137934 [01:45<00:21, 504.89it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um


 97%|█████████▋| 115862/119286 [30:10<00:52, 64.85it/s]███▏| 127307/137934 [01:45<00:19, 551.88it/s]



Loading the m/z values at resolution 1e-05: 100%|██████████| 137934/137934 [02:09<00:00, 1062.88it/s]
 79%|███████▉  | 104658/131692 [29:57<07:13, 62.33it/s]63672/91872 [00:45<00:59, 472.51it/s] 

Creating and sorting dataframes


100%|██████████| 119286/119286 [31:03<00:00, 64.00it/s]76580/91872 [00:52<00:08, 1773.29it/s]
100%|██████████| 116565/116565 [30:54<00:00, 62.85it/s]80213/91872 [00:57<00:37, 310.37it/s] 
Loading Sprectra at resolution 1e-05: 100%|██████████| 91872/91872 [01:03<00:00, 1437.75it/s]
 26%|██▋       | 34052/129150 [09:17<25:26, 62.28it/s]█▋   | 61644/91872 [01:24<00:42, 709.89it/s]

Creating and sorting dataframes


  5%|▌         | 7149/137934 [01:58<35:49, 60.84it/s]s]█▉  | 73413/91872 [01:40<00:20, 881.26it/s]

Creating and sorting dataframes


Loading the m/z values at resolution 1e-05: 100%|██████████| 91872/91872 [01:58<00:00, 777.40it/s] 
 97%|█████████▋| 120365/124096 [33:45<01:09, 53.73it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um

 31%|███       | 39743/129150 [10:50<25:45, 57.84it/s]]




 31%|███       | 39757/129150 [10:50<24:42, 60.32it/s]]



 97%|█████████▋| 120386/124096 [33:45<01:03, 58.39it/s]




100%|██████████| 122550/122550 [34:52<00:00, 58.56it/s]36667/105336 [00:30<00:35, 1914.58it/s]
100%|██████████| 124096/124096 [34:50<00:00, 59.36it/s]86763/105336 [01:04<00:10, 1721.19it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 105336/105336 [01:21<00:00, 1295.78it/s]
 93%|█████████▎| 121950/131692 [34:43<02:36, 62.12it/s]    | 14989/105336 [00:09<00:59, 1508.88it/s]

Creating and sorting dataframes


100%|██████████| 128030/128030 [35:41<00:00, 59.79it/s]    | 54687/105336 [00:35<00:31, 1586.41it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 105336/105336 [01:13<00:00, 1439.97it/s]
 97%|█████████▋| 127265/131692 [36:12<01:13, 60.62it/s]

Creating and sorting dataframes


 99%|█████████▉| 130362/131692 [37:05<00:22, 59.01it/s]

Creating and sorting dataframes


100%|██████████| 131692/131692 [37:28<00:00, 58.58it/s]
 25%|██▌       | 35091/137934 [09:46<28:35, 59.95it/s]

Creating and sorting dataframes


 23%|██▎       | 23902/105336 [05:18<18:27, 73.54it/s]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um

 30%|██▉       | 41084/137934 [11:26<27:26, 58.80it/s]




 30%|██▉       | 41098/137934 [11:26<26:54, 59.97it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 84710/84710 [01:04<00:00, 1303.32it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 84710/84710 [01:14<00:00, 1137.89it/s]
 34%|███▎      | 35419/105336 [07:52<17:18, 67.35it/s]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um

 62%|██████▏   | 79477/129150 [21:35<15:18, 54.10it/s]




 34%|███▎      | 35435/105336 [07:52<16:32, 70.42it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 86112/86112 [01:18<00:00, 1102.18it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 86112/86112 [01:43<00:00, 830.09it/s] 
 20%|██        | 17066/84710 [04:39<19:52, 56.71it/s]]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um


  7%|▋         | 5888/86112 [01:27<19:58, 66.94it/s]]]



Loading Sprectra at resolution 1e-05: 100%|██████████| 93600/93600 [01:20<00:00, 1166.91it/s]
 63%|██████▎   | 66467/105336 [14:51<10:23, 62.39it/s]     | 39414/93600 [00:55<01:01, 888.18it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210413_MouseBrainCMC_S24_3_2Dpixelmode_327x328_Att30_25um/20210413_MouseBrainCMC_S24_3_2Dpixelmode_327x328_Att30_25um


 18%|█▊        | 15169/86112 [03:43<45:06, 26.21it/s]▏     | 39504/93600 [00:56<02:50, 317.37it/s]



Loading the m/z values at resolution 1e-05: 100%|██████████| 93600/93600 [01:41<00:00, 922.34it/s] 
Loading Sprectra at resolution 1e-05: 100%|██████████| 107256/107256 [01:13<00:00, 1460.54it/s]
 91%|█████████ | 83645/91872 [21:05<02:05, 65.73it/s]███▊  | 83698/107256 [01:31<00:23, 983.39it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210414_MouseBrainCMC_S25_2_2Dpixelmode_358x238_Att30_25um/20210414_MouseBrainCMC_S25_2_2Dpixelmode_358x238_Att30_25um


 62%|██████▏   | 85745/137934 [23:46<28:58, 30.02it/s]██▊  | 83892/107256 [01:32<00:48, 480.05it/s]



Loading the m/z values at resolution 1e-05: 100%|██████████| 107256/107256 [01:55<00:00, 928.32it/s] 
Loading Sprectra at resolution 1e-05: 100%|██████████| 85204/85204 [01:04<00:00, 1318.19it/s]
100%|██████████| 91872/91872 [23:09<00:00, 66.13it/s]]████ | 77017/85204 [00:58<00:06, 1224.60it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 85204/85204 [01:06<00:00, 1290.34it/s]
 54%|█████▍    | 46003/84710 [12:07<10:20, 62.34it/s]] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210419_MouseBrainCMC_S26_3_2Dpixelmode_340x248_Att30_25um/20210419_MouseBrainCMC_S26_3_2Dpixelmode_340x248_Att30_25um


 54%|█████▍    | 46010/84710 [12:08<21:25, 30.11it/s]s]



 88%|████████▊ | 92854/105336 [20:48<02:44, 75.81it/s] 57631/84320 [00:43<00:15, 1702.08it/s]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05: 100%|██████████| 84320/84320 [01:04<00:00, 1300.21it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 84320/84320 [02:10<00:00, 645.77it/s]
100%|██████████| 105336/105336 [23:32<00:00, 74.60it/s]
 70%|███████   | 59720/84710 [15:51<06:14, 66.67it/s]]] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210603_MouseBrainCMC_S27_duplicate_2Dpixelmode_372x272_Att30_25um/20210603_MouseBrainCMC_S27_duplicate_2Dpixelmode_372x272_Att30_25um


 18%|█▊        | 15372/85204 [03:42<28:38, 40.63it/s]]]



  3%|▎         | 2272/84320 [00:35<20:39, 66.18it/s]]| 41931/101184 [00:34<00:38, 1526.76it/s]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05: 100%|██████████| 101184/101184 [01:31<00:00, 1105.23it/s]
 45%|████▌     | 42342/93600 [11:36<14:04, 60.67it/s]█████▊| 98656/101184 [02:15<00:05, 443.79it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210423_MouseBrainCMC_S28_3_2Dpixelmode_390x244_Att30_25um/20210423_MouseBrainCMC_S28_3_2Dpixelmode_390x244_Att30_25um


 88%|████████▊ | 74819/84710 [19:39<05:55, 27.82it/s]s]███▊| 98702/101184 [02:15<00:10, 229.15it/s]



 87%|████████▋ | 119469/137934 [33:48<05:12, 59.12it/s]████| 101184/101184 [02:20<00:00, 721.07it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 95160/95160 [01:05<00:00, 1456.53it/s]
 53%|█████▎    | 49937/93600 [13:39<11:58, 60.77it/s]██▍   | 61722/95160 [00:57<00:34, 960.90it/s] 

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210424_MouseBrainCMC_S29_5_2Dpixelmode_330x277_Att30_25um/20210424_MouseBrainCMC_S29_5_2Dpixelmode_330x277_Att30_25um


 53%|█████▎    | 49944/93600 [13:39<21:57, 33.13it/s]]█▍   | 61820/95160 [00:57<01:06, 499.75it/s]



100%|██████████| 84710/84710 [22:12<00:00, 63.55it/s]| 35582/91410 [00:30<00:33, 1656.85it/s]3it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 95160/95160 [01:33<00:00, 1019.94it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 91410/91410 [01:10<00:00, 1292.94it/s]
  2%|▏         | 2017/95160 [00:26<20:09, 77.01it/s]]]     | 3460/91410 [00:04<01:59, 736.70it/s]

Creating and sorting dataframes


100%|██████████| 86112/86112 [20:40<00:00, 69.44it/s]]█▊   | 62729/91410 [00:58<00:21, 1332.94it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 91410/91410 [01:29<00:00, 1026.97it/s]
 45%|████▍     | 47972/107256 [14:02<18:05, 54.62it/s]]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210429_MouseBrainCMC_S30_5_2Dpixelmode_367x278_Att30_25um/20210429_MouseBrainCMC_S30_5_2Dpixelmode_367x278_Att30_25um

 13%|█▎        | 13462/101184 [04:19<26:07, 55.96it/s]]




 64%|██████▍   | 60299/93600 [16:25<09:10, 60.47it/s]]]



 16%|█▌        | 16239/101184 [05:13<29:39, 47.74it/s] 46028/102026 [00:53<00:45, 1221.12it/s]

Creating and sorting dataframes


100%|██████████| 137934/137934 [39:41<00:00, 57.91it/s]64441/102026 [01:09<00:29, 1288.18it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 102026/102026 [01:48<00:00, 942.35it/s] 
 74%|███████▍  | 69489/93600 [18:52<06:30, 61.81it/s]      | 20678/102026 [00:39<02:24, 564.11it/s]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210501_MouseBrainCMC_S31_3_2Dpixelmode_355x239_Att30_25um/20210501_MouseBrainCMC_S31_3_2Dpixelmode_355x239_Att30_25um


 73%|███████▎  | 62187/85204 [14:46<15:27, 24.81it/s]/s]   | 20735/102026 [00:40<06:59, 193.70it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 84845/84845 [01:23<00:00, 1011.83it/s]3it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 102026/102026 [02:32<00:00, 671.20it/s]
 28%|██▊       | 27909/101184 [08:42<26:59, 45.24it/s]     | 10984/84845 [00:29<02:44, 448.29it/s]

Loading files : /data/lipidatlas/data/data_raw/BRAIN1/20210504_MouseBrainCMC_S32_3_2Dpixelmode_298x230_Att30_25um/20210504_MouseBrainCMC_S32_3_2Dpixelmode_298x230_Att30_25um


Loading the m/z values at resolution 1e-05:  13%|█▎        | 11030/84845 [00:30<07:52, 156.16it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 68540/68540 [01:09<00:00, 984.62it/s] it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 84845/84845 [02:40<00:00, 528.43it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 68540/68540 [02:09<00:00, 528.03it/s]
 73%|███████▎  | 77831/107256 [22:05<08:02, 60.98it/s]
 51%|█████     | 48487/95160 [10:44<09:54, 78.52it/s]]

Creating and sorting dataframes

  7%|▋         | 6105/84845 [01:51<17:45, 73.88it/s]




100%|██████████| 93600/93600 [25:25<00:00, 61.34it/s]]
 15%|█▌        | 12873/84845 [03:24<16:08, 74.29it/s]]

Creating and sorting dataframes


100%|██████████| 84320/84320 [21:20<00:00, 65.84it/s]]
100%|██████████| 107256/107256 [31:45<00:00, 56.29it/s] 
100%|██████████| 95160/95160 [20:50<00:00, 76.13it/s]]
 95%|█████████▍| 86535/91410 [18:48<01:06, 73.73it/s]]

Creating and sorting dataframes


 60%|█████▉    | 50487/84845 [13:00<10:28, 54.71it/s]]

Creating and sorting dataframes


100%|██████████| 91410/91410 [19:51<00:00, 76.74it/s]]
 65%|██████▌   | 55469/84845 [14:30<06:43, 72.89it/s]]

Creating and sorting dataframes


100%|██████████| 68540/68540 [15:52<00:00, 71.95it/s]]
 84%|████████▍ | 71163/84845 [18:24<03:03, 74.42it/s]]

Creating and sorting dataframes


100%|██████████| 84845/84845 [21:44<00:00, 65.06it/s]s]
100%|██████████| 102026/102026 [24:34<00:00, 69.21it/s]


Creating and sorting dataframes


### Remove slices already processed

In [None]:
# Disabled by default: switch the condition on to skip the slices already processed
if False:
    # Temporary folder of the brain currently selected
    if brain_1:
        path_brain_temp = "/data/lipidatlas/data/app/data/temp/brain_1"
    else:
        path_brain_temp = "/data/lipidatlas/data/app/data/temp/brain_2"

    # Slice indexes parsed from the files whose name does not contain "raw"
    existing_names = []
    for name in os.listdir(path_brain_temp):
        if "raw" in name:
            continue
        existing_names.append(int(name.split("_")[1][:-4]))

    # Keep only the slices that still have to be processed
    l_t_names = [entry for entry in l_t_names if entry[0] not in existing_names]

    # Print the final list of names
    for t_names in l_t_names:
        print(t_names[0], t_names[1].rsplit("/", 1)[-1])


### Process raw data into numpy arrays with multiprocessing

In [None]:
# Process the extracted raw data of every selected slice.
# Set the flag to False to run everything in the current process instead.
multiprocessing = True
if not multiprocessing:
    # Normal (single-processed) map
    list(map(maldi_conversion.process_raw_data, l_t_names))
else:
    # Pool of 16 worker processes; the results are only consumed to drive the iterator
    with Pool(processes=16) as pool:
        list(pool.imap_unordered(maldi_conversion.process_raw_data, l_t_names))


### Build lookup tables

In [None]:
# Build the lookup tables of every selected slice.
# Set the flag to False to run everything in the current process instead.
multiprocessing = True
if not multiprocessing:
    # Normal (single-processed) map
    list(map(lookup_tables.process_lookup_tables, l_t_names))
else:
    # Multiprocessing: pool of 16 worker processes, results returned in input order
    with Pool(processes=16) as pool:
        list(pool.map(lookup_tables.process_lookup_tables, l_t_names))


### Record everything and clean 

Record everything in memmap files and a pickled dictionary.

In [None]:
def _write_memmap(array, path, dtype):
    """Dump `array` into a new memory-mapped file and return the shape that was written.

    Args:
        array: numpy array to store.
        path: path of the memmap file, created or overwritten (mode "w+").
        dtype: dtype of the memmap; the values of `array` are cast to it on assignment.

    Returns:
        The shape of `array`, needed later on to reopen the memmap.
    """
    fp = np.memmap(path, dtype=dtype, mode="w+", shape=array.shape)
    fp[:] = array[:]
    fp.flush()
    return array.shape


if maldi_conversion.SAMPLE_APP:
    output_folder = "data_sample/whole_dataset/"
else:
    output_folder = "data/whole_dataset/"

os.makedirs(output_folder, exist_ok=True)

# Lightweight arrays, always stored in the pickled dictionary: {dictionary key: npz key}
LIGHT_ARRAYS = {
    "image_shape": "image_shape",
    "divider_lookup": "divider_lookup",
    "array_avg_spectrum_downsampled": "array_averaged_mz_intensity_low_res",
    "array_lookup_pixels": "array_pixel_indexes_high_res",
    "array_lookup_mz_avg": "lookup_table_averaged_spectrum_high_res",
    "array_peaks_transformed_lipids": "array_peaks_corrected",
}

# Heavy arrays: (dictionary key, npz key, dtype of the memmap). In the full app each one
# is written to "<dictionary key>_<slice index>.mmap" and only its shape is recorded,
# under "<dictionary key>_shape"; in the sample app the array itself is stored.
HEAVY_ARRAYS = [
    ("array_spectra", "array_spectra_high_res", "float32"),
    ("array_avg_spectrum", "array_averaged_mz_intensity_high_res", "float32"),
    (
        "array_avg_spectrum_after_standardization",
        "array_averaged_mz_intensity_high_res_after_standardization",
        "float32",
    ),
    ("array_lookup_mz", "lookup_table_spectra_high_res", "int32"),
    ("array_cumulated_lookup_mz_image", "cumulated_image_lookup_table_high_res", "float32"),
    ("array_corrective_factors", "array_corrective_factors", "float32"),
]

# Arrays whose size is printed, with the storage they go to in the full app
SIZE_REPORT = [
    ("array_pixel_indexes_high_res", "dic"),
    ("array_spectra_high_res", "mmap"),
    ("array_averaged_mz_intensity_low_res", "dic"),
    ("array_averaged_mz_intensity_high_res", "mmap"),
    ("array_averaged_mz_intensity_high_res_after_standardization", "mmap"),
    ("lookup_table_spectra_high_res", "mmap"),
    ("cumulated_image_lookup_table_high_res", "mmap"),
    ("lookup_table_averaged_spectrum_high_res", "dic"),
    ("array_peaks_corrected", "dic"),
    # Was labelled "dic", but this array is written to a memmap in the full app
    ("array_corrective_factors", "mmap"),
]

# Every npz key read from a slice archive
npz_keys = list(LIGHT_ARRAYS.values()) + [
    npz_key for dict_key, npz_key, dtype in HEAVY_ARRAYS
]

dic_slices = {}
failed_slices = []
# Loop over input folders
for brain_1, input_folder in zip(
    [True, False],
    [
        "/data/lipidatlas/data/app/data/temp/brain_1/",
        "/data/lipidatlas/data/app/data/temp/brain_2/",
    ],
):

    # Loop over slice files
    for slice_name in os.listdir(input_folder):
        if "raw" in slice_name or "checkpoints" in slice_name:
            continue

        # Extract slice index
        slice_index = int(slice_name.split("_")[1][:-4])

        # Load slice arrays; the context manager closes the archive once they are read,
        # so that no file handle is left open for each processed slice
        with np.load(input_folder + slice_name) as npzfile:
            arrays = {npz_key: npzfile[npz_key] for npz_key in npz_keys}

        # Print size used by each array in MB
        for npz_key, storage in SIZE_REPORT:
            print(npz_key + ", " + storage, round(arrays[npz_key].nbytes / 1024 / 1024, 2))

        # Update slice index for brain 2
        # NOTE(review): the brain 1 listing printed earlier in this notebook goes up to
        # slice 32, so an offset of 22 would make brain 2 indexes collide with brain 1
        # ones -- confirm the offset before removing the break below.
        if not brain_1:
            slice_index += 22

        print(slice_name)

        # Register the lightweight arrays in the dictionary that gets pickled
        dic_slices[slice_index] = {
            dict_key: arrays[npz_key] for dict_key, npz_key in LIGHT_ARRAYS.items()
        }

        if maldi_conversion.SAMPLE_APP:
            # Sample app: register the heavy arrays in the pickled dictionary as well
            for dict_key, npz_key, dtype in HEAVY_ARRAYS:
                dic_slices[slice_index][dict_key] = arrays[npz_key]
            continue

        try:
            # Build a memmap for each of the heavier arrays to save RAM, and save the
            # corresponding shape in the pickled dictionary
            for dict_key, npz_key, dtype in HEAVY_ARRAYS:
                dic_slices[slice_index][dict_key + "_shape"] = _write_memmap(
                    arrays[npz_key],
                    output_folder + dict_key + "_" + str(slice_index) + ".mmap",
                    dtype,
                )
        except Exception as e:
            # Best effort: keep exporting the other slices, but make the failure
            # traceable, since the entry of this slice is left without some shapes
            failed_slices.append(slice_name)
            print("WARNING: memmap export failed for " + slice_name + ": " + repr(e))

    # ! Careful, this break statement will have to be deleted when brain 2 is added
    break

if failed_slices:
    print("WARNING: incomplete export for: " + ", ".join(failed_slices))

if not maldi_conversion.SAMPLE_APP:
    # Pickle the dict of lightweight data
    with open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
else:
    # Sample app: the dict also holds the heavy arrays, so it is compressed with lzma
    with lzma.open(output_folder + "light_arrays.pickle", "wb") as handle:
        pickle.dump(dic_slices, handle)
print("Done")


Clean temporary folder

In [None]:
# Set `clean` to True to call delete_all_files_in_folder on the temporary folder
# (presumably it removes every file in it -- confirm in modules.tools.misc).
# NOTE(review): `input_folder` is the loop variable left over from the export cell, so
# this only targets the last folder visited by that loop and fails on a fresh kernel
# if the export cell has not been run -- confirm this is the intended folder.
clean = False
if clean:
    delete_all_files_in_folder(input_folder)
