# Notebook 1
Export of the raw data into NumPy arrays, which are then stored in an HDF5 file.

### Load important modules

In [1]:
# Standard imports
import sys
import numpy as np
import pandas as pd
from numba import jit
import os
import shutil
import tables
import pickle

# Move to root directory for easier module import
# NOTE(review): this relative chdir is not idempotent -- re-running the cell
# moves two more levels up, after which the project imports and every relative
# path used later in the notebook break. Restart the kernel before re-running.
os.chdir("../../")
from notebooks.server.modules.maldi_conversion import process_raw_data
from notebooks.server.modules.lookup_tables import process_lookup_tables

# multithreading/multiprocessing
from multiprocessing import Pool
from threadpoolctl import threadpool_limits

# set thread limit
# Called as a plain function (not as a context manager), so the limit stays in
# place for the rest of the session. Being the last expression of the cell, the
# returned object is echoed as the cell output.
# NOTE(review): presumably meant to cap native (BLAS/OpenMP) thread pools while
# several worker processes run in parallel -- confirm the intended budget.
threadpool_limits(16)


<threadpoolctl.threadpool_limits at 0x7f024c71b430>

### Create a list of raw data filenames

In [2]:
# Build the list of raw acquisitions to process.
#
# Each entry of `l_t_names` is a list [slice_index, path_prefix] where
# `path_prefix` is "<raw_data_folder>/<name>/<name>" (the raw files share the
# name of the folder that contains them). Duplicated slice indices get an extra
# "bis" element appended to the second (and any further) occurrence.

# Folder holding one sub-folder per acquisition
raw_data_folder = "/data/lipidatlas/data/data_raw/"
# Token that immediately precedes the slice number in an acquisition name
slice_token = "MouseBrainCMC_S"

# Load filenames
l_t_names = sorted(
    [
        [
            # The slice number is whatever follows the token, up to the first "_", "A" or "(":
            # "..._S1AA1_..." -> 1, "..._S8_duplicate_..." -> 8, "..._S10(brain2_20)_..." -> 10
            int(name.split(slice_token)[1].split("_")[0].split("A")[0].split("(")[0]),
            raw_data_folder + name + "/" + name,
        ]
        for name in os.listdir(raw_data_folder)
        # Filter on the very token used for parsing: the previous, looser test on
        # "MouseBrain" let through names on which the split above raised IndexError.
        if slice_token in name
    ]
)

# Correct for duplicates: the list is sorted, so equal indices are adjacent
for t_names_1, t_names_2 in zip(l_t_names[:-1], l_t_names[1:]):
    if t_names_2[0] == t_names_1[0]:
        t_names_2.append("bis")
        print("WARNING: duplicate for slice " + str(t_names_1[0]))

# Remove slices that have already been processed
path = "notebooks/server/data/temp/"
os.makedirs(path, exist_ok=True)
remove_already_loaded = False
if remove_already_loaded:
    # Temporary files are expected to contain "slice" and to carry the slice
    # index as their second "_"-separated token.
    existing_names = [int(name.split("_")[1]) for name in os.listdir(path) if "slice" in name]
    l_t_names = [x for x in l_t_names if x[0] not in existing_names]

# Print the final list of names
for t_names in l_t_names:
    print(t_names[0], t_names[1].split("/")[-1])


1 20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
2 20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
3 20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
4 20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um
5 20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
6 20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
7 20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um
8 20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
9 20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um
10 20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
11 20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
12 20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
13 20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
14 20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
15 20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
16 20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454

### Process raw data into numpy arrays with multiprocessing

In [3]:
# Convert every raw acquisition listed in `l_t_names` with `process_raw_data`,
# which is called purely for its side effects (its return value is discarded).
#
# The flag below only selects the execution mode; both modes now process the
# same, complete list of slices. The single-process branch used to be
# restricted to `l_t_names[3:4]`, i.e. a single slice, which silently skipped
# all the others.
multiprocessing = True
if multiprocessing:
    # Multiprocessing: results are consumed as soon as each worker finishes,
    # in no particular order; iterating is what surfaces worker exceptions.
    with Pool(processes=12) as pool:
        for _result in pool.imap_unordered(process_raw_data, l_t_names):
            pass
else:
    # Normal (single-processed) loop, handy for debugging
    for _result in map(process_raw_data, l_t_names):
        pass


Loading files : /data/lipidatlas/data/data_raw/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25umLoading files : /data/lipidatlas/data/data_raw/20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um/20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25umLoading files : /data/lipidatlas/data/data_raw/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25umLoading files : /data/lipidatlas/data/data_raw/20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um/20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25umLoading files : /data/lipidatlas/data/data_raw/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25umLoading files : /data/lipidatlas/data/data_raw/20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um/20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_





Loading Sprectra at resolution 1e-05:   0%|          | 0/107712 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 0/104538 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 0/112301 [00:00<?, ?it/s]




Loading Sprectra at resolution 1e-05:   0%|          | 0/111108 [00:00<?, ?it/s]



Loading Sprectra at resolution 1e-05:   0%|          | 1/74382 [00:00<13:58:23,  1.48it/s]




Loading Sprectra at resolution 1e-05:   0%|          | 236/74382 [00:00<03:00, 410.97it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 72345/72345 [00:35<00:00, 2054.78it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 80712/80712 [00:36<00:00, 2188.78it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 74382/74382 [00:38<00:00, 1951.70it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 79180/79180 [00:40<00:00, 1933.44it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 107712/107712 [00:46<00:00, 2304.97it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 104538/104538 [00:51<00:00, 2031.85it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 122550/122550 [00:54<00:00, 2263.47it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 111108/111108 [00:55<00:00, 2015.24it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 119286/119286 [01:02<00:00, 1894.87it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 112301/112301 [01:05<00:00, 1723.29it/s]
Loading Sprectra at resolution 1e-05: 100%|█████

Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Creating and sorting dataframes
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um/20210210_MouseBrainCMC_S1AA1_2Dpixelmode_322x231_Att25_25um
Creating and sorting dataframes
Loading files : /data/lipidatlas/data/data_raw/20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um/20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um


Loading Sprectra at resolution 1e-05:  75%|███████▌  | 96442/128030 [00:51<00:15, 2030.39it/s]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05: 100%|██████████| 128030/128030 [01:08<00:00, 1855.98it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 128030/128030 [01:26<00:00, 1481.09it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um/20210211_MouseBrainCMC_S2AB5_2Dpixelmode_370x214_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um/20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 116565/116565 [01:03<00:00, 1848.49it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 116565/116565 [01:19<00:00, 1473.56it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um/20210213_MouseBrainCMC_S3AC4_2Dpixelmode_371x195_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um/20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um


Loading Sprectra at resolution 1e-05:  31%|███       | 40190/131692 [00:21<01:05, 1392.23it/s]

Getting spectrums array averaged accross pixels


Loading Sprectra at resolution 1e-05:  35%|███▍      | 45833/131692 [00:24<00:45, 1900.92it/s]

Build the low-resolution averaged array from the high resolution averaged array


Loading Sprectra at resolution 1e-05:  35%|███▌      | 46617/131692 [00:24<00:44, 1904.01it/s]

Double sorting high-res array


Loading Sprectra at resolution 1e-05:  70%|███████   | 92693/131692 [00:50<00:29, 1331.72it/s]

Getting corresponding spectra arrays


Loading Sprectra at resolution 1e-05:  71%|███████▏  | 94147/131692 [00:51<00:21, 1784.95it/s]

Saving : /data/lipidatlas/data/data_raw/20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um/20210214_MouseBrainCMC_S4AD3_2Dpixelmode_354x228_Att25_25um


Loading Sprectra at resolution 1e-05:  77%|███████▋  | 101926/131692 [00:56<00:16, 1830.92it/s]

Loading files : /data/lipidatlas/data/data_raw/20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454x295_Att30_25um/20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454x295_Att30_25um


Loading Sprectra at resolution 1e-05:  78%|███████▊  | 102295/131692 [00:56<00:16, 1805.93it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 131692/131692 [01:16<00:00, 1727.50it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 133930/133930 [01:22<00:00, 1627.04it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 131692/131692 [01:53<00:00, 1164.84it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 133930/133930 [01:43<00:00, 1300.13it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um/20210303_MouseBrainCMC_S12AL1_2Dpixelmode_393x266_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 129150/129150 [01:15<00:00, 1700.60it/s]
Loading the m/z values at resolution 1e-05:  73%|███████▎  | 94548/129150 [01:05<00:24, 1428.08it/s]

Getting spectrums array averaged accross pixels


Loading the m/z values at resolution 1e-05:  78%|███████▊  | 100814/129150 [01:10<00:20, 1395.25it/s]

Build the low-resolution averaged array from the high resolution averaged array


Loading the m/z values at resolution 1e-05:  78%|███████▊  | 101270/129150 [01:10<00:19, 1396.36it/s]

Double sorting high-res array


Loading the m/z values at resolution 1e-05: 100%|██████████| 129150/129150 [01:38<00:00, 1310.95it/s]


Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um/20210218_MouseBrainCMC_S5AE3_2Dpixelmode_396x272_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210323_MouseBrainCMC_S18AR4_2Dpixelmode_474x291_Att30_25um/20210323_MouseBrainCMC_S18AR4_2Dpixelmode_474x291_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 137934/137934 [01:08<00:00, 2023.79it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 137934/137934 [01:25<00:00, 1609.33it/s]


Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um/20210210_MouseBrainCMC_S10(brain2_20)_394x282_Att30_25um
Loading files : /data/lipidatlas/data/data_raw/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um


Loading Sprectra at resolution 1e-05:  34%|███▎      | 30833/91872 [00:15<00:30, 1994.99it/s]

Creating and sorting dataframes


Loading Sprectra at resolution 1e-05:  58%|█████▊    | 53266/91872 [00:26<00:18, 2081.33it/s]

Getting corresponding spectra arrays


Loading Sprectra at resolution 1e-05:  65%|██████▍   | 59648/91872 [00:30<00:18, 1775.24it/s]

Saving : /data/lipidatlas/data/data_raw/20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um/20210220_MouseBrainCMC_S7AF5_2Dpixelmode_427x263_Att25_25um


Loading Sprectra at resolution 1e-05:  84%|████████▍ | 77532/91872 [00:38<00:06, 2169.13it/s]

Loading files : /data/lipidatlas/data/data_raw/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um


Loading Sprectra at resolution 1e-05:  85%|████████▍ | 77981/91872 [00:38<00:06, 2202.73it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 91872/91872 [00:45<00:00, 2005.81it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 105336/105336 [00:54<00:00, 1931.26it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 91872/91872 [00:54<00:00, 1701.30it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 105336/105336 [01:02<00:00, 1689.87it/s]


Getting spectrums array averaged accross pixels




Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um/20210219_MouseBrainCMC_S6AE3_2Dpixelmode_423x282_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 84710/84710 [00:43<00:00, 1954.11it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 84710/84710 [00:52<00:00, 1627.07it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um/20210531_MouseBrainCMC_S8_duplicate_2Dpixelmode_430x285_Att30_25um
Loading files : /data/lipidatlas/data/data_raw/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 86112/86112 [01:00<00:00, 1420.60it/s]
Loading the m/z values at resolution 1e-05:  18%|█▊        | 15817/86112 [00:16<01:10, 1000.24it/s]

Creating and sorting dataframes


Loading the m/z values at resolution 1e-05:  66%|██████▌   | 56973/86112 [01:00<00:27, 1043.99it/s]

Getting spectrums array averaged accross pixels


Loading the m/z values at resolution 1e-05:  90%|█████████ | 77680/86112 [01:26<00:13, 625.62it/s]

Build the low-resolution averaged array from the high resolution averaged array


Loading the m/z values at resolution 1e-05:  91%|█████████ | 78034/86112 [01:27<00:15, 514.08it/s]

Double sorting high-res array


Loading the m/z values at resolution 1e-05: 100%|██████████| 86112/86112 [01:39<00:00, 868.96it/s]


Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Getting corresponding spectra arrays
Double sorting high-res array
Saving : /data/lipidatlas/data/data_raw/20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um/20210301_MouseBrainCMC_S11AK5_2Dpixelmode_448x277_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 93600/93600 [00:53<00:00, 1746.19it/s]
Loading the m/z values at resolution 1e-05:  87%|████████▋ | 81152/93600 [00:58<00:12, 963.93it/s]

Getting corresponding spectra arrays


Loading the m/z values at resolution 1e-05:  92%|█████████▏| 86478/93600 [01:02<00:04, 1678.28it/s]

Saving : /data/lipidatlas/data/data_raw/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um/20210224_MouseBrainCMC_S9AH4_2Dpixelmode_467x278_Att25_25um


Loading the m/z values at resolution 1e-05: 100%|██████████| 93600/93600 [01:06<00:00, 1403.33it/s]


Loading files : /data/lipidatlas/data/data_raw/20210413_MouseBrainCMC_S24_3_2Dpixelmode_327x328_Att30_25um/20210413_MouseBrainCMC_S24_3_2Dpixelmode_327x328_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 107256/107256 [00:54<00:00, 1965.28it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 107256/107256 [01:06<00:00, 1619.46it/s]


Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um/20210305_MouseBrainCMC_S14AN1_2Dpixelmode_409x285_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210414_MouseBrainCMC_S25_2_2Dpixelmode_358x238_Att30_25um/20210414_MouseBrainCMC_S25_2_2Dpixelmode_358x238_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 85204/85204 [00:45<00:00, 1861.43it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 85204/85204 [01:00<00:00, 1414.60it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array




Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um/20210304_MouseBrainCMC_S13AM1_2Dpixelmode_413x310_Att25_25um
Loading files : /data/lipidatlas/data/data_raw/20210419_MouseBrainCMC_S26_3_2Dpixelmode_340x248_Att30_25um/20210419_MouseBrainCMC_S26_3_2Dpixelmode_340x248_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 84320/84320 [00:53<00:00, 1572.12it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 84320/84320 [01:09<00:00, 1216.25it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um/20210325_MouseBrainCMC_S19AS4_2Dpixelmode_396x232_Att30_25um
Loading files : /data/lipidatlas/data/data_raw/20210603_MouseBrainCMC_S27_duplicate_2Dpixelmode_372x272_Att30_25um/20210603_MouseBrainCMC_S27_duplicate_2Dpixelmode_372x272_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 101184/101184 [01:04<00:00, 1570.09it/s]
Loading the m/z values at resolution 1e-05:  43%|████▎     | 43521/101184 [00:33<00:47, 1203.40it/s]

Getting spectrums array averaged accross pixels


Loading the m/z values at resolution 1e-05:  44%|████▍     | 44759/101184 [00:35<00:47, 1192.99it/s]

Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array


Loading the m/z values at resolution 1e-05:  57%|█████▋    | 57513/101184 [00:46<00:38, 1123.96it/s]

Getting corresponding spectra arrays


Loading the m/z values at resolution 1e-05:  57%|█████▋    | 57869/101184 [00:46<00:38, 1120.86it/s]

Saving : /data/lipidatlas/data/data_raw/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um/20210408_MouseBrainCMC_S21AU4_2Dpixelmode_394x215_Att30_25um


Loading the m/z values at resolution 1e-05:  59%|█████▉    | 60184/101184 [00:48<00:36, 1120.75it/s]

Loading files : /data/lipidatlas/data/data_raw/20210423_MouseBrainCMC_S28_3_2Dpixelmode_390x244_Att30_25um/20210423_MouseBrainCMC_S28_3_2Dpixelmode_390x244_Att30_25um


Loading the m/z values at resolution 1e-05:  60%|█████▉    | 60326/101184 [00:48<00:33, 1203.20it/s]



Loading the m/z values at resolution 1e-05: 100%|██████████| 101184/101184 [01:34<00:00, 1070.51it/s]
Loading Sprectra at resolution 1e-05: 100%|██████████| 95160/95160 [00:49<00:00, 1906.29it/s]
Loading the m/z values at resolution 1e-05:  73%|███████▎  | 69349/95160 [00:41<00:14, 1792.34it/s]

Getting spectrums array averaged accross pixels


Loading the m/z values at resolution 1e-05:  76%|███████▋  | 72695/95160 [00:43<00:12, 1756.29it/s]

Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array


Loading the m/z values at resolution 1e-05: 100%|██████████| 95160/95160 [00:59<00:00, 1608.77it/s]


Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um/20210330_MouseBrainCMC_S20AT3_2Dpixelmode_396x266_Att30_25um
Loading files : /data/lipidatlas/data/data_raw/20210424_MouseBrainCMC_S29_5_2Dpixelmode_330x277_Att30_25um/20210424_MouseBrainCMC_S29_5_2Dpixelmode_330x277_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 91410/91410 [00:56<00:00, 1622.00it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 91410/91410 [01:13<00:00, 1240.60it/s]


Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting spectrums array averaged accross pixels
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um/20210313_MouseBrainCMC_S15AO2_2Dpixelmode_451x292_Att25_25um
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Loading files : /data/lipidatlas/data/data_raw/20210429_MouseBrainCMC_S30_5_2Dpixelmode_367x278_Att30_25um/20210429_MouseBrainCMC_S30_5_2Dpixelmode_367x278_Att30_25um


Loading Sprectra at resolution 1e-05:  98%|█████████▊| 99775/102026 [01:18<00:01, 1131.78it/s]

Getting corresponding spectra arrays


Loading Sprectra at resolution 1e-05: 100%|██████████| 102026/102026 [01:20<00:00, 1268.10it/s]
Loading the m/z values at resolution 1e-05:   1%|▏         | 1447/102026 [00:01<02:07, 791.16it/s]

Saving : /data/lipidatlas/data/data_raw/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um/20210409_MouseBrainCMC_S22AV1_2Dpixelmode_416x207_Att30_25um


Loading the m/z values at resolution 1e-05:   7%|▋         | 7156/102026 [00:10<02:32, 622.26it/s]

Loading files : /data/lipidatlas/data/data_raw/20210501_MouseBrainCMC_S31_3_2Dpixelmode_355x239_Att30_25um/20210501_MouseBrainCMC_S31_3_2Dpixelmode_355x239_Att30_25um


Loading the m/z values at resolution 1e-05:   7%|▋         | 7234/102026 [00:10<02:22, 666.81it/s]



Loading Sprectra at resolution 1e-05: 100%|██████████| 84845/84845 [01:00<00:00, 1402.56it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 102026/102026 [02:08<00:00, 796.88it/s]
Loading the m/z values at resolution 1e-05:  64%|██████▍   | 54236/84845 [01:00<00:30, 1000.73it/s]

Getting spectrums array averaged accross pixels


Loading the m/z values at resolution 1e-05:  72%|███████▏  | 60934/84845 [01:06<00:21, 1098.40it/s]

Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array


Loading the m/z values at resolution 1e-05:  98%|█████████▊| 82763/84845 [01:29<00:02, 772.15it/s]

Creating and sorting dataframes


Loading the m/z values at resolution 1e-05: 100%|██████████| 84845/84845 [01:32<00:00, 918.27it/s]


Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454x295_Att30_25um/20210530_MouseBrainCMC_S16_duplicate_2Dpixelmode_454x295_Att30_25um
Loading files : /data/lipidatlas/data/data_raw/20210504_MouseBrainCMC_S32_3_2Dpixelmode_298x230_Att30_25um/20210504_MouseBrainCMC_S32_3_2Dpixelmode_298x230_Att30_25um


Loading Sprectra at resolution 1e-05: 100%|██████████| 68540/68540 [00:48<00:00, 1411.39it/s]
Loading the m/z values at resolution 1e-05: 100%|██████████| 68540/68540 [01:14<00:00, 915.56it/s]


Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um/20210412_MouseBrainCMC_S23AZ1_2Dpixelmode_360x260_Att30_25um
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array
Creating and sorting dataframes
Getting corresponding spectra arrays
Saving : /data/lipidatlas/data/data_raw/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um/20210319_MouseBrainCMC_S17AQ2_2Dpixelmode_450x287_Att30_25um
Creating and sorting dataframes
Creating and sorting dataframes
Getting spectrums array averaged accross pixels
Build the low-resolution averaged array from the high resolution averaged array
Double sorting high-res array


### Build lookup tables

In [3]:
# Build the lookup tables of every slice listed in `l_t_names`.
# `process_lookup_tables` is called for its side effects only; whatever it
# returns is discarded.
multiprocessing = True
if not multiprocessing:
    # Sequential execution in the notebook process
    list(map(process_lookup_tables, l_t_names))
else:
    # Parallel execution: `Pool.map` blocks until every slice has been handled
    with Pool(processes=12) as pool:
        pool.map(process_lookup_tables, l_t_names)



Size (in mb) of lookup_table_spectra_high_res:  61.58
Shape of lookup_table_spectra_high_res:  (200, 80712)
Size (in mb) of lookup_table_spectra_high_res:  56.75
Shape of lookup_table_spectra_high_res:  (200, 74382)
Size (in mb) of lookup_table_spectra_high_res:  55.19
Shape of lookup_table_spectra_high_res:  (200, 72345)
Size (in mb) of cumulated_image_lookup_table_high_res:  56.75
Shape of cumulated_image_lookup_table_high_res:  (200, 231, 322)
Size (in mb) of cumulated_image_lookup_table_high_res:  61.58
Shape of cumulated_image_lookup_table_high_res:  (200, 228, 354)
Size (in mb) of lookup_table_spectra_high_res:  60.41
Shape of lookup_table_spectra_high_res:  (200, 79180)
Size (in mb) of lookup_table_averaged_spectrum_high_res:  0.01
Shape of lookup_table_averaged_spectrum_high_res:  (2000,)
Size (in mb) of lookup_table_averaged_spectrum_high_res:  0.01
Shape of lookup_table_averaged_spectrum_high_res:  (2000,)
Size (in mb) of cumulated_image_lookup_table_high_res:  55.19
Shape of

### Record everything as an HDF5 file and clean up

Record everything in an HDF5 file

In [4]:
# Gather the per-slice temporary .npz archives into two files:
#   - <output_folder>/slices.hdf5   : one group "s<index>" per slice holding the heavy arrays
#   - <output_folder>/slices.pickle : a dictionary "s<index>" -> lighter arrays and metadata
output_folder = "lbae/data/whole_dataset/"
input_folder = "notebooks/server/data/temp/"
os.makedirs(output_folder, exist_ok=True)

dic_slices = {}
# Open the hdf5 file (mode "w": any existing file is overwritten)
with tables.open_file(output_folder + 'slices.hdf5', mode="w") as hdf5_file:
    # Loop over slice files, in sorted order so that runs are reproducible
    for slice_name in sorted(os.listdir(input_folder)):

        # Skip anything that is not a slice archive (same "slice" convention as
        # the one used when listing the already processed slices); a stray file
        # or folder would otherwise make np.load or the index parsing fail.
        if "slice" not in slice_name:
            continue

        # Load slice arrays; the context manager closes the archive once the
        # arrays have been read into memory (np.load otherwise keeps the file
        # open, leaking one handle per slice).
        with np.load(input_folder + slice_name) as npzfile:
            array_pixel_indexes_high_res = npzfile["array_pixel_indexes_high_res"]
            array_spectra_high_res = npzfile["array_spectra_high_res"]
            array_averaged_mz_intensity_low_res = npzfile["array_averaged_mz_intensity_low_res"]
            array_averaged_mz_intensity_high_res = npzfile["array_averaged_mz_intensity_high_res"]
            image_shape = npzfile["image_shape"]
            divider_lookup = npzfile["divider_lookup"]
            lookup_table_spectra_high_res = npzfile["lookup_table_spectra_high_res"]
            cumulated_image_lookup_table_high_res = npzfile["cumulated_image_lookup_table_high_res"]
            lookup_table_averaged_spectrum_high_res = npzfile["lookup_table_averaged_spectrum_high_res"]

        # Create a new group in the hdf5 file, named after the slice index
        # (second "_"-separated token of the file name)
        slice_index = slice_name.split("_")[1]
        group = hdf5_file.create_group("/", "s" + slice_index, "Slice " + slice_index)

        # Register heavy arrays and lookup table in the newly created group
        hdf5_file.create_array(group, "array_spectra", array_spectra_high_res)
        hdf5_file.create_array(group, "array_avg_intensity", array_averaged_mz_intensity_high_res)
        hdf5_file.create_array(group, "array_lookup_mz", lookup_table_spectra_high_res)
        hdf5_file.create_array(group, "array_cumulated_lookup_mz_image", cumulated_image_lookup_table_high_res)

        # Register the rest in a dictionary that is pickled below
        dic_slices["s" + slice_index] = {
            "image_shape": image_shape,
            "divider_lookup": divider_lookup,
            "array_avg_intensity_downsampled": array_averaged_mz_intensity_low_res,
            "array_lookup_pixels": array_pixel_indexes_high_res,
            "array_lookup_mz_avg": lookup_table_averaged_spectrum_high_res,
        }

        # PyTables warns about the very long rows of the two-row arrays (m/z values
        # and intensities). Per the original author's note, storing the transpose
        # instead would interleave the two quantities, so that loading only the
        # m/z values or only the intensities would have to jump over every other
        # element, making it very slow. The warning is therefore accepted.

with open(output_folder + 'slices.pickle', 'wb') as handle:
    pickle.dump(dic_slices, handle)

be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to

Check that everything went well

In [10]:
# Sanity check: reopen the exported file read-only, count the top-level nodes
# (one group per slice) and list every array it contains.
with tables.open_file(output_folder + 'slices.hdf5', mode='r') as hdf5_file:
    n_slices = len(list(hdf5_file.root))
    print('Number of slices recorded: ', n_slices)
    # Deliberately fails loudly if the group of slice 1 is missing from the file
    print('Array spectra of slice 1: ', hdf5_file.root['s1'].array_spectra)
    print('List all arrays recorded in the file: ')
    for group in hdf5_file.walk_groups():
        for array in hdf5_file.list_nodes(group, classname='Array'):
            print(array)

Number of slices recorder:  32
Array spectra of slice 1:  /s1/array_spectra (Array(2, 102305653)) ''
List all arrays recorded in the file: 
/s1/array_avg_intensity (Array(2, 1021129)) ''
/s1/array_cumulated_lookup_mz_image (Array(200, 231, 322)) ''
/s1/array_lookup_mz (Array(200, 74382)) ''
/s1/array_spectra (Array(2, 102305653)) ''
/s10/array_avg_intensity (Array(2, 1898870)) ''
/s10/array_cumulated_lookup_mz_image (Array(200, 282, 394)) ''
/s10/array_lookup_mz (Array(200, 111108)) ''
/s10/array_spectra (Array(2, 151226720)) ''
/s11/array_avg_intensity (Array(2, 2262659)) ''
/s11/array_cumulated_lookup_mz_image (Array(200, 277, 448)) ''
/s11/array_lookup_mz (Array(200, 124096)) ''
/s11/array_spectra (Array(2, 216433078)) ''
/s12/array_avg_intensity (Array(2, 1793930)) ''
/s12/array_cumulated_lookup_mz_image (Array(200, 266, 393)) ''
/s12/array_lookup_mz (Array(200, 104538)) ''
/s12/array_spectra (Array(2, 146793259)) ''
/s13/array_avg_intensity (Array(2, 466769)) ''
/s13/array_cumulat

Clean the temporary folder

In [5]:
# Empty `input_folder` (the temporary folder) without removing the folder itself.
# Deletion is best-effort: a failure on one entry is reported and the loop moves on.
clean = True
if clean:
    for entry_name in os.listdir(input_folder):
        entry_path = os.path.join(input_folder, entry_name)
        try:
            # Regular files and symbolic links (including links to folders) are unlinked
            is_file_or_link = os.path.isfile(entry_path) or os.path.islink(entry_path)
            if is_file_or_link:
                os.unlink(entry_path)
                continue
            # Real folders are removed together with their content
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (entry_path, e))
