In [None]:
#| default_exp wrappers.alphatims_wrapper

# MSData_Base to AlphaTims

In [None]:
#| export
import numpy as np
import pandas as pd

import alphatims
from alphatims.bruker import TimsTOF

from alpharaw.ms_data_base import MSData_Base



In [None]:
#| export

class AlphaTimsWrapper(TimsTOF):
    """Create an AlphaTims object that contains
    all data in-memory (or memory mapping).

    The object is built from an AlphaRaw `MSData_Base` instance instead of
    a raw file: `TimsTOF.__init__` is intentionally not called, and the
    underscore-prefixed attributes are filled by
    `_import_alpharaw_object`. Every spectrum becomes one frame with a
    single scan and a mobility value of 0.

    Parameters
    ----------
    msdata : MSData_Base
        The AlphaRaw data object.

    dda : bool
        If True (DDA), MS2 spectra get a running precursor index
        (1, 2, 3, ...) in acquisition order and all other spectra get 0.
        If False (DIA), the precursor index of an MS2 spectrum is its
        position within the current DIA cycle and MS1 spectra get 0.

    slice_as_dataframe : bool
        If True, slicing returns a pd.DataFrame by default.
        If False, slicing provides a np.int64[:] with raw indices.
        This value can also be modified after creation.
        Default is True.
    """
    def __init__(
        self,
        msdata: MSData_Base,
        dda: bool,
        slice_as_dataframe: bool = True
    ):
        self._use_calibrated_mz_values_as_default = False
        self._import_alpharaw_object(msdata, dda)
        # Both file-name attributes point at the original raw file path.
        self.thermo_raw_file_name = msdata.raw_file_path
        self.bruker_d_folder_name = self.thermo_raw_file_name
        self.slice_as_dataframe = slice_as_dataframe
        # Precompile
        self[0, "raw"]

    def _import_alpharaw_object(
        self,
        msdata: MSData_Base,
        dda: bool,
    ):
        """Populate the AlphaTims attributes from an AlphaRaw object.

        Parameters
        ----------
        msdata : MSData_Base
            Provides `peak_df` (columns `mz`, `intensity`),
            `spectrum_df` (columns `peak_start_idx`, `peak_end_idx`, `rt`,
            `ms_level`, `precursor_mz`, `isolation_lower_mz`,
            `isolation_upper_mz`), `file_type` and `raw_file_path`.

        dda : bool
            Selects how precursor indices are assigned
            (see the class docstring).
        """
        self._version = alphatims.__version__
        mz_values = msdata.peak_df.mz.values
        self._intensity_values = msdata.peak_df.intensity.values

        # Drop spectra that are flagged as having no peaks
        # (peak_start_idx == -1); they cannot be part of the index pointer.
        if (msdata.spectrum_df.peak_start_idx<0).any():
            spectrum_df = msdata.spectrum_df.query('peak_start_idx!=-1')
        else:
            spectrum_df = msdata.spectrum_df

        # CSR-style index pointer: spectrum i owns peaks
        # [_push_indptr[i], _push_indptr[i+1]).
        self._push_indptr = np.zeros(
            len(spectrum_df)+1, dtype=np.int64
        )
        self._push_indptr[1:] = spectrum_df.peak_end_idx.values
        # rt * 60: presumably minutes -> seconds (cf. the `rt_sec` column).
        self._rt_values = spectrum_df.rt.values*60
        self._quad_mz_values = spectrum_df[
            ['isolation_lower_mz','isolation_upper_mz']
        ].values

        if dda:
            # MS2 spectra are numbered 1..n in order, everything else is 0.
            self._precursor_indices = np.zeros_like(
                self._rt_values, dtype=np.int64
            )
            ms2s = spectrum_df.ms_level.values==2
            self._precursor_indices[ms2s] = np.cumsum(
                ms2s, dtype=np.int64
            )[ms2s]
        else:
            # Count MS2 spectra within a DIA cycle. A new cycle is assumed
            # to start when the precursor m/z does not increase.
            precursor_indices = []
            prev_mz = -1
            prev_idx = 0
            for mz, ms_level in spectrum_df[
                ['precursor_mz','ms_level']
            ].values:
                if ms_level == 1:
                    precursor_indices.append(0)
                elif prev_mz >= mz: # TODO if DIA mz windows are not in order
                    prev_mz = mz
                    prev_idx = 1
                    precursor_indices.append(prev_idx)
                else:
                    prev_idx += 1
                    prev_mz = mz
                    precursor_indices.append(prev_idx)
            self._precursor_indices = np.array(
                precursor_indices, dtype=np.int64
            )

        # One frame per spectrum, one scan per frame, no ion mobility.
        scan_count = len(self._precursor_indices)
        self._frame_max_index = scan_count
        self._scan_max_index = 1
        self._mobility_max_value = 0.0
        self._mobility_min_value = 0.0
        self._mobility_values = np.array([0.0])
        self._quad_indptr = self._push_indptr
        self._raw_quad_indptr = np.arange(scan_count + 1)
        self._intensity_min_value = float(np.min(self._intensity_values))
        self._intensity_max_value = float(np.max(self._intensity_values))
        self._intensity_corrections = np.ones(self._frame_max_index)
        # -1 marks "no isolation window" and is excluded from the minimum.
        self._quad_min_mz_value = float(
            np.min(
                self._quad_mz_values[self._quad_mz_values != -1]
            )
        )
        self._quad_max_mz_value = float(np.max(self._quad_mz_values))
        self._precursor_max_index = int(np.max(self._precursor_indices)) + 1
        self._acquisition_mode = msdata.file_type + ' ' + (
            "DDA" if dda else "DIA"
        ) # TODO

        # Discretise m/z on a fixed grid with `_decimals` decimal places so
        # that every peak can be addressed by an integer "tof index".
        self._mz_min_value = int(np.min(mz_values))
        self._mz_max_value = int(np.max(mz_values)) + 1
        self._decimals = 4
        mz_scale = 10**self._decimals
        self._mz_values = np.arange(
            mz_scale * self._mz_min_value,
            mz_scale * (self._mz_max_value + 1)
        ) / mz_scale
        self._tof_indices = (
            mz_values * mz_scale
        ).astype(np.int32) - mz_scale * self._mz_min_value
        self._tof_max_index = len(self._mz_values)
        self._meta_data = {
            "SampleName": msdata.raw_file_path
        }

        # Spectra with an isolation window (lower != -1) are fragment frames.
        msmstype = np.array(
            [0 if s == -1 else 1 for s, e in self._quad_mz_values]
        )

        # Per-spectrum intensity sum over peaks [start, stop). The
        # cumulative sum gets a leading 0 so that cumulative[k] is the sum
        # of the first k peaks, which makes the difference exact and also
        # valid for empty spectra and for stop == number of peaks.
        cumulative = np.insert(np.cumsum(self._intensity_values), 0, 0)
        summed_intensities = (
            cumulative[self._push_indptr[1:]]
            - cumulative[self._push_indptr[:-1]]
        )
        # Per-spectrum maximum; spectra without peaks report 0.
        max_intensities = [
            np.max(self._intensity_values[start:stop])
            if 0 <= start < stop else 0
            for start, stop in zip(
                self._push_indptr[:-1], self._push_indptr[1:]
            )
        ]
        self._frames = pd.DataFrame(
            {
                'MsMsType': msmstype,
                'Time': self._rt_values,
                'SummedIntensities': summed_intensities,
                'MaxIntensity': max_intensities,
                'Id': np.arange(len(self._rt_values)),
            }
        )

        frame_numbers = np.arange(len(self._rt_values), dtype=np.int32)
        quad_lower = self._quad_mz_values[:, 0]
        quad_upper = self._quad_mz_values[:, 1]
        # Window width is upper - lower, window centre is the midpoint.
        isolation_widths = quad_upper - quad_lower
        isolation_centers = (quad_upper + quad_lower) / 2
        self._fragment_frames = pd.DataFrame(
            {
                "Frame": frame_numbers[msmstype==1],
                "ScanNumBegin": 0,
                "ScanNumEnd": 0,
                "IsolationWidth": isolation_widths[msmstype==1],
                "IsolationMz": isolation_centers[msmstype==1],
                "Precursor": self._precursor_indices[msmstype==1],
            }
        )

        # Build the DIA cycle tables from the first `precursor_max_index`
        # pushes; the attributes are read back through the inherited
        # properties (zeroth_frame, raw_quad_indptr, ...).
        self._zeroth_frame = False
        offset = int(self.zeroth_frame)
        cycle_index = np.searchsorted(
            self.raw_quad_indptr,
            (self.scan_max_index) * (self.precursor_max_index + offset),
            "r"
        ) + 1
        repeats = np.diff(self.raw_quad_indptr[: cycle_index])
        if self.zeroth_frame:
            repeats[0] -= self.scan_max_index
        cycle_length = self.scan_max_index * self.precursor_max_index
        repeat_length = np.sum(repeats)
        if repeat_length != cycle_length:
            # Trim the last entry so the repeats add up to one full cycle.
            repeats[-1] -= repeat_length - cycle_length
        self._dia_mz_cycle = np.empty((cycle_length, 2))
        self._dia_mz_cycle[:, 0] = np.repeat(
            self.quad_mz_values[: cycle_index - 1, 0],
            repeats
        )
        self._dia_mz_cycle[:, 1] = np.repeat(
            self.quad_mz_values[: cycle_index - 1, 1],
            repeats
        )
        self._dia_precursor_cycle = np.repeat(
            self.precursor_indices[: cycle_index - 1],
            repeats
        )

In [None]:
#| hide
import io
from alpharaw.legacy_msdata.mgf import ms_reader_provider

In [None]:
#| hide
# In-memory MGF fixture with two `BEGIN IONS` ... `END IONS` records.
# Each record has TITLE, CHARGE, RTINSECONDS and PEPMASS header lines,
# followed by two-column numeric lines (presumably "m/z intensity" peaks).
mgf = io.StringIO("""
BEGIN IONS
TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.8.8.3.0.dta
CHARGE=3+
RTINSECONDS=3.5418930
PEPMASS=272.276336
103.92207 5457.3
104.20045 5051.4
108.70090 5891.7
113.94175 6442.6
116.92975 40506.3
116.93716 8945.5
128.37773 6427.8
131.95308 288352.6
133.93259 7344.6
138.44611 7326.1
139.00072 41556.8
140.00319 16738.8
140.99719 9493.8
145.93156 10209.3
145.94897 10497.8
147.94559 8206.3
147.96396 30552.8
148.95543 14654.7
149.96338 234207.8
150.95096 8306.0
157.01089 84638.9
158.01357 27925.7
159.00627 16084.7
163.94281 24751.1
163.95915 32203.3
165.95605 44458.0
165.97186 11530.2
166.99500 26432.2
167.97302 9216.7
181.95230 13858.8
191.95448 66152.7
192.95538 8408.9
193.07185 9092.8
193.95313 660574.9
194.95674 23452.8
194.99008 143940.9
200.00568 19510.8
200.99942 23678.7
204.30894 9406.1
209.96466 21853.6
211.96245 65351.0
218.90355 9149.6
223.91072 11300.2
238.89684 12108.8
243.93825 10150.2
243.97040 10987.7
244.94121 8744.2
246.90314 11556.3
271.93225 29430.0
271.99219 51184.4
272.19150 31960.4
272.98602 35844.1
273.94431 11031.8
284.47998 8191.3
290.00125 66212.4
290.99539 54064.7
293.89490 10005.0
407.06372 10838.2
464.36697 9715.4
698.81390 9711.7
END IONS
BEGIN IONS
TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.11.11.2.0.dta
CHARGE=2+
RTINSECONDS=6.6455220
PEPMASS=287.427959
103.34669 5304.0
104.66884 5639.7
113.42419 6258.3
118.84039 5837.5
119.93203 13977.3
130.69589 6876.2
133.94824 43094.3
134.30524 7671.5
135.96359 9031.3
138.99994 8329.7
146.95573 31143.9
147.96323 12176.5
150.95151 65859.3
151.95818 24384.2
157.01105 19241.5
157.34985 7532.5
161.08838 7843.9
161.94234 20119.7
162.95146 60110.4
163.95877 183305.5
164.96657 13647.5
174.95139 150331.9
175.95258 21393.4
178.94460 11433.1
179.95316 13650.5
180.96204 15353.5
190.94572 30418.9
191.95422 61914.1
192.61461 8642.1
192.94395 12331.4
192.96207 132342.5
193.96318 19303.0
209.04164 25149.6
209.96368 154185.0
209.98361 12353.5
213.86244 11541.3
224.93071 12903.0
228.92879 8773.6
241.86043 135357.5
242.86113 20805.2
242.94327 26679.4
243.95219 29569.9
244.92361 12153.5
246.90300 16650.3
252.96521 73484.3
253.96646 11527.5
286.85858 10166.4
287.94186 18763.2
303.87665 39189.3
304.88116 11976.0
321.89087 97122.5
322.88867 28020.8
370.28696 9008.2
389.82578 13277.0
407.83545 12220.4
425.84872 13236.5
482.54852 10940.2
END IONS
""")
# Ask the legacy reader provider for the reader registered as 'mgf' and let
# it import the in-memory text instead of a file on disk.
reader = ms_reader_provider.get_reader('mgf')
reader.import_raw(mgf)
# Last expression of the cell: display the resulting spectrum table.
reader.spectrum_df

Unnamed: 0,spec_idx,peak_start_idx,peak_end_idx,rt,charge,ms_level,precursor_mz,isolation_lower_mz,isolation_upper_mz,rt_sec
0,0,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
1,1,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
2,2,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
3,3,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
4,4,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
5,5,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
6,6,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
7,7,0,60,0.059032,3.0,2,272.276336,270.276336,274.276336,3.541893
8,8,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0
9,9,-1,-1,0.0,0.0,2,0.0,0.0,0.0,0.0


In [None]:
#| hide
# Wrap the imported MGF data in DDA mode, then slice it with a float range
# (the displayed rows have rt_values inside 3.0-5.0).
tims_data = AlphaTimsWrapper(msdata=reader, dda=True)
tims_data[3.0:5.0]

Unnamed: 0,raw_indices,frame_indices,scan_indices,precursor_indices,push_indices,tof_indices,rt_values,rt_values_min,mobility_values,quad_low_mz_values,quad_high_mz_values,mz_values,intensity_values,corrected_intensity_values
0,0,0,0,1,0,9220,3.541893,0.059032,0.0,270.276336,274.276336,103.922,5457.3,5457
1,1,0,0,1,0,12004,3.541893,0.059032,0.0,270.276336,274.276336,104.2004,5051.4,5051
2,2,0,0,1,0,57009,3.541893,0.059032,0.0,270.276336,274.276336,108.7009,5891.7,5891
3,3,0,0,1,0,109417,3.541893,0.059032,0.0,270.276336,274.276336,113.9417,6442.6,6442
4,4,0,0,1,0,139297,3.541893,0.059032,0.0,270.276336,274.276336,116.9297,40506.3,40506
5,5,0,0,1,0,139371,3.541893,0.059032,0.0,270.276336,274.276336,116.9371,8945.5,8945
6,6,0,0,1,0,253777,3.541893,0.059032,0.0,270.276336,274.276336,128.3777,6427.8,6427
7,7,0,0,1,0,289530,3.541893,0.059032,0.0,270.276336,274.276336,131.953,288352.6,288352
8,8,0,0,1,0,309325,3.541893,0.059032,0.0,270.276336,274.276336,133.9325,7344.6,7344
9,9,0,0,1,0,354461,3.541893,0.059032,0.0,270.276336,274.276336,138.4461,7326.1,7326
