In [None]:
#default_exp mass_spec.match

In [1]:
#export

import numpy as np
import numba

@numba.njit
def mass_match(
    spec_masses:np.ndarray, 
    query_masses:np.ndarray, 
    Da_tols:np.ndarray
)->np.ndarray:
    """
    Match query masses against sorted MS2/spec masses.

    For each query mass, only the two peaks adjacent to its insertion
    position in `spec_masses` are tested. The lower neighbour is tested
    first and wins if it is within its tolerance, even when the upper
    neighbour is closer.

    Args:
        spec_masses (np.ndarray): MS2 or spec masses, 1-D float array,
          sorted in ascending order (required by `np.searchsorted`).
        query_masses (np.ndarray): query masses, n-D float array
        Da_tols (np.ndarray): Da tolerance array, same shape as spec_masses;
          `Da_tols[k]` is the tolerance applied to `spec_masses[k]`.

    Returns:
        np.ndarray: array of int32, the shape is the same as query_masses.
          Each value is the index of the matched peak in `spec_masses`;
          -1 means no peaks are matched for the query mass.
    """
    n_peaks = len(spec_masses)
    # Insertion position: spec_masses[idx-1] < query <= spec_masses[idx]
    idxes = np.searchsorted(spec_masses, query_masses)
    ret_indices = np.empty_like(query_masses, dtype=np.int32)
    for i,idx in np.ndenumerate(idxes):
        query = query_masses[i]
        # Guard idx > 0: otherwise idx-1 == -1 wraps around to the last peak
        # and a hit there would be stored as -1, i.e. reported as "no match".
        if idx > 0 and abs(spec_masses[idx-1]-query) <= Da_tols[idx-1]:
            ret_indices[i] = idx-1
        # Guard idx < n_peaks: the query is above every peak, so there is
        # no upper neighbour to test.
        elif idx < n_peaks and abs(spec_masses[idx]-query) <= Da_tols[idx]:
            ret_indices[i] = idx
        else:
            ret_indices[i] = -1
    return ret_indices


In [16]:
#hide
# Toy spectrum: ten peaks spaced 100 apart, starting at 0.
spec_masses = np.arange(0, 1000, 100)
# Per-peak tolerance: 20e-6 times the peak mass.
Da_tols = spec_masses * 20 * 1e-6
Da_tols

array([0.   , 0.002, 0.004, 0.006, 0.008, 0.01 , 0.012, 0.014, 0.016,
       0.018])

In [17]:
#hide
#unittests

# Querying the spectrum with its own masses: each peak must match itself.
query_masses = spec_masses
matched = mass_match(spec_masses, query_masses, Da_tols)
assert np.all(np.arange(10, dtype=np.int32) == matched)

In [18]:
#hide
#unittests
# 2-D queries: the first ten masses coincide with peaks, the last ten
# lie above the spectrum and must be reported as -1.
query_masses = (np.arange(20)*100).reshape((10,2))
target = np.full(20, -1, dtype=np.int32)
target[:10] = np.arange(10)
matched = mass_match(spec_masses, query_masses, Da_tols)
assert np.all(target.reshape((10,2)) == matched)

In [20]:
#hide
spec_masses = np.sort(np.random.uniform(100,1000,400))
Da_tols = spec_masses*20*1e-6
query_masses = np.random.uniform(100,1200,240).reshape((30,8))
%timeit mass_match(spec_masses, query_masses, Da_tols)

4.89 µs ± 8.18 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
