In [None]:
#default_exp speclib.decoy_library

In [None]:
import copy
import numba
from alphabase.speclib.library_base import SpecLibBase

class DecoyLib(SpecLibBase):
    """
    Reversed-sequence decoy library derived from a target library.

    The instance starts out as a deep copy of the target library's
    attributes; `translate_to_decoy` then rewrites the copied precursor
    table in place (sequence reversal followed by fragment m/z
    recalculation).

    NOTE(review): `_decoy_mod` and `_decoy_meta` are no-ops, so `mods` /
    `mod_sites` still refer to positions in the target sequence after the
    sequence has been reversed -- confirm whether site remapping is intended.
    """
    def __init__(self, 
        target_lib:SpecLibBase,
        fix_C_term = True,
    ):
        """
        Parameters
        ----------
        target_lib : SpecLibBase
            Library to derive the decoys from. Its `__dict__` is deep-copied,
            so later changes to this object do not touch `target_lib`.
        fix_C_term : bool
            If True, the last residue stays in place and only the residues
            before it are reversed; otherwise the whole sequence is reversed.
        """
        # SpecLibBase.__init__ is not called here: every attribute comes
        # from the deep copy of the target library.
        self.__dict__ = copy.deepcopy(target_lib.__dict__)
        self.fix_C_term = fix_C_term

    def translate_to_decoy(self):
        """
        Turn the copied target entries into decoys.

        Runs the sequence, modification, meta-data and fragment steps in
        that order; subclasses customise behaviour by overriding the
        individual `_decoy_*` hooks.
        """
        self._decoy_seq()
        self._decoy_mod()
        self._decoy_meta()
        self._decoy_frag()

    def _decoy_meta(self):
        """
        Decoy for CCS/RT or other meta data (no-op in this class)
        """
        pass

    def _decoy_mod(self):
        """
        Decoy for modifications and modification sites (no-op in this class)
        """
        pass

    def _decoy_frag(self):
        """
        Decoy for fragment masses and intensities
        """
        self._decoy_fragment_mass()
        self._decoy_fragment_inten()
    
    def _decoy_fragment_mass(self):
        """
        Discard the m/z and fragment-index columns copied from the target
        and recompute them with `load_fragment_mass_df`.
        """
        # The target library may not have computed these columns yet, so
        # only delete the ones that exist instead of raising KeyError.
        for stale_column in ('precursor_mz', 'frag_start_idx', 'frag_end_idx'):
            if stale_column in self._precursor_df.columns:
                del self._precursor_df[stale_column]

        self.load_fragment_mass_df()
        
    def _decoy_fragment_inten(self):
        """
        Decoy for fragment intensities (no-op in this class)
        """
        pass

    def _decoy_seq(self):
        """
        Reverse every entry of the `sequence` column.

        With `fix_C_term` the last residue is kept in place and only the
        preceding residues are reversed.
        """
        # Pick the reversal once instead of testing the flag for every row.
        if self.fix_C_term:
            def _reverse(seq):
                # seq[-1:] rather than seq[-1] so that an empty string maps
                # to itself instead of raising IndexError.
                return seq[:-1][::-1] + seq[-1:]
        else:
            def _reverse(seq):
                return seq[::-1]

        self._precursor_df['sequence'] = (
            self._precursor_df['sequence'].apply(_reverse)
        )

class DiaNNDecoyLib(DecoyLib):
    """
    Decoy library that mutates the residues adjacent to both termini.

    The second and the second-to-last residue of each sequence are replaced
    via the `raw_AAs` -> `mutated_AAs` position-wise lookup; all other
    residues, including both terminal ones, are kept.
    """
    def __init__(self, 
        target_lib:SpecLibBase,
        fix_C_term = True,
        raw_AAs:str = 'GAVLIFMPWSCTYHKRQEND',
        mutated_AAs:str = 'LLLVVLLLLTSSSSLLNDQE', #DiaNN
    ):  
        """
        Parameters
        ----------
        target_lib : SpecLibBase
            Library to derive the decoys from.
        fix_C_term : bool
            Forwarded to `DecoyLib.__init__`; the `_decoy_seq` defined in
            this class does not read it.
        raw_AAs : str
            Residues that can be mutated.
        mutated_AAs : str
            Replacement residues; the character at index i replaces
            `raw_AAs[i]`.
        """
        super().__init__(target_lib, fix_C_term)
        self.raw_AAs = raw_AAs
        self.mutated_AAs = mutated_AAs

    def _mutate_AA(self, aa):
        """
        Return the replacement for a single residue.

        Raises
        ------
        ValueError
            If `aa` does not occur in `raw_AAs`.
        """
        try:
            aa_idx = self.raw_AAs.index(aa)
        except ValueError:
            # Same exception type as before, but say which residue failed.
            raise ValueError(
                "Amino acid '{}' is not in raw_AAs '{}'".format(
                    aa, self.raw_AAs
                )
            ) from None
        return self.mutated_AAs[aa_idx]

    def _mutate_seq(self, seq):
        """
        Mutate the second and second-to-last residue of one sequence while
        always preserving its length.
        """
        nAA = len(seq)
        if nAA < 3:
            # Only terminal residues (or nothing): there is nothing to mutate.
            return seq
        head = seq[0] + self._mutate_AA(seq[1])
        if nAA == 3:
            # Second and second-to-last residue coincide; mutate it once.
            return head + seq[2]
        return head + seq[2:-2] + self._mutate_AA(seq[-2]) + seq[-1]

    def _decoy_seq(self):
        """
        Replace the `sequence` column by the mutated sequences.
        """
        self._precursor_df['sequence'] = (
            self._precursor_df['sequence'].apply(self._mutate_seq)
        )

In [None]:
#export
class DecoyProvider(object):
    """
    Registry that maps case-insensitive names to decoy library classes.
    """
    def __init__(self):
        # lower-cased name -> class (or callable) taking (target_lib, fix_C_term)
        self.decoy_dict = {}

    def register(self, name, decoy_class):
        """
        Register `decoy_class` under `name`.

        The name is stored lower-cased; registering an existing name
        replaces the previous entry.
        """
        self.decoy_dict[name.lower()] = decoy_class

    def get_decoy(self, name, 
        target_lib, fix_C_term=True
    )->DecoyLib:
        """
        Instantiate the decoy library registered under `name`.

        Parameters
        ----------
        name : str
            Registered decoy name; matched case-insensitively.
        target_lib
            Target library passed to the decoy class as first argument.
        fix_C_term : bool
            Passed to the decoy class as second argument.

        Raises
        ------
        KeyError
            If no decoy class is registered under `name`.
        """
        key = name.lower()
        # Only the lookup is guarded, so a KeyError raised inside the decoy
        # class constructor is not relabelled as an unknown name.
        try:
            decoy_class = self.decoy_dict[key]
        except KeyError:
            raise KeyError(
                "Unknown decoy type '{}'; registered types: {}".format(
                    name, sorted(self.decoy_dict)
                )
            ) from None
        return decoy_class(target_lib, fix_C_term)

# Shared registry instance, pre-populated with the decoy classes of this
# module under the names 'reverse' and 'diann'.
decoy_provider = DecoyProvider()
decoy_provider.register(name='reverse', decoy_class=DecoyLib)
decoy_provider.register(name='diann', decoy_class=DiaNNDecoyLib)

In [None]:
#hide
import pandas as pd
# Toy precursor table: `repeat` copies of a 9-mer carrying three
# modifications, followed by `repeat` copies of an unmodified 14-mer.
repeat = 3
peptides = ['AGHCEWQMK']*repeat + ['AGHCEWQMKAADER']*repeat
mods = (
    ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat
    + ['']*repeat
)
sites = ['0;4;8']*repeat + ['']*repeat

precursor_df = pd.DataFrame(
    dict(sequence=peptides, mods=mods, mod_sites=sites)
)
# nAA is the sequence length; every precursor gets charge 2.
precursor_df['nAA'] = precursor_df.sequence.str.len()
precursor_df['charge'] = 2
precursor_df

Unnamed: 0,sequence,mods,mod_sites,nAA,charge
0,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
1,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
2,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
3,AGHCEWQMKAADER,,,14,2
4,AGHCEWQMKAADER,,,14,2
5,AGHCEWQMKAADER,,,14,2


In [None]:
#hide
# Target library restricted to the fragment types b_1, b_2, y_1 and y_2.
# The toy precursor table is attached directly before the fragment m/z
# table is computed and displayed.
target_lib = SpecLibBase(
    ['b_1','b_2','y_1','y_2']
)
target_lib._precursor_df = precursor_df
target_lib.load_fragment_mass_df()
target_lib.fragment_mass_df

Unnamed: 0,b_1,b_2,y_1,y_2
0,114.054950,57.531113,1091.439662,546.223469
1,171.076411,86.041843,1034.418201,517.712738
2,308.135317,154.571296,897.359295,449.183285
3,468.165958,234.586617,737.328653,369.167965
4,597.208546,299.107911,608.286066,304.646671
...,...,...,...,...
58,1071.449834,536.228555,561.262688,281.134982
59,1142.486944,571.747110,490.225578,245.616427
60,1213.524054,607.265665,419.188468,210.097872
61,1328.550993,664.779134,304.161529,152.584403


In [None]:
#hide
# Reverse decoys with the default fix_C_term=True: the last residue stays in
# place and the residues before it are reversed.
decoy_lib = decoy_provider.get_decoy('reverse', target_lib)
decoy_lib.translate_to_decoy()

# Regression checks so that this test cell fails loudly on a behaviour change.
assert decoy_lib.precursor_df.sequence.tolist() == (
    ['MQWECHGAK']*repeat + ['EDAAKMQWECHGAR']*repeat
)
assert (
    decoy_lib.precursor_df.sequence.str.len() == decoy_lib.precursor_df.nAA
).all()
# The decoy library works on a deep copy, so the target must be unchanged.
assert target_lib.precursor_df.sequence.tolist() == peptides

decoy_lib.precursor_df

Unnamed: 0,sequence,mods,mod_sites,nAA,charge,precursor_mz,frag_start_idx,frag_end_idx
0,MQWECHGAK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747306,0,8
1,MQWECHGAK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747306,8,16
2,MQWECHGAK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747306,16,24
3,EDAAKMQWECHGAR,,,14,2,816.356261,24,37
4,EDAAKMQWECHGAR,,,14,2,816.356261,37,50
5,EDAAKMQWECHGAR,,,14,2,816.356261,50,63


In [None]:
#hide
# DiaNN-style decoys: the second and the second-to-last residue are mutated.
decoy_lib = decoy_provider.get_decoy('diann', target_lib, fix_C_term=False)
decoy_lib.translate_to_decoy()

# Regression checks so that this test cell fails loudly on a behaviour change.
assert decoy_lib.precursor_df.sequence.tolist() == (
    ['ALHCEWQLK']*repeat + ['ALHCEWQMKAADDR']*repeat
)
assert (
    decoy_lib.precursor_df.sequence.str.len() == decoy_lib.precursor_df.nAA
).all()

decoy_lib.precursor_df

Unnamed: 0,sequence,mods,mod_sites,nAA,charge,precursor_mz,frag_start_idx,frag_end_idx
0,ALHCEWQLK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,621.800394,0,8
1,ALHCEWQLK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,621.800394,8,16
2,ALHCEWQLK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,621.800394,16,24
3,ALHCEWQMKAADDR,,,14,2,837.379735,24,37
4,ALHCEWQMKAADDR,,,14,2,837.379735,37,50
5,ALHCEWQMKAADDR,,,14,2,837.379735,50,63
