In [None]:
#| default_exp spectral_library.decoy_library

# Decoy Libraries

In [None]:
#| export

import copy
from alphabase.spectral_library.library_base import SpecLibBase
from alphabase.io.hdf import HDF_File

In [None]:
#| export

class DecoyLib(SpecLibBase):
    """Pseudo-reverse decoy library derived from a target library."""

    def __init__(self, 
        target_lib:SpecLibBase,
        fix_C_term = True,
        **kwargs,
    ):
        """Pseudo-reverse peptide decoy generator.

        Currently, only sequence-level decoys are implemented,
        but AlphaPeptDeep will add modifications onto both target 
        and decoy sequences, so it is enough for practical uses.

        Parameters
        ----------
        target_lib : SpecLibBase
            Target library to generate decoys from.

        fix_C_term : bool, optional
            If True, the C-terminal amino acid stays in place and only
            the remaining residues are reversed.
            Defaults to True.

        **kwargs
            Accepted for interface compatibility with subclasses and
            the provider; not used by this class.
        
        Attributes
        ----------
        target_lib : SpecLibBase
            Same object as the `target_lib` parameter (not copied).

        fix_C_term : bool
            Same as the `fix_C_term` parameter.
        """
        # Start from an independent deep copy of the target's state so that
        # rewriting sequences here never touches the target library.
        self.__dict__ = copy.deepcopy(target_lib.__dict__)
        self.target_lib = target_lib
        self.fix_C_term = fix_C_term

    def translate_to_decoy(self):
        """Main entry point of this class. It calls, in order:
            self.decoy_sequence()
            self._decoy_mods()
            self._decoy_meta()
            self._decoy_frags()
        """
        self.decoy_sequence()
        self._decoy_mods()
        self._decoy_meta()
        self._decoy_frags()

    def decoy_sequence(self):
        """Generate decoy sequences in `self._precursor_df` and drop
        every decoy whose sequence also occurs in `self.target_lib`."""
        self._decoy_seq()
        self._remove_target_seqs()

    def append_decoy_sequence(self):
        """Placeholder; currently does nothing."""
        pass

    def _decoy_seq(self):
        """Replace each precursor sequence by its (pseudo-)reverse.

        With `fix_C_term` the last residue is kept in place and the
        prefix is reversed; otherwise the whole sequence is reversed.
        """
        # The flag is constant for the whole column, so pick the
        # transformation once instead of re-testing it for every row.
        if self.fix_C_term:
            def _reverse(seq):
                # Pure slicing (no `seq[-1]` indexing) so that an empty
                # sequence yields '' instead of raising IndexError.
                return seq[-2::-1] + seq[-1:]
        else:
            def _reverse(seq):
                return seq[::-1]

        self._precursor_df['sequence'] = (
            self._precursor_df.sequence.apply(_reverse)
        )

    def _remove_target_seqs(self):
        """Drop (in place) decoy rows whose sequence equals any target
        sequence, e.g. palindromic peptides."""
        target_seqs = set(
            self.target_lib._precursor_df.sequence.values
        )
        is_target = self._precursor_df.sequence.isin(target_seqs)
        # NOTE: rows are dropped by index label and the index is not reset.
        self._precursor_df.drop(
            self._precursor_df.loc[is_target].index, inplace=True
        )

    def _decoy_meta(self):
        """
        Decoy for CCS/RT or other meta data.
        Hook for subclasses; does nothing here.
        """
        pass

    def _decoy_mods(self):
        """
        Decoy for modifications and modification sites.
        Hook for subclasses; does nothing here.
        """
        pass

    def _decoy_frags(self):
        """
        Decoy for fragment masses and intensities.
        Calls `_decoy_fragment_mz()` then `_decoy_fragment_intensity()`.
        """
        self._decoy_fragment_mz()
        self._decoy_fragment_intensity()
    
    def _decoy_fragment_mz(self):
        """Hook for fragment m/z decoys; does nothing here."""
        pass
        
    def _decoy_fragment_intensity(self):
        """Hook for fragment intensity decoys; does nothing here."""
        pass

    def _get_hdf_to_save(self, 
        hdf_file, 
        delete_existing=False
    ):
        """Open `hdf_file` writable (`read_only=False`, `truncate=True`)
        and return its `library.decoy` attribute.

        Parameters
        ----------
        hdf_file : str
            Path of the HDF file.

        delete_existing : bool, optional
            Forwarded to `HDF_File`. Defaults to False.
        """
        _hdf = HDF_File(
            hdf_file, 
            read_only=False, 
            truncate=True,
            delete_existing=delete_existing
        )
        return _hdf.library.decoy

    def _get_hdf_to_load(self,
        hdf_file, 
    ):
        """Open `hdf_file` with default `HDF_File` arguments and
        return its `library.decoy` attribute."""
        _hdf = HDF_File(
            hdf_file,
        )
        return _hdf.library.decoy

    def save_hdf(self, hdf_file):
        """Store the decoy precursor and fragment dataframes under
        `library.decoy` of `hdf_file`.

        The file is opened with `delete_existing=False`, so it is
        opened alongside whatever the file already holds.

        Parameters
        ----------
        hdf_file : str
            Path of the HDF file.
        """
        _hdf = HDF_File(
            hdf_file, 
            read_only=False, 
            truncate=True,
            delete_existing=False
        )
        _hdf.library.decoy = {
            'precursor_df': self._precursor_df,
            'fragment_mz_df': self._fragment_mz_df,
            'fragment_intensity_df': self._fragment_intensity_df,
        }

    def load_hdf(self, hdf_file):
        """Load the decoy precursor and fragment dataframes from
        `library.decoy` of `hdf_file` into this object.

        Parameters
        ----------
        hdf_file : str
            Path of the HDF file.
        """
        _hdf = HDF_File(
            hdf_file,
        )
        _hdf_lib = _hdf.library
        self._precursor_df = _hdf_lib.decoy.precursor_df.values
        self._fragment_mz_df = _hdf_lib.decoy.fragment_mz_df.values
        self._fragment_intensity_df = _hdf_lib.decoy.fragment_intensity_df.values

class DiaNNDecoyLib(DecoyLib):
    """DiaNN-like decoy library: mutates the residues next to both termini."""

    def __init__(self, 
        target_lib:SpecLibBase,
        raw_AAs:str = 'GAVLIFMPWSCTYHKRQENDU',
        mutated_AAs:str = 'LLLVVLLLLTSSSSLLNDQEL', #DiaNN
        **kwargs,
    ):  
        """DiaNN-like decoy peptide generator

        Parameters
        ----------
        target_lib : SpecLibBase
            Target library object

        raw_AAs : str, optional
            Amino acids that are mutated from.
            Defaults to 'GAVLIFMPWSCTYHKRQENDU'.

        mutated_AAs : str, optional
            Amino acids that are mutated to; position `i` is the
            replacement for `raw_AAs[i]`.
            Defaults to 'LLLVVLLLLTSSSSLLNDQEL'.

        **kwargs
            Forwarded to `DecoyLib.__init__` (e.g. `fix_C_term`).
        """
        # Forward extra keyword arguments instead of silently dropping them.
        super().__init__(target_lib, **kwargs)
        self.raw_AAs = raw_AAs
        self.mutated_AAs = mutated_AAs

    def _decoy_seq(self):
        """Mutate the second and the second-to-last residue of every
        precursor sequence according to `raw_AAs` -> `mutated_AAs`.

        Terminal residues are never changed. Sequences shorter than
        three residues have no internal residue and are left as they are.
        Residues that are not listed in `raw_AAs` are left unchanged.
        """
        # Build the lookup once. `setdefault` keeps the first occurrence
        # of a duplicated residue, matching `str.index` semantics.
        aa_map = {}
        for raw_aa, mutated_aa in zip(self.raw_AAs, self.mutated_AAs):
            aa_map.setdefault(raw_aa, mutated_aa)

        def _mutate(seq):
            n_aa = len(seq)
            if n_aa < 3:
                return seq
            residues = list(seq)
            # For a 3-residue peptide both positions coincide; the set
            # ensures the residue is mutated once and the length is kept.
            for pos in {1, n_aa - 2}:
                residues[pos] = aa_map.get(residues[pos], residues[pos])
            return ''.join(residues)

        self._precursor_df['sequence'] = (
            self._precursor_df.sequence.apply(_mutate)
        )

In [None]:
#|hide
from nbdev.showdoc import show_doc

In [None]:
# Pass `DecoyLib.decoy_sequence` to nbdev's `show_doc` (presumably renders its docstring in the docs).
show_doc(DecoyLib.decoy_sequence)

In [None]:
#| export
class DecoyLibProvider:
    """Registry that maps lower-cased names to decoy classes."""

    def __init__(self):
        # lower-cased name -> decoy class (or any callable factory)
        self.decoy_dict = {}

    def register(self, name:str, decoy_class:DecoyLib):
        """Register a decoy class under the lower-cased `name`.
        An existing entry with the same key is overwritten."""
        key = name.lower()
        self.decoy_dict[key] = decoy_class

    def get_decoy_lib(self, name:str, 
        target_lib:SpecLibBase, **kwargs
    )->DecoyLib:
        """Instantiate the decoy class registered under `name`.

        Parameters
        ----------
        name : str
            Registered decoy class name (case-insensitive).
            `None` yields `None`.
        target_lib : SpecLibBase
            Target library for decoy generation
        **kwargs
            Passed through to the decoy class constructor.

        Returns
        -------
        DecoyLib
            Decoy library object, or `None` if `name` is `None`
            or not registered.
        """
        if name is None:
            return None

        key = name.lower()
        if key not in self.decoy_dict:
            return None

        decoy_factory = self.decoy_dict[key]
        return decoy_factory(target_lib, **kwargs)

# Module-level provider instance with the two decoy classes of this module
# registered under their lookup names.
decoy_lib_provider = DecoyLibProvider()
decoy_lib_provider.register(name='pseudo_reverse', decoy_class=DecoyLib)
decoy_lib_provider.register(name='diann', decoy_class=DiaNNDecoyLib)

In [None]:
#| hide

from nbdev.showdoc import show_doc

In [None]:
# Pass `DecoyLibProvider.get_decoy_lib` to nbdev's `show_doc` (presumably renders its docstring in the docs).
show_doc(DecoyLibProvider.get_decoy_lib)

### Registered decoy methods

In [None]:
# Cell output: the name -> decoy class mapping currently held by the provider.
decoy_lib_provider.decoy_dict

In [None]:
#| hide
import os
import numpy as np
import pandas as pd

In [None]:
#| hide
# Test fixture: `repeat` copies of a modified 9-mer followed by
# `repeat` copies of an unmodified 14-mer, all with charge 2.
repeat = 3
peptides = ['AGHCEWQMK'] * repeat + ['AGHCEWQMKAADER'] * repeat
mods = (
    ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M'] * repeat
    + [''] * repeat
)
sites = ['0;4;8'] * repeat + [''] * repeat

precursor_df = pd.DataFrame(
    {'sequence': peptides, 'mods': mods, 'mod_sites': sites}
)
# Derived columns: peptide length and a constant charge state.
precursor_df['nAA'] = precursor_df.sequence.str.len()
precursor_df['charge'] = 2
precursor_df

In [None]:
#| hide
# Target library fixture: the precursor table from above with empty
# fragment tables, saved to the sandbox HDF file.
target_lib = SpecLibBase(['b_z1', 'b_z2', 'y_z1', 'y_z2'])
target_lib._precursor_df = precursor_df
target_lib.calc_precursor_mz()
target_lib._fragment_mz_df = pd.DataFrame()
target_lib._fragment_intensity_df = pd.DataFrame()

# Create the output folder if it is missing.
os.makedirs('sandbox', exist_ok=True)
target_lib.save_hdf('sandbox/decoy_lib.hdf')
target_lib.precursor_df

In [None]:
#| hide
# Pseudo-reverse decoys only permute residues, so the recalculated
# precursor m/z values must equal those of the targets.
decoy_lib = decoy_lib_provider.get_decoy_lib(
    name='pseudo_reverse', target_lib=target_lib
)
decoy_lib.translate_to_decoy()
decoy_lib.calc_precursor_mz()
assert np.allclose(
    decoy_lib.precursor_df.precursor_mz,
    target_lib.precursor_df.precursor_mz,
)

In [None]:
#| hide
# DiaNN-like decoys. `DiaNNDecoyLib._decoy_seq` does not consult
# `fix_C_term`, so the flag has no effect on the generated sequences.
decoy_lib = decoy_lib_provider.get_decoy_lib(
    'diann', target_lib, fix_C_term=False
)
decoy_lib.translate_to_decoy()

# Create the output folder if it is missing, then store the decoys.
os.makedirs('sandbox', exist_ok=True)
decoy_lib.save_hdf('sandbox/decoy_lib.hdf')
assert len(decoy_lib.precursor_df) > 0

In [None]:
#| hide
# Re-open the sandbox file with default HDF_File arguments and check what is
# stored directly under `library` (presumably the target tables written by
# `target_lib.save_hdf` earlier): precursors present, fragment m/z table empty.
_hdf = HDF_File('sandbox/decoy_lib.hdf')
assert len(_hdf.library.precursor_df.values) > 0
assert len(_hdf.library.fragment_mz_df.values) == 0

In [None]:
#| hide
# Same checks for the tables stored under `library.decoy`
# (the location `DecoyLib.save_hdf` writes to).
assert len(_hdf.library.decoy.precursor_df.values) > 0
assert len(_hdf.library.decoy.fragment_mz_df.values) == 0

In [None]:
#| hide
# Round trip: a fresh DecoyLib loads the decoy tables back from the file.
test_lib = DecoyLib(target_lib)
test_lib.load_hdf('sandbox/decoy_lib.hdf')
assert len(test_lib._precursor_df) > 0

In [None]:
#| hide
# Clean up the HDF file created by the tests above (the folder is kept).
os.remove('sandbox/decoy_lib.hdf')

In [None]:
#| hide
# A `None` name must yield no decoy library.
assert decoy_lib_provider.get_decoy_lib(None, target_lib) is None