In [None]:
#---#| default_exp spectral_library.decoy

# Decoy Libraries

In [None]:
from alphabase.spectral_library.decoy import *

In [None]:
#|hide
from nbdev.showdoc import show_doc

In [None]:
# Render the API documentation entry for `DecoyLib.decoy_sequence`.
show_doc(DecoyLib.decoy_sequence)

---

[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/spectral_library/decoy_library.py#L53){target="_blank" style="float:right; font-size:smaller"}

### DecoyLib.decoy_sequence

>      DecoyLib.decoy_sequence ()

Generate decoy sequences from `self.target_lib`

In [None]:
#| hide

from nbdev.showdoc import show_doc

In [None]:
# Render the API documentation entry for `DecoyLibProvider.get_decoy_lib`.
show_doc(DecoyLibProvider.get_decoy_lib)

---

[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/spectral_library/decoy_library.py#L191){target="_blank" style="float:right; font-size:smaller"}

### DecoyLibProvider.get_decoy_lib

>      DecoyLibProvider.get_decoy_lib (name:str,
>                                      target_lib:alphabase.spectral_library.lib
>                                      rary_base.SpecLibBase, **kwargs)

Get an object of a subclass of `DecoyLib` based on 
registered name.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| name | str | Registered decoy class name |
| target_lib | SpecLibBase | Target library for decoy generation |
| kwargs |  |  |
| **Returns** | **DecoyLib** | **Decoy library object** |

### Registered decoy methods

In [None]:
# Display the mapping from each registered decoy method name to its class.
decoy_lib_provider.decoy_dict

{'pseudo_reverse': __main__.DecoyLib, 'diann': __main__.DiaNNDecoyLib}

In [None]:
#| hide
import os
import numpy as np
import pandas as pd

In [None]:
#| hide
# Test precursors: one fully modified peptide and one unmodified peptide,
# each replicated `repeat` times. Every template is
# (sequence, mods, mod_sites).
repeat = 3
_templates = (
    ('AGHCEWQMK', 'Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M', '0;4;8'),
    ('AGHCEWQMKAADER', '', ''),
)
# Expand the templates column by column into three parallel lists.
peptides, mods, sites = (
    [template[field] for template in _templates for _ in range(repeat)]
    for field in range(3)
)

precursor_df = pd.DataFrame(
    dict(sequence=peptides, mods=mods, mod_sites=sites)
).assign(
    # Peptide length in amino acids, derived from the sequence string.
    nAA=lambda df: df['sequence'].str.len(),
    # All test precursors share the same charge state.
    charge=2,
)
precursor_df

Unnamed: 0,sequence,mods,mod_sites,nAA,charge
0,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
1,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
2,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2
3,AGHCEWQMKAADER,,,14,2
4,AGHCEWQMKAADER,,,14,2
5,AGHCEWQMKAADER,,,14,2


In [None]:
#| hide
# Build a target spectral library around `precursor_df` and save it to HDF.
target_lib = SpecLibBase(['b_z1','b_z2','y_z1','y_z2'])
target_lib._precursor_df = precursor_df
target_lib.calc_precursor_mz()
# Fragment frames are left empty; a later cell asserts that the stored
# fragment m/z frame has no rows.
target_lib._fragment_mz_df = pd.DataFrame()
target_lib._fragment_intensity_df = pd.DataFrame()
# `exist_ok=True` creates the folder only when it is missing, replacing the
# separate "check, then create" step.
os.makedirs('sandbox', exist_ok=True)
target_lib.save_hdf('sandbox/decoy_lib.hdf')
target_lib.precursor_df

Unnamed: 0,sequence,mods,mod_sites,nAA,charge,precursor_mz,mod_seq_hash,mod_seq_charge_hash
0,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747333,-5783464648586361190,-5783464648586361188
1,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747333,-5783464648586361190,-5783464648586361188
2,AGHCEWQMK,Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat...,0;4;8,9,2,602.747333,-5783464648586361190,-5783464648586361188
3,AGHCEWQMKAADER,,,14,2,816.356299,-1606275412423975023,-1606275412423975021
4,AGHCEWQMKAADER,,,14,2,816.356299,-1606275412423975023,-1606275412423975021
5,AGHCEWQMKAADER,,,14,2,816.356299,-1606275412423975023,-1606275412423975021


In [None]:
#| hide
# Generate decoys with the registered 'pseudo_reverse' method, recompute their
# precursor m/z, and check that the values match those of the target library.
decoy_lib = decoy_lib_provider.get_decoy_lib('pseudo_reverse', target_lib)
decoy_lib.translate_to_decoy()
decoy_lib.calc_precursor_mz()
assert np.allclose(decoy_lib.precursor_df.precursor_mz, target_lib.precursor_df.precursor_mz)

In [None]:
#| hide
# Generate decoys with the registered 'diann' method and save them to the same
# HDF path that the target library was written to.
decoy_lib = decoy_lib_provider.get_decoy_lib('diann', target_lib, fix_C_term=False)
decoy_lib.translate_to_decoy()
# `exist_ok=True` creates the folder only when it is missing, replacing the
# separate "check, then create" step.
os.makedirs('sandbox', exist_ok=True)
decoy_lib.save_hdf('sandbox/decoy_lib.hdf')
# The decoy translation must not produce an empty precursor table.
assert len(decoy_lib.precursor_df) > 0

In [None]:
#| hide
# Re-open the saved file: the target library's precursor table must have rows,
# while its fragment m/z table (assigned an empty DataFrame before saving) must not.
_hdf = HDF_File('sandbox/decoy_lib.hdf')
assert len(_hdf.library.precursor_df.values) > 0
assert len(_hdf.library.fragment_mz_df.values) == 0

In [None]:
#| hide
# The same checks for the decoy data, read from `library.decoy` in the file:
# precursors are present, the fragment m/z table is empty.
assert len(_hdf.library.decoy.precursor_df.values) > 0
assert len(_hdf.library.decoy.fragment_mz_df.values) == 0

In [None]:
#| hide
# Load the saved file into a fresh `DecoyLib` and check that a non-empty
# precursor table was read back.
test_lib = DecoyLib(target_lib)
test_lib.load_hdf('sandbox/decoy_lib.hdf')
assert len(test_lib._precursor_df) > 0

In [None]:
#| hide
# Clean up the temporary HDF file written by the cells above.
os.remove('sandbox/decoy_lib.hdf')

In [None]:
#| hide
# Requesting a decoy library with `None` as the method name is expected to return `None`.
assert decoy_lib_provider.get_decoy_lib(None, target_lib) is None