In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Testing fasta

Use FastaLib to build a library (database)

#### Init fasta lib

In [None]:
from alphabase.protein.fasta import FastaLib

# Two toy proteins keyed by their protein id. The sequence of 'yy' also occurs
# as a substring of 'xx'.
protein_dict = {
    protein_id: {'protein_id': protein_id, 'sequence': sequence}
    for protein_id, sequence in [
        ('xx', 'MACDESTYKBKFGHIKLMNPQRST'),
        ('yy', 'FGHIKLMNPQR'),
    ]
}

# Library configuration.
# NOTE(review): the first positional argument is presumably the list of charged
# fragment types (b/y ions at charge 1 and 2) -- confirm against the FastaLib
# signature before turning it into a keyword argument.
fastalib = FastaLib(
    ['b_z1', 'b_z2', 'y_z1', 'y_z2'],
    fix_mods=['Carbamidomethyl@C'],
    var_mods=['Oxidation@M', 'Acetyl@Protein N-term'],
    I_to_L=False,
    decoy='pseudo_reverse',
)


Call `import_and_process_protein_dict` (for an in-memory protein dict, as in the cell below) or its FASTA-file counterpart to load proteins, append decoys, add modifications and add charge states.

```
fastalib.import_and_process_fasta([fasta1, fasta2])
```

In [None]:
# Load the toy proteins into the library and run its processing step
# (per the notebook text: decoys, modifications and charge states are added).
fastalib.import_and_process_protein_dict(protein_dict)
# Display the resulting protein table as the cell output.
fastalib.protein_df

Unnamed: 0,protein_id,sequence
0,xx,MACDESTYKBKFGHIKLMNPQRST
1,yy,FGHIKLMNPQR


In [None]:
# Check that processing populated precursor_df with the `decoy`, `mods`,
# `mod_sites` and `charge` columns. A single set-difference check names every
# missing column on failure, instead of stopping at the first bare assert.
expected_precursor_cols = {'decoy', 'mods', 'mod_sites', 'charge'}
missing_precursor_cols = expected_precursor_cols - set(fastalib.precursor_df.columns)
assert not missing_precursor_cols, (
    f"precursor_df is missing columns: {sorted(missing_precursor_cols)}"
)

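Call `calc_precursor_isotope` to calculate `precursor_mz` and the isotope columns checked below: m/z, intensity and offset of the apex and right-most isotopes, and m/z and intensity of the M1 isotope.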

In [None]:
# Compute precursor m/z and isotope information, then verify that every
# expected column was added to precursor_df. Checking them as a set reports
# all missing columns at once rather than failing on the first bare assert.
fastalib.calc_precursor_isotope()

expected_isotope_cols = {
    'precursor_mz',
    'isotope_apex_mz',
    'isotope_apex_intensity',
    'isotope_apex_offset',
    'isotope_right_most_mz',
    'isotope_right_most_intensity',
    'isotope_right_most_offset',
    'isotope_m1_mz',
    'isotope_m1_intensity',
}
missing_isotope_cols = expected_isotope_cols - set(fastalib.precursor_df.columns)
assert not missing_isotope_cols, (
    f"precursor_df is missing columns: {sorted(missing_isotope_cols)}"
)

Call `calc_fragment_mz_df` to calculate the fragment dataframe

In [None]:
# NOTE: `np` is not referenced in this cell; the import is kept (moved to the
# top of the cell rather than buried between statements) in case later cells
# rely on it.
import numpy as np

# Build the fragment m/z table; this also records, per precursor, the slice of
# fragment rows via frag_start_idx / frag_end_idx.
fastalib.calc_fragment_mz_df()

expected_frag_idx_cols = {'frag_start_idx', 'frag_end_idx'}
missing_frag_idx_cols = expected_frag_idx_cols - set(fastalib.precursor_df.columns)
assert not missing_frag_idx_cols, (
    f"precursor_df is missing columns: {sorted(missing_frag_idx_cols)}"
)

# The fragment table is expected to hold nAA - 1 rows per precursor,
# summed over all precursors.
expected_n_frag_rows = (fastalib.precursor_df.nAA.values - 1).sum()
assert len(fastalib.fragment_mz_df) == expected_n_frag_rows, (
    f"fragment_mz_df has {len(fastalib.fragment_mz_df)} rows, "
    f"expected {expected_n_frag_rows}"
)

Use `save_hdf` to save as hdf file:
```
fastalib.save_hdf(hdf_file_path)
```

Then use `load_hdf` to load precursor and fragment dataframes:
```
fastalib.load_hdf(hdf_file_path, load_mod_seq=True)
```