In [None]:
#| default_exp constants.modification

# Modification information

`alphabase.constants.modification`

The default modification TSV is stored in `alphabase/constants/const_files/modification.tsv`. Please check it to add more modifications. 

First, we load `modification.tsv` into `MOD_DF`. 

Then, we extract information of `MOD_CHEM` (dict), `MOD_MASS` (dict), `MOD_LOSS_MASS` (dict), `MOD_INFO_DICT` (dict) ... from `MOD_DF`. This step is done in `update_all_by_MOD_DF()`.

All these steps are done by `load_mod_df()`.

In [None]:
#| export

import os
import numba
import numpy as np
import pandas as pd
from typing import Union, List

from alphabase.constants.element import (
    calc_mass_from_formula, parse_formula,
)

from alphabase.constants._const import CONST_FILE_FOLDER

In [None]:
#| export

#: Modification table; `load_mod_df()` fills it and indexes it by `mod_name`.
MOD_DF: pd.DataFrame = pd.DataFrame()

#: Modification to row-information dict, i.e. {mod_name: {column: value, ...}}
MOD_INFO_DICT: dict = dict()
#: Modification to formula str dict. {mod_name: formula str ('H(1)C(2)O(3)')}
MOD_CHEM: dict = dict()
#: Modification to mass dict.
MOD_MASS: dict = dict()
#: Modification to modification neutral loss dict.
MOD_LOSS_MASS: dict = dict()
#: Modification to formula dict of dict. i.e. {modname: {'C': n, 'H': m, ...}}
MOD_formula: dict = dict()
#: Modification loss importance
MOD_LOSS_IMPORTANCE: dict = dict()

def update_all_by_MOD_DF():
    """
    Rebuild every module-level modification dict from `MOD_DF`.

    A DataFrame is more convenient for data operations, so `MOD_DF`
    is the table to edit; call this function afterwards to refresh
    `MOD_INFO_DICT`, `MOD_CHEM`, `MOD_MASS`, `MOD_LOSS_MASS`,
    `MOD_LOSS_IMPORTANCE` and `MOD_formula`. The dicts are cleared and
    refilled in place, so existing references to them stay valid.
    """
    # Whole rows, keyed by the index label of MOD_DF.
    MOD_INFO_DICT.clear()
    MOD_INFO_DICT.update(MOD_DF.to_dict(orient='index'))

    # One dict per source column, refreshed in this fixed order.
    column_backed_dicts = (
        (MOD_CHEM, 'composition'),
        (MOD_MASS, 'mass'),
        (MOD_LOSS_MASS, 'modloss'),
        (MOD_LOSS_IMPORTANCE, 'modloss_importance'),
    )
    for target_dict, column in column_backed_dicts:
        target_dict.clear()
        target_dict.update(MOD_DF[column].to_dict())

    # Parsed formulas are derived from the freshly updated MOD_CHEM.
    MOD_formula.clear()
    for mod_name, formula_str in MOD_CHEM.items():
        MOD_formula[mod_name] = dict(parse_formula(formula_str))

def add_modifications_for_lower_case_AA():
    """
    Add modifications for lower-case AAs for advanced usages.

    For every modification in `MOD_DF` whose site is a single amino acid
    (`Mod@K`) or an amino acid at a terminus (`Mod@S^Protein N-term`),
    a copy with the amino-acid letter lower-cased is appended. Sites
    without a specific amino acid (e.g. `Any N-term`) get no copy.
    A boolean `lower_case_AA` column marks the appended rows, and all
    global modification dicts are refreshed afterwards.

    The function is safe to call repeatedly: rows already flagged as
    lower-case are neither re-derived nor un-flagged, and a lower-case
    name that is already in the table is not appended again. This keeps
    the index of `MOD_DF` unique, which `update_all_by_MOD_DF()` needs
    for `to_dict(orient='index')`.
    """
    global MOD_DF

    def _mod_lower_case(modname):
        # Returns '' when the site has no single amino-acid letter to lower.
        modname, site = modname.split('@')
        if len(site) == 1:
            return modname+'@'+site.lower()
        elif '^' in site:
            site = site[0].lower()+site[1:]
            return modname+'@'+site
        else:
            return ''

    # Rows produced by an earlier call keep their flag; `eq(True)` also maps
    # NaN/0 placeholders (e.g. rows added after that call) to False.
    if 'lower_case_AA' in MOD_DF.columns:
        is_lower_case = MOD_DF['lower_case_AA'].eq(True).to_numpy()
    else:
        is_lower_case = np.zeros(len(MOD_DF), dtype=bool)

    # Derive lower-case variants only from rows that are not lower-case yet.
    lower_case_df = MOD_DF[~is_lower_case].copy()
    lower_case_df['mod_name'] = lower_case_df['mod_name'].apply(_mod_lower_case)
    is_new = (
        (lower_case_df['mod_name'] != '')
        & ~lower_case_df['mod_name'].isin(MOD_DF.index)
    ).to_numpy()
    lower_case_df = lower_case_df[is_new].set_index('mod_name', drop=False)
    lower_case_df['lower_case_AA'] = True

    # `assign` builds a new frame instead of writing into a frame that may
    # be a filtered slice of an earlier MOD_DF.
    MOD_DF = pd.concat([
        MOD_DF.assign(lower_case_AA=is_lower_case),
        lower_case_df,
    ])
    update_all_by_MOD_DF()

In [None]:
#| export
def keep_modloss_by_importance(modloss_importance_level:float=1.0):
    """
    Keep only the modification losses that are important enough.

    The `modloss` column of `MOD_DF` is reset from `modloss_original`,
    then set to 0 for every modification whose `modloss_importance`
    is below `modloss_importance_level`. `MOD_LOSS_MASS` is refreshed
    from the resulting column.

    Parameters
    ----------
    modloss_importance_level : float
        Minimal `modloss_importance` for a modloss to be kept.
        Defaults to 1.0.
    """
    # Always start from the unfiltered masses so that repeated calls with
    # different levels do not accumulate zeros.
    MOD_DF['modloss'] = MOD_DF['modloss_original']
    below_level = MOD_DF['modloss_importance'] < modloss_importance_level
    MOD_DF.loc[below_level, 'modloss'] = 0

    refreshed_losses = MOD_DF['modloss'].to_dict()
    MOD_LOSS_MASS.clear()
    MOD_LOSS_MASS.update(refreshed_losses)

def load_mod_df(
    tsv:str=os.path.join(CONST_FILE_FOLDER, 'modification.tsv'),
    *,
    modloss_importance_level=1,
):
    """
    Load the modification TSV into `MOD_DF` and refresh all
    global modification dicts.

    The `mass`, `modloss_original` and `modloss` columns are computed
    from the `composition` and `modloss_composition` formulas.

    Parameters
    ----------
    tsv : str
        Path of the modification TSV file. Defaults to
        `modification.tsv` in `CONST_FILE_FOLDER`.

    modloss_importance_level : float
        Passed to `keep_modloss_by_importance()`: modlosses with
        `modloss_importance` below this level are set to 0.
        Defaults to 1.
    """
    global MOD_DF
    # Missing cells become '' so that empty formulas can be parsed below.
    MOD_DF = pd.read_table(tsv).fillna('')
    MOD_DF['unimod_id'] = MOD_DF['unimod_id'].astype(np.int32)
    # Index by name but keep `mod_name` as a regular column as well.
    MOD_DF = MOD_DF.set_index('mod_name', drop=False)

    MOD_DF['mass'] = MOD_DF['composition'].apply(calc_mass_from_formula)
    MOD_DF['modloss_original'] = MOD_DF['modloss_composition'].apply(
        calc_mass_from_formula
    )
    MOD_DF['modloss'] = MOD_DF['modloss_original']

    keep_modloss_by_importance(modloss_importance_level)
    update_all_by_MOD_DF()
    
# Fill MOD_DF and the derived dicts from the default TSV as soon as this cell/module runs.
load_mod_df()

In [None]:
# Display the loaded modification table.
MOD_DF

Unnamed: 0_level_0,mod_name,avge_mass,classification,composition,modloss_composition,mono_mass,unimod_id,unimod_mass,unimod_modloss,modloss_importance,mass,modloss_original,modloss
mod_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
GlyGly@K,GlyGly@K,114.042927,Post-translational,H(6)C(4)N(2)O(2),H(6)C(4)N(2)O(2),114.042927,121,114.042927,114.042927,1000000.0,114.042927,114.042927,114.042927
15N-oxobutanoic@C^Any N-term,15N-oxobutanoic@C^Any N-term,-18.023900,Artefact,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.000000,0.0,-18.023584,0.000000,0.000000
15N-oxobutanoic@S^Protein N-term,15N-oxobutanoic@S^Protein N-term,-18.023900,Post-translational,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.000000,0.0,-18.023584,0.000000,0.000000
15N-oxobutanoic@T^Protein N-term,15N-oxobutanoic@T^Protein N-term,-18.023900,Post-translational,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.000000,0.0,-18.023584,0.000000,0.000000
2-dimethylsuccinyl@C,2-dimethylsuccinyl@C,144.125300,Chemical derivative,H(8)C(6)O(4),,144.042259,1262,144.042259,0.000000,0.0,144.042259,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
spermidine@Q,spermidine@Q,128.215300,Chemical derivative,H(16)C(7)N(2),,128.131349,1421,128.131349,0.000000,0.0,128.131349,0.000000,0.000000
spermine@Q,spermine@Q,185.309700,Chemical derivative,H(23)C(10)N(3),,185.189198,1420,185.189198,0.000000,0.0,185.189198,0.000000,0.000000
sulfo+amino@Y,sulfo+amino@Y,95.077800,Chemical derivative,H(1)N(1)O(3)S(1),,94.967714,997,94.967714,0.000000,0.0,94.967714,0.000000,0.000000
thioacylPA@K,thioacylPA@K,159.206200,Chemical derivative,H(9)C(6)N(1)O(2)S(1),,159.035399,967,159.035399,0.000000,0.0,159.035400,0.000000,0.000000


### With NIST elements, the calculated masses now agree with `unimod_mass`; the check below prints any modification that differs by more than 1e-5 Da.

In [None]:
#| hide
# Report every modification whose formula-derived mass deviates from the
# Unimod mass by more than 1e-5 Da.
_mass_columns = MOD_DF[['mod_name','unimod_mass','mass']]
for mod, unimod_mass, mass in _mass_columns.itertuples(index=False, name=None):
    if not (abs(unimod_mass-mass) > 1e-5):
        continue
    print(f"{mod}: unimod mod={unimod_mass}, formula mass={mass}")

Delta:Hg(1)@C: unimod mod=201.970617, formula mass=201.9706434


# Mod site representation
* `site=0` refers to an N-term modification
* `site=-1` refers to a C-term modification
* `1<=site<=peplen` refers to a normal modification

For example: \_0A1B2C3D4E5F6G7H8I9J10K11\_-1

`calc_modification_mass()` and `calc_modification_mass_sum()` are the base functions for calculating modification masses. For large sets of peptides, we recommend using `calc_mod_masses_for_same_len_seqs()` instead of `calc_modification_mass()`.

In [None]:
#| export
def calc_modification_mass(
    nAA:int, 
    mod_names:List[str], 
    mod_sites:List[int]
)->np.ndarray:
    '''
    Place the mass of each modification on its residue for a peptide
    of length `nAA`.

    Parameters
    ----------
    nAA : int
        Peptide length

    mod_names : list
        List[str]. Modification name list

    mod_sites : list
        List[int]. Modification site list corresponding to `mod_names`.
        * `site=0` refers to an N-term modification
        * `site=-1` refers to a C-term modification
        * `1<=site<=peplen` refers to a normal modification

    Returns
    -------
    np.ndarray
        1-D array with length=`nAA`.
        Summed modification mass on each residue,
        `0` where a residue carries no modification.
        N-term masses are added to the first residue and
        C-term masses to the last one.
    '''
    site_masses = np.zeros(nAA)
    for mod_name, mod_site in zip(mod_names, mod_sites):
        # 0 (N-term) and -1 (C-term) are used directly as array indices;
        # residue sites are 1-based and must be shifted.
        idx = mod_site if mod_site in (0, -1) else mod_site - 1
        site_masses[idx] += MOD_MASS[mod_name]
    return site_masses

def calc_mod_masses_for_same_len_seqs(
    nAA:int, 
    mod_names_list:List[List[str]], 
    mod_sites_list:List[List[int]]
)->np.ndarray:
    '''
    Calculate modification masses for the given peptides with same peptide length (`nAA`).
    
    Parameters
    ----------
    nAA : int
        Peptide length

    mod_names_list : List[List[str]]
        List (pep_count) of modification list (n_mod on each peptide)

    mod_sites_list : List[List[int]]
        List of modification site list corresponding to `mod_names_list`.
        * `site=0` refers to an N-term modification
        * `site=-1` refers to a C-term modification
        * `1<=site<=peplen` refers to a normal modification
    
    Returns
    -------
    np.ndarray
        2-D array with shape=`(pep_count, nAA)`, where
        `pep_count = len(mod_names_list)`: one row per peptide,
        one column per residue.
        Summed modification mass on each residue,
        `0` where a residue carries no modification.
        N-term masses are added to the first residue and
        C-term masses to the last one.
    '''
    masses = np.zeros((len(mod_names_list), nAA))
    for pep_idx, (mod_names, mod_sites) in enumerate(
        zip(mod_names_list, mod_sites_list)
    ):
        for mod, site in zip(mod_names, mod_sites):
            # 0 (N-term) and -1 (C-term) are used directly as column
            # indices; residue sites are 1-based and must be shifted.
            col = site if site in (0, -1) else site - 1
            masses[pep_idx, col] += MOD_MASS[mod]
    return masses

def calc_modification_mass_sum(
    mod_names:List[str]
)->float:
    """
    Sum the masses of the given modifications.

    Neither the modification sites nor the peptide length are needed,
    which makes this handy when only the total peptide mass matters.

    Parameters
    ----------
    mod_names : List[str]
        Modification name list

    Returns
    -------
    float
        Total mass of all listed modifications
        (`0.0` for an empty list)
    """
    mass_per_mod = [MOD_MASS[mod_name] for mod_name in mod_names]
    return np.sum(mass_per_mod)


In [None]:
#| hide
seq = 'AGHCEWQMK'
mod_names = ['Acetyl@Protein N-term', 'Carbamidomethyl@C', 'Oxidation@M']
mod_sites = [0, 4, 8]

# The N-term mass lands on residue 1, the others on residues 4 (C) and 8 (M).
assert np.allclose(
    calc_modification_mass(
        nAA=len(seq), mod_names=mod_names, mod_sites=mod_sites
    ),
    [42.01056468] + [0]*2 + [57.02146372] + [0]*3 + [15.99491462, 0]
)

In [None]:
#| export
@numba.jit(nopython=True, nogil=True)
def _calc_modloss_with_importance(
    mod_losses: np.ndarray, 
    _loss_importance: np.ndarray
)->np.ndarray:
    '''
    Propagate modification loss masses along the peptide, letting the
    most important modification seen so far win.

    Scanning from the first position, a position whose importance is
    strictly higher than every earlier one keeps its own loss mass;
    any other position takes the loss mass of the most important
    position before it. For example, in `AM(Oxidation@M)S(Phospho@S)...`,
    importance of Phospho@S > importance of Oxidation@M, so the modloss
    of the b3 ion is that of Phospho@S (~98 Da), not that of
    Oxidation@M (~64 Da).

    Parameters
    ----------
    mod_losses : np.ndarray
        Mod loss masses of each AA position. Modified in place.

    _loss_importance : np.ndarray
        Mod loss importance of each AA position

    Returns
    -------
    np.ndarray
        `mod_losses` itself, holding the loss masses
        selected by `_loss_importance`
    '''
    top_importance = _loss_importance[0]
    top_idx = 0
    for idx in range(1, len(_loss_importance)):
        if _loss_importance[idx] > top_importance:
            # New leader: this position keeps its own loss mass.
            top_idx = idx
            top_importance = _loss_importance[idx]
        else:
            mod_losses[idx] = mod_losses[top_idx]
    return mod_losses

def calc_modloss_mass_with_importance(
    nAA: int, 
    mod_names: List, 
    mod_sites: List,
    for_nterm_frag: bool,
)->np.ndarray:
    '''
    Calculate modification loss masses (e.g. ~98 Da for Phospho@S/T, 
    ~64 Da for Oxidation@M). Modifications with higher `MOD_LOSS_IMPORTANCE` 
    have higher priorities. For example, `AS(Phospho@S)M(Oxidation@M)...`,
    importance of Phospho@S > importance of Oxidation@M, so the modloss of 
    the b3 ion will be that of Phospho@S, not that of Oxidation@M.
    
    Parameters
    ----------
    nAA : int
        Peptide length

    mod_names : List[str]
        Modification name list. `None` or any empty sequence
        (list, tuple, numpy array) means "no modification".

    mod_sites : List[int]
        Modification site list corresponding to `mod_names`
        (`0` for N-term, `-1` for C-term, `1..nAA` for residues)

    for_nterm_frag : bool
        If `True`, the loss will be on the 
        N-term fragments (mainly `b` ions); 
        If `False`, the loss will be on the 
        C-term fragments (mainly `y` ions)
    
    Returns
    -------
    np.ndarray
        1-D array of length `nAA-1` with the mod_loss mass
        of each fragment position
    '''
    # Use an explicit length check, like `calc_modloss_mass`: `not mod_names`
    # is ambiguous (or an error) when a numpy array is passed.
    if mod_names is None or len(mod_names) == 0:
        return np.zeros(nAA-1)

    # Padded layout: index 0 is the N-term, 1..nAA are the residues and
    # the last index (reached by site -1) is the C-term.
    mod_losses = np.zeros(nAA+2)
    mod_losses[mod_sites] = [MOD_LOSS_MASS[mod] for mod in mod_names]
    _loss_importance = np.zeros(nAA+2)
    _loss_importance[mod_sites] = [
        MOD_LOSS_IMPORTANCE.get(mod, 0) for mod in mod_names
    ]
    
    # Will not consider the modloss if the corresponding modloss_importance is 0
    mod_losses[_loss_importance==0] = 0

    if for_nterm_frag:
        return _calc_modloss_with_importance(mod_losses, _loss_importance)[1:-2]
    else:
        # Scan from the C-term by reversing both arrays, then flip the
        # selected slice back into N-to-C order.
        return _calc_modloss_with_importance(mod_losses[::-1], _loss_importance[::-1])[-3:0:-1]

@numba.njit
def _calc_modloss(
    mod_losses: np.ndarray
)->np.ndarray:
    '''
    Forward-fill modification loss masses along the peptide: every
    position without its own loss (value `0`) takes the loss mass of
    the position before it.

    Parameters
    ----------
    mod_losses : np.ndarray
        Mod loss masses of each AA position. Modified in place.

    Returns
    -------
    np.ndarray
        `mod_losses` itself after the forward fill
    '''
    for idx in range(1, len(mod_losses)):
        # Positions that carry their own non-zero loss are left untouched.
        if mod_losses[idx] == 0:
            mod_losses[idx] = mod_losses[idx-1]
    return mod_losses
    
def calc_modloss_mass(
    nAA: int, 
    mod_names: List, 
    mod_sites: List,
    for_nterm_frag: bool,
)->np.ndarray:
    '''
    Calculate modification loss masses (e.g. ~98 Da for Phospho@S/T,
    ~64 Da for Oxidation@M), always taking the modification closest
    to the fragmentation site. For example, the modloss of the b3 ion
    for `AS(Phospho@S)M(Oxidation@M)...` will be that of Oxidation@M.

    Parameters
    ----------
    nAA : int
        Peptide length

    mod_names : List[str]
        Modification name list

    mod_sites : List[int]
        Modification site list corresponding to `mod_names`
        (`0` for N-term, `-1` for C-term, `1..nAA` for residues)

    for_nterm_frag : bool
        If `True`, the loss will be on the 
        N-term fragments (mainly `b` ions); 
        If `False`, the loss will be on the 
        C-term fragments (mainly `y` ions)

    Returns
    -------
    np.ndarray
        1-D array of length `nAA-1` with the mod_loss mass
        of each fragment position
    '''
    if len(mod_names) == 0:
        return np.zeros(nAA-1)

    # Padded layout: index 0 is the N-term, 1..nAA are the residues and
    # the last index (reached by site -1) is the C-term.
    padded_losses = np.zeros(nAA+2)
    padded_losses[mod_sites] = [MOD_LOSS_MASS[name] for name in mod_names]

    if not for_nterm_frag:
        # Fill from the C-term on the reversed array, then flip the
        # selected slice back into N-to-C order.
        return _calc_modloss(padded_losses[::-1])[-3:0:-1]
    return _calc_modloss(padded_losses)[1:-2]

In [None]:
#| hide
mod_names = ['Oxidation@M', 'Phospho@S', 'Carbamidomethyl@C']
mod_sites = [0, 4, 8]

# Importance level 0: every modloss is kept, including the one of Oxidation@M.
load_mod_df(modloss_importance_level=0)
assert np.allclose(
    calc_modloss_mass(10, mod_names, mod_sites, True),
    [63.99828592]*3 + [97.97689557]*6
)

# Importance level 1: the expected values show the Oxidation@M loss zeroed out.
load_mod_df(tsv=os.path.join(CONST_FILE_FOLDER, 'modification.tsv'), modloss_importance_level=1)
assert np.allclose(
    calc_modloss_mass(10, mod_names, mod_sites, True),
    [0]*3 + [97.97689557]*6
)

# C-term fragments: only those that still contain the Phospho@S site carry the loss.
assert np.allclose(
    calc_modloss_mass(10, mod_names, mod_sites, False),
    [97.97689557]*3 + [0]*6
)

### Note that `calc_modloss_mass` is a little bit time-consuming
`%timeit calc_modloss_mass(10, mod_names, mod_sites, False)`

`Results (12 seconds in total): 12.6 µs ± 96.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)`

In [None]:
# Show the first modification of every classification.
MOD_DF.drop_duplicates('classification')

Unnamed: 0_level_0,mod_name,avge_mass,classification,composition,modloss_composition,mono_mass,unimod_id,unimod_mass,unimod_modloss,modloss_importance,mass,modloss_original,modloss
mod_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
GlyGly@K,GlyGly@K,114.042927,Post-translational,H(6)C(4)N(2)O(2),H(6)C(4)N(2)O(2),114.042927,121,114.042927,114.042927,1000000.0,114.042927,114.042927,114.042927
15N-oxobutanoic@C^Any N-term,15N-oxobutanoic@C^Any N-term,-18.0239,Artefact,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.0,0.0,-18.023584,0.0,0.0
2-dimethylsuccinyl@C,2-dimethylsuccinyl@C,144.1253,Chemical derivative,H(8)C(6)O(4),,144.042259,1262,144.042259,0.0,0.0,144.042259,0.0,0.0
3-deoxyglucosone@R,3-deoxyglucosone@R,144.1253,Multiple,H(8)C(6)O(4),,144.042259,949,144.042259,0.0,0.0,144.042259,0.0,0.0
ADP-Ribosyl@C,ADP-Ribosyl@C,541.3005,Other glycosylation,H(21)C(15)N(5)O(13)P(2),,541.06111,213,541.06111,0.0,0.0,541.06111,0.0,0.0
ADP-Ribosyl@N,ADP-Ribosyl@N,541.3005,N-linked glycosylation,H(21)C(15)N(5)O(13)P(2),H(21)C(15)N(5)O(13)P(2),541.06111,213,541.06111,541.06111,0.0,541.06111,541.06111,0.0
ADP-Ribosyl@S,ADP-Ribosyl@S,541.3005,O-linked glycosylation,H(21)C(15)N(5)O(13)P(2),H(21)C(15)N(5)O(13)P(2),541.06111,213,541.06111,541.06111,0.0,541.06111,541.06111,0.0
AEC-MAEC:2H(4)@S,AEC-MAEC:2H(4)@S,63.158,Isotopic label,H(1)2H(4)C(2)N(1)O(-1)S(1),,63.044462,792,63.044462,0.0,0.0,63.044463,0.0,0.0
Ahx2+Hsl@Any C-term,Ahx2+Hsl@Any C-term,309.4039,Non-standard residue,H(27)C(16)N(3)O(3),,309.205242,1015,309.205242,0.0,0.0,309.205242,0.0,0.0
Ala->Arg@A,Ala->Arg@A,85.1078,AA substitution,H(7)C(3)N(3)O(0)S(0),,85.063997,1052,85.063997,0.0,0.0,85.063997,0.0,0.0


## We can update the modification list for different requirements, for example:

In [None]:
add_modifications_for_lower_case_AA()
# Keep only the lower-case variants of the PTM-like classifications.
MOD_DF = MOD_DF[
    MOD_DF['lower_case_AA']
    & MOD_DF['classification'].isin([
        'Post-translational',
        'O-linked glycosylation',
        'AA substitution',
        'Multiple',
        'Non-standard residue',
        'Pre-translational',
    ])
]
update_all_by_MOD_DF()
# MOD_INFO_DICT is also updated
pd.DataFrame.from_dict(MOD_INFO_DICT, orient='index')

Unnamed: 0,mod_name,avge_mass,classification,composition,modloss_composition,mono_mass,unimod_id,unimod_mass,unimod_modloss,modloss_importance,mass,modloss_original,modloss,lower_case_AA
GlyGly@k,GlyGly@k,114.042927,Post-translational,H(6)C(4)N(2)O(2),H(6)C(4)N(2)O(2),114.042927,121,114.042927,114.042927,1000000.0,114.042927,114.042927,114.042927,True
15N-oxobutanoic@s^Protein N-term,15N-oxobutanoic@s^Protein N-term,-18.023900,Post-translational,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.000000,0.0,-18.023584,0.000000,0.000000,True
15N-oxobutanoic@t^Protein N-term,15N-oxobutanoic@t^Protein N-term,-18.023900,Post-translational,H(-3)15N(-1),,-18.023584,1419,-18.023584,0.000000,0.0,-18.023584,0.000000,0.000000,True
3-deoxyglucosone@r,3-deoxyglucosone@r,144.125300,Multiple,H(8)C(6)O(4),,144.042259,949,144.042259,0.000000,0.0,144.042259,0.000000,0.000000,True
3-phosphoglyceryl@k,3-phosphoglyceryl@k,168.042000,Post-translational,H(5)C(3)O(6)P(1),,167.982375,1387,167.982375,0.000000,0.0,167.982375,0.000000,0.000000,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
pyrophospho@t,pyrophospho@t,159.959800,Post-translational,H(2)O(6)P(2),H(3)O(7)P(2),159.932662,898,159.932662,176.935402,0.0,159.932662,176.935401,0.000000,True
s-GlcNAc@s,s-GlcNAc@s,283.255700,O-linked glycosylation,H(13)C(8)N(1)O(8)S(1),H(13)C(8)N(1)O(8)S(1),283.036187,1412,283.036187,283.036187,0.0,283.036188,283.036188,0.000000,True
s-GlcNAc@t,s-GlcNAc@t,283.255700,O-linked glycosylation,H(13)C(8)N(1)O(8)S(1),H(13)C(8)N(1)O(8)S(1),283.036187,1412,283.036187,283.036187,0.0,283.036188,283.036188,0.000000,True
serotonylation@q,serotonylation@q,159.184600,Post-translational,H(9)C(10)N(1)O(1),,159.068414,1992,159.068414,0.000000,0.0,159.068414,0.000000,0.000000,True


In [None]:
#| hide
load_mod_df()
add_modifications_for_lower_case_AA()
# Keep only the rows generated for lower-case amino acids.
MOD_DF = MOD_DF[MOD_DF['lower_case_AA']]
# The first character after '@' is the site's amino acid; it must be lower-case.
assert all(
    mod_name[mod_name.find('@')+1].islower()
    for mod_name in MOD_DF['mod_name']
)

In [None]:
#| export
def add_new_modifications(new_mods:list):
    """Add new modifications into MOD_DF

    Every new modification is stored with classification `User-added`
    and `unimod_id` 0. Its `mass`, `modloss` and `modloss_original` are
    computed from the given formulas; a modification with a positive
    modloss gets `modloss_importance` 1e6. An existing row with the
    same name is overwritten. Remaining missing cells of `MOD_DF` are
    filled with 0 and all global modification dicts are refreshed.

    Parameters
    ----------
    new_mods : list 
    
        list of tuples. Tuple example:
        (
            modname@site:str (e.g. Mod@S), 
            chemical compositions:str (e.g. "H(4)O(2)"),
            [optional] modloss compositions:str (e.g. "H(2)O(1)"),
        )
    """
    for items in new_mods:
        if len(items) == 2:
            mod, comp = items
            modloss_comp = ''
        else:
            mod, comp, modloss_comp = items

        # Compute the masses first, so an unparsable formula raises
        # before any cell of the table has been written.
        mod_mass = calc_mass_from_formula(comp)
        modloss_mass = calc_mass_from_formula(modloss_comp)

        MOD_DF.loc[mod,[
            'mod_name','composition','modloss_composition',
            'classification','unimod_id'
        ]] = [
            mod, comp, modloss_comp,
            'User-added', 0
        ]
        MOD_DF.loc[mod,['mass','modloss']] = (mod_mass, modloss_mass)
        # `keep_modloss_by_importance()` rebuilds `modloss` from
        # `modloss_original`; without this the user's modloss would be
        # reset to 0 by the next call.
        MOD_DF.loc[mod, 'modloss_original'] = modloss_mass
        if modloss_mass > 0:
            MOD_DF.loc[mod, 'modloss_importance'] = 1e6
    MOD_DF.fillna(0, inplace=True)
    update_all_by_MOD_DF()

In [None]:
#| hide
add_new_modifications([
    ("Hello@S","H(2)"),
    ("World@S","O(10)","O(3)")
])
assert (MOD_DF.classification=='User-added').sum()==2
# `x in Series` tests the index labels, not the values, so check the
# `mod_name` column values explicitly.
assert 'Hello@S' in MOD_DF.mod_name.values
assert 'World@S' in MOD_DF.mod_name.values
assert MOD_DF.loc['World@S','modloss'] > 0
assert MOD_DF.loc['World@S','modloss_importance'] > 0
assert 'Hello@S' in MOD_formula
assert 'World@S' in MOD_MASS