In [1]:
# default_exp settings

# Settings

> A template for settings

AlphaPept stores all settings in `*.yaml`-files. This notebook contains functions to load, save, and print settings. Additionally, a settings template is defined. For each parameter, the template defines a default value, an allowed range where applicable, and the kind of parameter (e.g., float value, list, etc.). The idea is to use these definitions to automatically create graphical user interfaces for the settings.

In [2]:
#hide
from nbdev.showdoc import *

## Settings

### Saving and Loading

The default scheme for saving settings is `*.yaml`-files. These files can be easily modified by opening them with a text editor.

In [3]:
#export
import yaml
import os

def print_settings(settings: dict):
    """Write a settings dictionary to stdout as block-style yaml.

    Args:
        settings (dict): A yaml dictionary.
    """
    yaml_text = yaml.dump(settings, default_flow_style=False)
    print(yaml_text)


def load_settings(path: str):
    """Read a yaml settings file from disk.

    Args:
        path (str): Path to the settings file.

    Returns:
        The parsed file content as produced by ``yaml.load`` with
        ``yaml.FullLoader``.
    """
    with open(path, "r") as handle:
        return yaml.load(handle, Loader=yaml.FullLoader)
    
    
def load_settings_as_template(path: str):
    """Load a settings file and strip the entries that hold result information.

    The top-level keys ``summary`` and ``failed`` as well as
    ``search.prec_tol_calibrated`` are removed when present.

    Args:
        path (str): Path to the settings file.

    Returns:
        The loaded settings without the entries listed above.
    """
    settings = load_settings(path)

    # Top-level sections that are dropped from the template.
    for key in ('summary', 'failed'):
        if key in settings:
            del settings[key]

    # Nested entry inside the search section that is dropped as well.
    if 'search' in settings and 'prec_tol_calibrated' in settings['search']:
        del settings['search']['prec_tol_calibrated']

    return settings
    

def save_settings(settings: dict, path: str):
    """Write a settings dictionary to a yaml file.

    The directory part of ``path`` is created when it does not exist yet.
    Keys are written in insertion order (``sort_keys=False``).

    Args:
        settings (dict): A yaml dictionary.
        path (str): Path to the settings file.
    """
    target_dir = os.path.dirname(path)

    # A bare file name has an empty directory part; nothing to create then.
    if target_dir != '':
        os.makedirs(target_dir, exist_ok=True)

    with open(path, "w") as handle:
        yaml.dump(settings, handle, sort_keys=False)

In [4]:
# Small demo: round-trip a settings dictionary through a yaml file in the
# current working directory and show what each helper returns.
settings = {'field1': 0,'summary':123}
dummy_path = 'to_delete.yaml'

print('--- print_settings ---')
print_settings(settings)

# Write the file that the two loaders below read back.
save_settings(settings, dummy_path)

print('--- load_settings ---')

print_settings(load_settings(dummy_path))

print('--- load_settings_as_template ---')

# The 'summary' key is removed by load_settings_as_template.
print_settings(load_settings_as_template(dummy_path))

--- print_settings ---
field1: 0
summary: 123

--- load_settings ---
field1: 0
summary: 123

--- load_settings_as_template ---
field1: 0



In [5]:
#hide
def test_settings_utils():
    """Round-trip settings through disk and check the template stripping.

    The settings contain every entry that `load_settings_as_template` removes
    (`summary`, `failed` and `search.prec_tol_calibrated`), so each assertion
    actually exercises the removal. The file is written into a temporary
    directory that is deleted afterwards; the nested target folder also covers
    the directory-creation branch of `save_settings`.
    """
    import tempfile  # local import: only needed by this test

    settings = {
        'field1': 0,
        'summary': 123,
        'failed': ['file_a'],
        'search': {'prec_tol': 20, 'prec_tol_calibrated': 5},
    }

    with tempfile.TemporaryDirectory() as tmp_dir:
        dummy_path = os.path.join(tmp_dir, 'nested', 'to_delete.yaml')

        save_settings(settings, dummy_path)

        s = load_settings(dummy_path)

        assert s == settings

        s_ = load_settings_as_template(dummy_path)

    # Entries holding result information must be gone ...
    assert 'summary' not in s_
    assert 'failed' not in s_
    assert 'prec_tol_calibrated' not in s_['search']

    # ... while regular settings are kept untouched.
    assert s_['field1'] == 0
    assert s_['search']['prec_tol'] == 20

test_settings_utils()

## Settings Template

The settings template defines individual settings. The idea is to provide a template so that a graphical user interface can be automatically generated. The list below represents what each item would be when using `streamlit`. This could be adapted for any kind of GUI library.

Each entry has a type, default values, and a description.

* spinbox -> `st.range`, range with minimum and maximum values (int)
* doublespinbox -> `st.range`, range with minimum and maximum values (float)
* path -> `st.button`, clickable button to select a path to save / load files.
* combobox -> `st.selectbox`, dropdown menu with values to choose from
* checkbox -> `st.checkbox`, checkbox that can be selected
* checkgroup -> `st.multiselect`, creates a list of options that can be selected
* string -> `st.text_input`, generic string input
* list -> Creates a list that is displayed
* placeholder -> This just prints the parameter and cannot be changed


### Workflow settings

Workflow settings define which algorithmic steps should be performed.

In [6]:
#export
import pandas as pd
from alphapept.constants import protease_dict

# Root container: maps a category name to the dictionary of its setting definitions.
SETTINGS_TEMPLATE = {}

# Workflow: one checkbox per processing step.
workflow = {
    "continue_runs": {'type': 'checkbox', 'default': False, 'description': "Flag to continue previously computated runs. If False existing ms_data will be deleted."},
    "create_database": {'type': 'checkbox', 'default': True, 'description': "Flag to create a database."},
    "import_raw_data": {'type': 'checkbox', 'default': True, 'description': "Flag to import the raw data."},
    "find_features": {'type': 'checkbox', 'default': True, 'description': "Flag to perform feature finding."},
    "search_data": {'type': 'checkbox', 'default': True, 'description': "Flag to perform search."},
    "recalibrate_data": {'type': 'checkbox', 'default': True, 'description': "Flag to perform recalibration."},
    "align": {'type': 'checkbox', 'default': False, 'description': "Flag to align the data."},
    "match": {'type': 'checkbox', 'default': False, 'description': "Flag to perform match-between runs."},
    "lfq_quantification": {'type': 'checkbox', 'default': True, 'description': "Flag to perfrom lfq normalization."},
}

SETTINGS_TEMPLATE["workflow"] = workflow

In [7]:
print_settings(SETTINGS_TEMPLATE['workflow'])

align:
  default: false
  description: Flag to align the data.
  type: checkbox
continue_runs:
  default: false
  description: Flag to continue previously computated runs. If False existing ms_data
    will be deleted.
  type: checkbox
create_database:
  default: true
  description: Flag to create a database.
  type: checkbox
find_features:
  default: true
  description: Flag to perform feature finding.
  type: checkbox
import_raw_data:
  default: true
  description: Flag to import the raw data.
  type: checkbox
lfq_quantification:
  default: true
  description: Flag to perfrom lfq normalization.
  type: checkbox
match:
  default: false
  description: Flag to perform match-between runs.
  type: checkbox
recalibrate_data:
  default: true
  description: Flag to perform recalibration.
  type: checkbox
search_data:
  default: true
  description: Flag to perform search.
  type: checkbox



In [8]:
#export
# General: settings that are not tied to a single processing step.
general = {
    'n_processes': {
        'type': 'spinbox',
        'min': 1,
        'max': 60,
        'default': 60,
        'description': "Maximum number of processes for multiprocessing. If larger than number of processors it will be capped.",
    },
}

SETTINGS_TEMPLATE["general"] = general

In [9]:
print_settings(SETTINGS_TEMPLATE['general'])

n_processes:
  default: 60
  description: Maximum number of processes for multiprocessing. If larger than number
    of processors it will be capped.
  max: 60
  min: 1
  type: spinbox



### Experimental Settings

Core definitions of the experiment, such as the file paths.

In [10]:
#export
# Experiment: file locations and per-file annotations.
experiment = {
    "results_path": {'type': 'path', 'default': None, 'filetype': ['hdf'], 'folder': False, 'description': "Path where the results should be stored."},
    "shortnames": {'type': 'list', 'default': [], 'description': "List of shortnames for the raw files."},
    "file_paths": {'type': 'list', 'default': [], 'description': "Filepaths of the experiments."},
    "sample_group": {'type': 'list', 'default': [], 'description': "Sample group, for raw files that should be quanted together."},
    "matching_group": {'type': 'list', 'default': [], 'description': "List of macthing groups for the raw files. This only allows match-between-runs of files within the same groups."},
    "fraction": {'type': 'list', 'default': [], 'description': "List of fraction numbers for fractionated samples."},
    "database_path": {'type': 'path', 'default': None, 'filetype': ['hdf'], 'folder': False, 'description': "Path to library file (.hdf)."},
    "fasta_paths": {'type': 'list', 'default': [], 'description': "List of paths for FASTA files."},
}

SETTINGS_TEMPLATE["experiment"] = experiment

In [11]:
print_settings(SETTINGS_TEMPLATE['experiment'])

database_path:
  default: null
  description: Path to library file (.hdf).
  filetype:
  - hdf
  folder: false
  type: path
fasta_paths:
  default: []
  description: List of paths for FASTA files.
  type: list
file_paths:
  default: []
  description: Filepaths of the experiments.
  type: list
fraction:
  default: []
  description: List of fraction numbers for fractionated samples.
  type: list
matching_group:
  default: []
  description: List of macthing groups for the raw files. This only allows match-between-runs
    of files within the same groups.
  type: list
results_path:
  default: null
  description: Path where the results should be stored.
  filetype:
  - hdf
  folder: false
  type: path
sample_group:
  default: []
  description: Sample group, for raw files that should be quanted together.
  type: list
shortnames:
  default: []
  description: List of shortnames for the raw files.
  type: list



### Raw file handling


In [12]:
#export
# Raw: options for handling the raw spectra.
raw = {
    "n_most_abundant": {
        'type': 'spinbox',
        'min': -1,
        'max': 1000,
        'default': 400,
        'description': "Number of most abundant peaks to be isolated from raw spectra.",
    },
    "use_profile_ms1": {
        'type': 'checkbox',
        'default': False,
        'description': "Use profile data for MS1 and perform own centroiding.",
    },
}

SETTINGS_TEMPLATE["raw"] = raw

In [13]:
print_settings(SETTINGS_TEMPLATE['raw'])

n_most_abundant:
  default: 400
  description: Number of most abundant peaks to be isolated from raw spectra.
  max: 1000
  min: -1
  type: spinbox
use_profile_ms1:
  default: false
  description: Use profile data for MS1 and perform own centroiding.
  type: checkbox



### FASTA settings

In [14]:
#export
import os
fasta = {}

## Read modifications from modifications file

# Start from the folder of this module; inside a notebook `__file__` is not
# defined, so fall back to the `alphapept` folder next to the working directory.
try:
    base = os.path.dirname(os.path.abspath(__file__)) #Cant do this in notebook
except NameError:
    base = os.path.join(os.pardir, 'alphapept')

modfile_path = os.path.join(base, "modifications.tsv")

if not os.path.isfile(modfile_path):
    #Calling nbdev build_docs from parent dir
    base = os.path.join(base, 'alphapept')
    modfile_path = os.path.join(base, "modifications.tsv")

mod_db = pd.read_csv(modfile_path, sep='\t')

# Identifier -> description, split by the kind of modification.
mods = {}
mods_terminal = {}
mods_protein = {}

for _, mod in mod_db.iterrows():
    if 'terminus' not in mod['Type']:
        mods[mod['Identifier']] = mod['Description']
    elif 'peptide' in mod['Type']:
        mods_terminal[mod['Identifier']] = mod['Description']
    elif 'protein' in mod['Type']:
        mods_protein[mod['Identifier']] = mod['Description']
    else:
        # Terminal modification that is neither a peptide nor a protein terminus.
        print('Not understood')
        print(mod['Type'])

# Names of all proteases known to `protease_dict`, offered in the protease dropdown.
proteases = list(protease_dict.keys())

fasta.update({
    # Modifications; every entry gets its own copy of the selectable values.
    "mods_fixed": {'type': 'checkgroup', 'value': mods.copy(), 'default': ['cC'], 'description': "Fixed modifications."},
    "mods_fixed_terminal": {'type': 'checkgroup', 'value': mods_terminal.copy(), 'default': [], 'description': "Fixed terminal modifications."},
    "mods_variable": {'type': 'checkgroup', 'value': mods.copy(), 'default': ['oxM'], 'description': "Variable modifications."},
    "mods_variable_terminal": {'type': 'checkgroup', 'value': mods_terminal.copy(), 'default': [], 'description': "Varibale terminal modifications."},
    "mods_fixed_terminal_prot": {'type': 'checkgroup', 'value': mods_protein.copy(), 'default': [], 'description': "Fixed terminal modifications on proteins."},
    "mods_variable_terminal_prot": {'type': 'checkgroup', 'value': mods_protein.copy(), 'default': ['a<^'], 'description': "Varibale terminal modifications on proteins."},

    # Digestion limits
    "n_missed_cleavages": {'type': 'spinbox', 'min': 0, 'max': 99, 'default': 2, 'description': "Number of missed cleavages."},
    "pep_length_min": {'type': 'spinbox', 'min': 7, 'max': 99, 'default': 7, 'description': "Minimum peptide length."},
    "pep_length_max": {'type': 'spinbox', 'min': 7, 'max': 99, 'default': 27, 'description': "Maximum peptide length."},
    "isoforms_max": {'type': 'spinbox', 'min': 1, 'max': 4096, 'default': 1024, 'description': "Maximum number of isoforms per peptide."},
    "n_modifications_max": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 3, 'description': "Limit the number of modifications per peptide."},

    # Decoy generation
    "pseudo_reverse": {'type': 'checkbox', 'default': True, 'description': "Use pseudo-reverse strategy instead of reverse."},
    "AL_swap": {'type': 'checkbox', 'default': False, 'description': "Swap A and L for decoy generation."},
    "KR_swap": {'type': 'checkbox', 'default': False, 'description': "Swap K and R (only if terminal) for decoy generation."},

    "protease": {'type': 'combobox', 'value': proteases, 'default': 'trypsin', 'description': "Protease for digestions."},

    # Database creation
    "spectra_block": {'type': 'spinbox', 'min': 1000, 'max': 1000000, 'default': 100000, 'description': "Maximum number of sequences to be collected before theoretical spectra are generated."},
    "fasta_block": {'type': 'spinbox', 'min': 100, 'max': 10000, 'default': 1000, 'description': "Number of fasta entries to be processed in one block."},
    "save_db": {'type': 'checkbox', 'default': True, 'description': "Save DB or create on the fly."},
    "fasta_size_max": {'type': 'spinbox', 'min': 1, 'max': 1000000, 'default': 100, 'description': "Maximum size of FASTA (MB) when switching on-the-fly."},
})

SETTINGS_TEMPLATE["fasta"] = fasta

In [15]:
print_settings(SETTINGS_TEMPLATE['fasta'])

AL_swap:
  default: false
  description: Swap A and L for decoy generation.
  type: checkbox
KR_swap:
  default: false
  description: Swap K and R (only if terminal) for decoy generation.
  type: checkbox
fasta_block:
  default: 1000
  description: Number of fasta entries to be processed in one block.
  max: 10000
  min: 100
  type: spinbox
fasta_size_max:
  default: 100
  description: Maximum size of FASTA (MB) when switching on-the-fly.
  max: 1000000
  min: 1
  type: spinbox
isoforms_max:
  default: 1024
  description: Maximum number of isoforms per peptide.
  max: 4096
  min: 1
  type: spinbox
mods_fixed:
  default:
  - cC
  description: Fixed modifications.
  type: checkgroup
  value:
    aK: acetylation of lysine
    cC: carbamidomethylation of C
    deamN: deamidation of N
    deamQ: deamidation of Q
    eK: EASItag 6-plex on K
    itraq4K: iTRAQ 4-plex on K
    itraq4Y: iTRAQ 4-plex on Y
    itraq8K: iTRAQ 8-plex on K
    itraq8Y: iTRAQ 8-plex on Y
    oxM: oxidation of M
    p

### Feature Finding

In [16]:
#export

# Thermo FF settings
# NOTE(review): except for `search_unidentified`, these entries carry no
# 'description' key, unlike the entries of the other categories.
features = {
    "max_gap": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 2},
    "centroid_tol": {'type': 'spinbox', 'min': 1, 'max': 25, 'default': 8},
    "hill_length_min": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 3},
    "hill_split_level": {'type': 'doublespinbox', 'min': 0.1, 'max': 10.0, 'default': 1.3},
    "iso_split_level": {'type': 'doublespinbox', 'min': 0.1, 'max': 10.0, 'default': 1.3},

    "hill_smoothing": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 1},
    "hill_check_large": {'type': 'spinbox', 'min': 1, 'max': 100, 'default': 40},

    "iso_charge_min": {'type': 'spinbox', 'min': 1, 'max': 6, 'default': 1},
    "iso_charge_max": {'type': 'spinbox', 'min': 1, 'max': 6, 'default': 6},
    "iso_n_seeds": {'type': 'spinbox', 'min': 1, 'max': 500, 'default': 100},

    "hill_nboot_max": {'type': 'spinbox', 'min': 1, 'max': 500, 'default': 300},
    "hill_nboot": {'type': 'spinbox', 'min': 1, 'max': 500, 'default': 150},

    "iso_mass_range": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 5},
    "iso_corr_min": {'type': 'doublespinbox', 'min': 0.1, 'max': 1, 'default': 0.6},

    "map_mz_range": {'type': 'doublespinbox', 'min': 0.1, 'max': 2, 'default': 1.5},
    "map_rt_range": {'type': 'doublespinbox', 'min': 0.1, 'max': 1, 'default': 0.5},
    "map_mob_range": {'type': 'doublespinbox', 'min': 0.1, 'max': 1, 'default': 0.3},
    "map_n_neighbors": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 5},

    "search_unidentified": {'type': 'checkbox', 'default': False, 'description': "Search MSMS w/o feature."},
}

SETTINGS_TEMPLATE["features"] = features

In [17]:
print_settings(SETTINGS_TEMPLATE['features'])

centroid_tol:
  default: 8
  max: 25
  min: 1
  type: spinbox
hill_check_large:
  default: 40
  max: 100
  min: 1
  type: spinbox
hill_length_min:
  default: 3
  max: 10
  min: 1
  type: spinbox
hill_nboot:
  default: 150
  max: 500
  min: 1
  type: spinbox
hill_nboot_max:
  default: 300
  max: 500
  min: 1
  type: spinbox
hill_smoothing:
  default: 1
  max: 10
  min: 1
  type: spinbox
hill_split_level:
  default: 1.3
  max: 10.0
  min: 0.1
  type: doublespinbox
iso_charge_max:
  default: 6
  max: 6
  min: 1
  type: spinbox
iso_charge_min:
  default: 1
  max: 6
  min: 1
  type: spinbox
iso_corr_min:
  default: 0.6
  max: 1
  min: 0.1
  type: doublespinbox
iso_mass_range:
  default: 5
  max: 10
  min: 1
  type: spinbox
iso_n_seeds:
  default: 100
  max: 500
  min: 1
  type: spinbox
iso_split_level:
  default: 1.3
  max: 10.0
  min: 0.1
  type: doublespinbox
map_mob_range:
  default: 0.3
  max: 1
  min: 0.1
  type: doublespinbox
map_mz_range:
  default: 1.5
  max: 2
  min: 0.1
  type: do

### Search

In [18]:
#export
# Search Settings
search = {
    # Mass tolerances and hit threshold
    "prec_tol": {'type': 'spinbox', 'min': 1, 'max': 500, 'default': 20, 'description': "Maximum allowed precursor mass offset."},
    "frag_tol": {'type': 'spinbox', 'min': 1, 'max': 500, 'default': 50, 'description': "Maximum fragment mass tolerance."},
    "min_frag_hits": {'type': 'spinbox', 'min': 1, 'max': 99, 'default': 7, 'description': "Minimum number of fragment hits."},
    "ppm": {'type': 'checkbox', 'default': True, 'description': "Use ppm instead of Dalton."},

    # Calibration
    "calibrate": {'type': 'checkbox', 'default': True, 'description': "Recalibrate masses."},
    "calibration_std_prec": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 5, 'description': "Std range for precursor tolerance after calibration."},
    "calibration_std_frag": {'type': 'spinbox', 'min': 1, 'max': 10, 'default': 5, 'description': "Std range for fragment tolerance after calibration."},

    "parallel": {'type': 'checkbox', 'default': True, 'description': "Use parallel processing."},

    # FDR levels
    "peptide_fdr": {'type': 'doublespinbox', 'min': 0.0, 'max': 1.0, 'default': 0.01, 'description': "FDR level for peptides."},
    "protein_fdr": {'type': 'doublespinbox', 'min': 0.0, 'max': 1.0, 'default': 0.01, 'description': "FDR level for proteins."},

    'recalibration_min': {'type': 'spinbox', 'min': 100, 'max': 10000, 'default': 100, 'description': "Minimum number of datapoints to perform calibration."},
}

SETTINGS_TEMPLATE["search"] = search

In [19]:
print_settings(SETTINGS_TEMPLATE['search'])

calibrate:
  default: true
  description: Recalibrate masses.
  type: checkbox
calibration_std_frag:
  default: 5
  description: Std range for fragment tolerance after calibration.
  max: 10
  min: 1
  type: spinbox
calibration_std_prec:
  default: 5
  description: Std range for precursor tolerance after calibration.
  max: 10
  min: 1
  type: spinbox
frag_tol:
  default: 50
  description: Maximum fragment mass tolerance.
  max: 500
  min: 1
  type: spinbox
min_frag_hits:
  default: 7
  description: Minimum number of fragment hits.
  max: 99
  min: 1
  type: spinbox
parallel:
  default: true
  description: Use parallel processing.
  type: checkbox
peptide_fdr:
  default: 0.01
  description: FDR level for peptides.
  max: 1.0
  min: 0.0
  type: doublespinbox
ppm:
  default: true
  description: Use ppm instead of Dalton.
  type: checkbox
prec_tol:
  default: 20
  description: Maximum allowed precursor mass offset.
  max: 500
  min: 1
  type: spinbox
protein_fdr:
  default: 0.01
  descrip

### Score

In [20]:
#export
# Score
score = {
    "method": {
        'type': 'combobox',
        'value': ['x_tandem', 'random_forest'],
        'default': 'random_forest',
        'description': "Scoring method.",
    },
}
SETTINGS_TEMPLATE["score"] = score

In [21]:
print_settings(SETTINGS_TEMPLATE['score'])

method:
  default: random_forest
  description: Scoring method.
  type: combobox
  value:
  - x_tandem
  - random_forest



### Calibration

In [22]:
#export
# Calibration
calibration = {
    "outlier_std": {'type': 'spinbox', 'min': 1, 'max': 5, 'default': 3, 'description': "Number of std. deviations to filter outliers in psms."},
    "calib_n_neighbors": {'type': 'spinbox', 'min': 1, 'max': 1000, 'default': 100, 'description': "Number of neighbors that are used for offset interpolation."},
    # Scaling factors per axis
    "calib_mz_range": {'type': 'spinbox', 'min': 1, 'max': 10000, 'default': 2000, 'description': "Scaling factor for mz axis in ppm."},
    "calib_rt_range": {'type': 'doublespinbox', 'min': 0.0, 'max': 10, 'default': 0.5, 'description': "Scaling factor for rt axis."},
    "calib_mob_range": {'type': 'doublespinbox', 'min': 0.0, 'max': 1.0, 'default': 0.3, 'description': "Scaling factor for mobility axis."},
}

SETTINGS_TEMPLATE["calibration"] = calibration

In [23]:
print_settings(SETTINGS_TEMPLATE['calibration'])

calib_mob_range:
  default: 0.3
  description: Scaling factor for mobility axis.
  max: 1.0
  min: 0.0
  type: doublespinbox
calib_mz_range:
  default: 2000
  description: Scaling factor for mz axis in ppm.
  max: 10000
  min: 1
  type: spinbox
calib_n_neighbors:
  default: 100
  description: Number of neighbors that are used for offset interpolation.
  max: 1000
  min: 1
  type: spinbox
calib_rt_range:
  default: 0.5
  description: Scaling factor for rt axis.
  max: 10
  min: 0.0
  type: doublespinbox
outlier_std:
  default: 3
  description: Number of std. deviations to filter outliers in psms.
  max: 5
  min: 1
  type: spinbox



### Matching

In [24]:
#export
# Matching

matching = {
    "match_p_min": {'type': 'doublespinbox', 'min': 0.001, 'max': 1.0, 'default': 0.05, 'description': "Minimum probability cutoff for matching."},
    "match_d_min": {'type': 'doublespinbox', 'min': 0.001, 'max': 10.0, 'default': 3, 'description': "Minimum distance cutoff for matching."},
    "match_group_tol": {'type': 'spinbox', 'min': 0, 'max': 100, 'default': 0, 'description': "When having matching groups, match neighboring groups."},
}

SETTINGS_TEMPLATE["matching"] = matching

In [25]:
print_settings(SETTINGS_TEMPLATE['matching'])

match_d_min:
  default: 3
  description: Minimum distance cutoff for matching.
  max: 10.0
  min: 0.001
  type: doublespinbox
match_group_tol:
  default: 0
  description: When having matching groups, match neighboring groups.
  max: 100
  min: 0
  type: spinbox
match_p_min:
  default: 0.05
  description: Minimum probability cutoff for matching.
  max: 1.0
  min: 0.001
  type: doublespinbox



### Isobaric Labeling  

In [26]:
# NOTE(review): unlike the other template cells, this cell carries no export
# flag - confirm whether that is intended.
isobaric_label = {
    "label": {
        'type': 'combobox',
        'value': ['None', 'TMT10plex'],
        'default': 'None',
        'description': "Type of isobaric label present.",
    },
    "reporter_frag_tolerance": {
        'type': 'spinbox',
        'min': 1,
        'max': 500,
        'default': 15,
        'description': "Maximum fragment mass tolerance for a reporter.",
    },
    "reporter_frag_tolerance_ppm": {
        'type': 'checkbox',
        'default': True,
        'description': "Use ppm instead of Dalton.",
    },
}

SETTINGS_TEMPLATE["isobaric_label"] = isobaric_label

### Quantification  

In [27]:
#export
# Quantification

quantification = {
    "max_lfq": {
        'type': 'checkbox',
        'default': True,
        'description': "Perform max lfq type quantification.",
    },
    "lfq_ratio_min": {
        'type': 'spinbox',
        'min': 1,
        'max': 10,
        'default': 1,
        'description': "Minimum number of ratios for LFQ.",
    },
    "mode": {
        'type': 'combobox',
        'value': ['ms1_int_sum_apex'],
        'default': 'ms1_int_sum_apex',
        'description': "Column to perform quantification on.",
    },
}

SETTINGS_TEMPLATE["quantification"] = quantification

In [28]:
print_settings(SETTINGS_TEMPLATE['quantification'])

lfq_ratio_min:
  default: 1
  description: Minimum number of ratios for LFQ.
  max: 10
  min: 1
  type: spinbox
max_lfq:
  default: true
  description: Perform max lfq type quantification.
  type: checkbox
mode:
  default: ms1_int_sum_apex
  description: Column to perform quantification on.
  type: combobox
  value:
  - ms1_int_sum_apex



In [29]:
#export
import sys
import hashlib

def hash_file(path):
    """
    Compute the MD5 and SHA-1 digests of a file.

    The file is read in fixed-size chunks, so it is never loaded as a whole.
    Taken from https://stackoverflow.com/questions/22058048/hashing-a-file-in-python

    Args:
        path: Path of the file to hash.

    Returns:
        tuple: ``(md5_hexdigest, sha1_hexdigest)`` as hexadecimal strings.
    """
    chunk_size = 65536  # 64kb per read

    md5_hasher = hashlib.md5()
    sha1_hasher = hashlib.sha1()

    with open(path, 'rb') as stream:
        # `read` returns b'' at the end of the file, which ends the iteration.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            md5_hasher.update(chunk)
            sha1_hasher.update(chunk)

    return md5_hasher.hexdigest(), sha1_hasher.hexdigest()


In [30]:
#export

def create_default_settings():
    """Write the default settings and the settings template to disk.

    The default of every entry in `SETTINGS_TEMPLATE` is collected per
    category, and the MD5 hash of the modifications file is stored as
    ``settings['general']['modfile_hash']``. The defaults are saved to
    `DEFAULT_SETTINGS_PATH`, the full template to ``settings_template.yaml``
    inside `AP_PATH`.
    """
    settings = {
        category: {key: definition['default'] for key, definition in entries.items()}
        for category, entries in SETTINGS_TEMPLATE.items()
    }

    modfile_md5, _ = hash_file(modfile_path)
    settings['general']['modfile_hash'] = modfile_md5

    save_settings(settings, DEFAULT_SETTINGS_PATH) #Save in home folder to be able to modify

    template_path = os.path.join(AP_PATH, 'settings_template.yaml')
    save_settings(SETTINGS_TEMPLATE, template_path)
    

import os
import logging

# Hashes of the modifications file found next to this module.
md5, sha1 = hash_file(modfile_path)

HOME = os.path.expanduser("~")
AP_PATH = os.path.join(HOME, ".alphapept")
DEFAULT_SETTINGS_PATH = os.path.join(AP_PATH, 'default_settings.yaml')

# Hash that was stored when the default settings were last written, if any.
previous_md5 = None
if os.path.isfile(DEFAULT_SETTINGS_PATH):
    s_ = load_settings(DEFAULT_SETTINGS_PATH)
    # An empty or hand-edited file may not contain a 'general' mapping; treat
    # that like a missing hash so the defaults are regenerated instead of
    # failing while this module is imported.
    if isinstance(s_, dict) and isinstance(s_.get('general'), dict):
        previous_md5 = s_['general'].get('modfile_hash')

# The existing defaults are only kept when they were created with the same
# modifications file.
skip = previous_md5 is not None and previous_md5 == md5

if not skip:
    logging.info('Creating default settings.')
    create_default_settings()
else:
    logging.info('Using existing settings.')

In [31]:
#hide
from nbdev.export import *
# Run nbdev's notebook export; the recorded output below lists the converted notebooks.
notebook2script()

Converted 00_settings.ipynb.
Converted 01_chem.ipynb.
Converted 02_io.ipynb.
Converted 03_fasta.ipynb.
Converted 04_feature_finding.ipynb.
Converted 05_search.ipynb.
Converted 06_score.ipynb.
Converted 07_recalibration.ipynb.
Converted 08_quantification.ipynb.
Converted 09_matching.ipynb.
Converted 10_constants.ipynb.
Converted 11_interface.ipynb.
Converted 12_performance.ipynb.
Converted 13_export.ipynb.
Converted 14_display.ipynb.
Converted 15_label.ipynb.
Converted additional_code.ipynb.
Converted contributing.ipynb.
Converted file_formats.ipynb.
Converted index.ipynb.
