In [None]:
# default_exp settings_template

# Settings Template

> A template for settings

ToDo:
Add checks that the settings do not contradict each other.
E.g., if a fasta file is provided, make sure the library conversion is performed.
If a path to a library is provided, skip the conversion.

In [None]:
#hide
from nbdev.showdoc import *

## Settings

Each entry has a type, default values and a description.

* spinbox -> QSpinBox with minimum and maximum values (int)
* doublespinbox -> QDoubleSpinBox with minimum and maximum values (float)
* path -> Clickable button to select a path
* combobox -> QComboBox, dropdown menu with values to choose from
* checkbox -> QCheckBox, checkbox that can be selected
* checkgroup -> Creates a list of QCheckBox options that can be selected

In [None]:
import pandas as pd
import yaml
from alphapept.constants import protease_dict

# Root container of the template: one sub-dictionary per settings section.
SETTINGS_TEMPLATE = {}

# General
# Each entry is a dict holding the widget 'type', its 'default' and a 'description';
# 'path' entries additionally carry the accepted 'filetype' list and a 'folder' flag.
general = {
    "settings_path": {
        'type': 'path',
        'default': '...',
        'filetype': ['yaml'],
        'folder': False,
        'description': "Path to settings file (.yaml).",
    },
    "ppm": {
        'type': 'checkbox',
        'default': True,
        'description': "Use ppm instead of Dalton.",
    },
    "parallel": {
        'type': 'checkbox',
        'default': True,
        'description': "Use parallel processing.",
    },
    "create_library": {
        'type': 'checkbox',
        'default': True,
        'description': "Flag to re-create the database from fasta.",
    },
    "convert_raw": {
        'type': 'checkbox',
        'default': True,
        'description': "Flag to re-create the npz container from raw.",
    },
}
SETTINGS_TEMPLATE["general"] = general

# Raw
# Entries of the "raw" section: one spinbox plus three path selectors.
raw = {
    "most_abundant": {
        'type': 'spinbox',
        'min': 1,
        'max': 1000,
        'default': 400,
        'description': "Number of most abundant peaks to be isolated from raw spectra.",
    },
    "raw_path": {
        'type': 'path',
        'default': '...',
        'filetype': ['raw'],
        'folder': False,
        'description': "Path to raw file.",
    },
    "raw_folder": {
        'type': 'path',
        'default': '...',
        'filetype': ['raw'],
        'folder': True,
        'description': "Path to folder with raw files.",
    },
    "raw_path_npz": {
        'type': 'path',
        'default': '...',
        'filetype': ['npz'],
        'folder': False,
        'description': "Path to npz file, converted from raw.",
    },
}
SETTINGS_TEMPLATE["raw"] = raw


# Fasta
fasta = {}
SETTINGS_TEMPLATE["fasta"] = fasta

## Read modifications from modifications file
# Tab-separated table; the 'Identifier', 'Description' and 'Type' columns are read below.
mod_db = pd.read_csv('./modifications.tsv', sep='\t')

# Identifier -> description lookups, split by the content of the 'Type' column.
mods = {}           # 'Type' does not contain 'terminus'
mods_terminal = {}  # 'Type' contains 'terminus' and 'peptide'
mods_protein = {}   # 'Type' contains 'terminus' and 'protein' (but not 'peptide')

for _, mod in mod_db.iterrows():
    mod_type = mod['Type']

    # Anything that is not a terminal modification goes into the general lookup.
    if 'terminus' not in mod_type:
        mods[mod['Identifier']] = mod['Description']
        continue

    if 'peptide' in mod_type:
        mods_terminal[mod['Identifier']] = mod['Description']
    elif 'protein' in mod_type:
        mods_protein[mod['Identifier']] = mod['Description']
    else:
        # Terminal modification that is neither peptide- nor protein-level:
        # report it and leave it out of every lookup.
        print('Not understood')
        print(mod_type)

# Modification check groups.
# 'value' maps each selectable identifier to its description and 'default' lists the
# identifiers that are pre-selected. Every entry gets its OWN copy of the lookup dict:
# sharing one dict object between several entries would couple them in memory and make
# yaml.dump emit anchors/aliases (&id001 / *id001) in the written template.
fasta["mods_fixed"] = {'type':'checkgroup', 'value':dict(mods), 'default':['cC'],'description':"Fixed modifications."}
fasta["mods_fixed_terminal"] = {'type':'checkgroup', 'value':dict(mods_terminal), 'default':[],'description':"Fixed terminal modifications."}
fasta["mods_variable"] = {'type':'checkgroup', 'value':dict(mods), 'default':['oxM'],'description':"Variable modifications."}
fasta["mods_variable_terminal"]  = {'type':'checkgroup', 'value':dict(mods_terminal), 'default':[], 'description':"Variable terminal modifications."}

# Protein-level terminal modifications.
fasta["mods_fixed_terminal_prot"] = {'type':'checkgroup', 'value':dict(mods_protein), 'default':[],'description':"Fixed terminal modifications on proteins."}
fasta["mods_variable_terminal_prot"]  = {'type':'checkgroup', 'value':dict(mods_protein), 'default':[], 'description':"Variable terminal modifications on proteins."}

# Integer-valued settings (spinboxes with inclusive 'min'/'max' bounds).
fasta["num_missed_cleavages"] = {'type':'spinbox', 'min':0, 'max':99, 'default':2, 'description':"Number of missed cleavages."}
fasta["min_length"] = {'type':'spinbox', 'min':6, 'max':99, 'default':6, 'description':"Minimum peptide length."}
fasta["max_length"] = {'type':'spinbox', 'min':6, 'max':99, 'default':27, 'description':"Maximum peptide length."}
fasta["max_isoforms"] = {'type':'spinbox', 'min':1, 'max':4096, 'default':1024, 'description':"Maximum number of isoforms per peptide."}

# File and folder selectors.
fasta["fasta_path"] = {'type':'path','default':'...', 'filetype':['fasta'], 'folder':False, 'description':"Path to fasta file."}
fasta["fasta_folder"] = {'type':'path','default':'...', 'filetype':['fasta'], 'folder':True, 'description':"Path to folder with fasta files."}
fasta["library_path"] = {'type':'path','default':'...', 'filetype':['npz'], 'folder':False, 'description':"Path to library file (.npz)."}

fasta["contaminants_path"] = {'type':'path','default':'...', 'filetype':['fasta'], 'folder':False, 'description':"Path to contaminants fasta file."}

# The selectable proteases are the keys of alphapept's protease_dict.
# NOTE(review): the default 'trypsin' is assumed to be one of those keys - confirm.
proteases = list(protease_dict)
fasta["protease"] = {'type':'combobox', 'value':proteases, 'default':'trypsin', 'description':"Protease for digestions."}

# Search Settings
# Entries of the "search" section; spinbox/doublespinbox entries carry inclusive
# 'min'/'max' bounds, checkbox entries only a boolean 'default'.
search = {
    "m_offset": {
        'type': 'spinbox',
        'min': 1,
        'max': 99,
        'default': 10,
        'description': "Precursor mass offset.",
    },
    "m_tol": {
        'type': 'spinbox',
        'min': 1,
        'max': 99,
        'default': 20,
        'description': "MSMS mass offset.",
    },
    "min_frag_hits": {
        'type': 'spinbox',
        'min': 1,
        'max': 99,
        'default': 7,
        'description': "Minimum number of fragment hits.",
    },
    "ppm": {
        'type': 'checkbox',
        'default': True,
        'description': "Use ppm instead of Dalton.",
    },
    "calibrate": {
        'type': 'checkbox',
        'default': True,
        'description': "Recalibrate masses.",
    },
    "calibration_std": {
        'type': 'spinbox',
        'min': 1,
        'max': 5,
        'default': 3,
        'description': "Std range for search after calibration.",
    },
    "parallel": {
        'type': 'checkbox',
        'default': True,
        'description': "Use parallel processing.",
    },
    "peptide_fdr": {
        'type': 'doublespinbox',
        'min': 0.0,
        'max': 1.0,
        'default': 0.01,
        'description': "FDR level for peptides.",
    },
    "protein_fdr": {
        'type': 'doublespinbox',
        'min': 0.0,
        'max': 1.0,
        'default': 0.01,
        'description': "FDR level for proteins.",
    },
}

SETTINGS_TEMPLATE["search"] = search

# Feature Settings
# NOTE(review): unlike the entries of the other sections, these carry no 'description'
# key - confirm that consumers of the template tolerate its absence.
features = {}
SETTINGS_TEMPLATE["features"] = features

# Compact table of (name, widget type, min, max, default); expanded into entry dicts below.
features.update({
    name: {'type': widget, 'min': lower, 'max': upper, 'default': default}
    for name, widget, lower, upper, default in (
        ("min_hill_length", 'spinbox', 1, 10, 3),
        ("max_gap", 'spinbox', 1, 99, 2),
        ("ppm_tol", 'spinbox', 1, 99, 8),
        ("smoothing", 'spinbox', 1, 10, 1),
        ("max_neighbors", 'spinbox', 1, 10, 4),
        ("max_distance", 'doublespinbox', 0.0, 1.0, 0.4),
        ("mass_importance", 'spinbox', 1, 1000, 100),
    )
})

# Calibration

# Entries of the "calibration" section: four bounded integer spinboxes and a
# combobox selecting the interpolation method.
calibration = {
    "min_mz_step": {
        'type': 'spinbox',
        'min': 1,
        'max': 999,
        'default': 80,
        'description': "Minimum number of datapoints in for mz interpolation.",
    },
    "min_rt_step": {
        'type': 'spinbox',
        'min': 1,
        'max': 999,
        'default': 50,
        'description': "Minimum number of datapoints in for rt interpolation.",
    },
    "minimum_score": {
        'type': 'spinbox',
        'min': 1,
        'max': 999,
        'default': 20,
        'description': "Minimum score for psms used for calibration.",
    },
    "outlier_std": {
        'type': 'spinbox',
        'min': 1,
        'max': 5,
        'default': 3,
        'description': "Number of std. deviations to filter outliers in psms.",
    },
    "method": {
        'type': 'combobox',
        'value': ['linear', 'nearest', 'cubic'],
        'default': 'linear',
        'description': "Interpolation method.",
    },
}
SETTINGS_TEMPLATE["calibration"] = calibration

# Save everything

path = "settings_template.yaml"

# Serialize the whole template to YAML text and write it to `path`
# (a relative path, so it resolves against the current working directory).
with open(path, "w") as out_handle:
    out_handle.write(yaml.dump(SETTINGS_TEMPLATE))

# for debugging
# Read the file back and print it, so the round trip can be inspected in the cell output.
with open(path, "r") as in_handle:
    SETTINGS_LOADED = yaml.load(in_handle, Loader=yaml.FullLoader)

print(yaml.dump(SETTINGS_LOADED))



calibration:
  method:
    default: linear
    description: Interpolation method.
    type: combobox
    value:
    - linear
    - nearest
    - cubic
  min_mz_step:
    default: 80
    description: Minimum number of datapoints in for mz interpolation.
    max: 999
    min: 1
    type: spinbox
  min_rt_step:
    default: 50
    description: Minimum number of datapoints in for rt interpolation.
    max: 999
    min: 1
    type: spinbox
  minimum_score:
    default: 20
    description: Minimum score for psms used for calibration.
    max: 999
    min: 1
    type: spinbox
  outlier_std:
    default: 3
    description: Number of std. deviations to filter outliers in psms.
    max: 5
    min: 1
    type: spinbox
fasta:
  fasta_folder:
    default: '...'
    description: Path to folder with fasta files.
    filetype:
    - fasta
    folder: true
    type: path
  fasta_path:
    default: '...'
    description: Path to fasta file.
    filetype:
    - fasta
    folder: false
    type: path
  li

In [None]:
#hide
# Final cell: run nbdev's notebook export. `notebook2script` is not defined in this
# notebook, so it presumably comes from the star import below - see the nbdev docs.
from nbdev.export import *
notebook2script()