-
Notifications
You must be signed in to change notification settings - Fork 3
/
config_example.yaml
110 lines (75 loc) · 4.47 KB
/
config_example.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#=========================== SCORPiOs EXAMPLE CONFIGURATION FILE ============================#
##For a full description of supported settings, please take a look at SCORPiOs documentation.#
#----------------------------------------- INPUTS -------------------------------------------#
# INPUT1 - The gene trees to correct, as a single file in New Hampshire Extended (.nhx) format.
trees: data/example/forest.nhx
# INPUT2 - The multiple sequence alignments used to build the trees, as a single fasta file.
alis: data/example/ali.fa.gz
# INPUT3 - The gene coordinates for all duplicated species and outgroup(s):
# one file per species, in BED (.bed) format. Can also be in 'dyogen' format.
# NOTE(review): '%s' in the path is presumably substituted with each species name - confirm.
genes: data/example/genes/genes.%s.bed
# Uncomment if the gene coordinates are in dyogen format; otherwise .bed is assumed.
#genes_format: dyogen
# INPUT4 - The species tree in Newick format, with labelled internal nodes (ancestor names).
species_tree: data/example/species_tree.nwk
# Uncomment to provide a gene-to-species mapping file: a single text file with two columns,
# gene_name and species_name.
#genes_sp_mapping: data/example/genes_sp_mapping.txt
# Uncomment to restrict correction to the gene trees that contain a gene id listed in the
# following file (listing any one gene id of a tree is enough to induce correction of that tree).
# subset_to_correct: 'data/example/correct_only_genelist.txt'
#----------------------------------------- OUTPUTS ------------------------------------------#
# Job name; it will be appended to the name of the output folder.
jobname: 'example'
#---------------------------------------- PARAMETERS ----------------------------------------#
# Whole-genome duplication(s) and outgroup(s): one entry per WGD, nested under `WGDs`,
# with its outgroup species given as a single comma-separated string.
# Outgroups, if multiple, should be monophyletic if you intend to run LORelEi.
# The entries must stay indented under `WGDs:`; at column 0 they would be parsed as
# separate top-level keys and `WGDs` itself would be empty (null).
WGDs:
  Clupeocephala: 'Lepisosteus.oculatus,Amia.calva'
  Salmonidae: 'Esox.lucius,Gasterosteus.aculeatus,Oryzias.latipes'
# NOTE: keep the 'y'/'n' values below quoted; some YAML 1.1 loaders read bare y/n as booleans.
# Whether the threshold on the average number of syntenic orthologs required to include genes
# in the Orthology Table should be optimized: yes ('y') or no ('n').
# Default threshold value if not optimized: 2.0.
optimize_synteny_support_threshold: 'y'
# Whether orthologs without synteny support should be discarded from the synteny analysis:
# yes ('y') or no ('n').
filter_otable_nosynteny: 'n'
# Size of the sliding window used in the pairwise synteny analysis.
windowSize: 15
# Cut-off on pairwise synteny similarity scores (between 0 and 1).
cutoff: 0
# Whether branch lengths should be recomputed after corrections: yes ('y') or no ('n').
brlength: 'y'
# Software used to recompute the branch lengths: 'treebest phyml' (default) or 'raxml'.
## Uncomment to use RAxML.##
#brlength_tool: raxml
# Species with a poorer assembly quality that should be discarded from the synteny analysis.
# NOTE(review): the value is a path, presumably to a file listing these species - confirm.
## Comment out if you want to use all species in the synteny analysis.##
lowcov_sp: 'data/example/lowcov'
# Whether to ignore tree-synteny inconsistencies when an orthogroup graph community contains
# only a single gene: yes ('y') or no ('n'). These are poorly-supported WGD duplication nodes.
ignoreSingleGeneCom: 'y'
# Whether each individual corrected tree and its non-corrected counterpart should be saved,
# each in a .nhx file: yes ('y') or no ('n'). This facilitates direct inspection of corrections.
save_tmp_trees: 'y'
# Whether more detailed intermediary outputs should be saved: yes ('y') or no ('n').
# Setting `save_subtrees_lktest` to 'y' saves, in addition to default outputs:
# - constrained tree topologies
# - profileNJ and TreeBeST synteny-aware trees
# - AU-tests outputs
# Should be set to 'y' if you intend to run LORelEi.
save_subtrees_lktest: 'y'
# Uncomment to skip the profileNJ solution and only resolve synteny-derived multifurcated
# trees with TreeBeST.
#skip_profilenj: 'y'
# Optionally, use spectral clustering instead of Girvan-Newman for graph community detection:
# yes ('y') or no ('n').
spectral: 'n'
# Iterative-correction related option, automatically updated by the wrapper iterate_scorpios.sh.
## DO NOT MODIFY MANUALLY even if using iterative mode.##
current_iter: 0
#---------------------------------------- RESOURCES -----------------------------------------#
# Maximum number of threads (will never use more than this number).
# It will be restricted to the number specified via --cores (1 if --cores is not invoked).
ncores: 14
# Use a parallelization scheme specific to large jobs: yes ('y') or no ('n').
parallel_scheme_large_job: 'n'
# Limit the number of cores used for the branch-length computation (after all corrections).
## Uncomment to reduce RAM usage.##
# NOTE(review): the example value (37) is higher than `ncores` (14) above - confirm it is intended.
#limit_threads_for_branch_lengths: 37