# mip_rd_dna_config.yaml
---
## Environment
# The token `analysis_constant_path!` appears in several path values of this
# file; presumably the consumer replaces it with this value - confirm.
analysis_constant_path: analysis
# NOTE(review): nesting below was reconstructed from a copy whose indentation
# had been stripped. `mip` has no value in the original and is kept that way;
# confirm that it belongs under `mip_ci` next to `method`.
load_env:
  mip_ci:
    method: conda
    mip:
# Scheduler / resource settings. Units are not stated in this file
# (node_ram_memory is presumably in GB - confirm).
max_cores_per_node: 36
node_ram_memory: 192
project_id: travis_test
slurm_quality_of_service: low
## Input
# Tokens ending in `!` (`cluster_constant_path!`, `case_id!`) are not defined
# in this file; presumably the consumer substitutes them at load time - confirm.
pedigree_file: cluster_constant_path!/case_id!/case_id!_pedigree.yaml
reference_dir: cluster_constant_path!/references
# Both select-file settings point at the same file.
sv_vcfparser_select_file: cluster_constant_path!/case_id!/aggregated_gene_panel_test.txt
vcfparser_select_file: cluster_constant_path!/case_id!/aggregated_gene_panel_test.txt
## Output
# Every output location sits under
# `cluster_constant_path!/case_id!/analysis_constant_path!`.
# `analysis_constant_path` is set earlier in this file; the other `!` tokens
# are not defined here (presumably filled in by the consumer - confirm).
config_file_analysis: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_config.yaml
outdata_dir: cluster_constant_path!/case_id!/analysis_constant_path!
outscript_dir: cluster_constant_path!/case_id!/analysis_constant_path!/scripts
# Same path as `qccollect_sampleinfo_file` further down in this file.
sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml
## References
# Values in this section are bare file names without a directory part;
# presumably they are resolved against `reference_dir` - confirm.
# NOTE(review): the nesting of the three mappings below was reconstructed from
# a copy whose indentation had been stripped.
gatk_genotypegvcfs_ref_gvcf: grch37_gatk_merged_reference_samples.txt
genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv
human_genome_reference: grch37_homo_sapiens_-d5-.fasta
# One entry per query file: the key is the file name, the value holds the
# remaining `|`-separated fields of the FORMAT line.
me_annotate_query_files:
  # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER
  grch37_alu_swegen.vcf.gz: swegen_alu_|FRQ|OCC|FRQ|OCC|1
  grch37_herv_swegen.vcf.gz: swegen_herv_|FRQ|OCC|FRQ|OCC|1
  grch37_l1_swegen.vcf.gz: swegen_l1_|FRQ|OCC|FRQ|OCC|1
  grch37_sva_swegen.vcf.gz: swegen_sva_|FRQ|OCC|FRQ|OCC|1
# Maps each BED file to a mobile-element label.
mobile_element_reference:
  grch37_g1k_alu.bed: ALU
  grch37_g1k_l1.bed: L1
  grch37_g1k_herv.bed: HERV
  grch37_g1k_sva.bed: SVA
rank_model_file: rank_model_-v1.34-.ini
sambamba_depth_bed: grch37_scout_exons_-2017-01-.bed
sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml
# Same file as `genmod_models_reduced_penetrance_file` above.
sv_genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv
sv_rank_model_file: svrank_model_-v1.9-.ini
# Same key/value layout as `me_annotate_query_files`. Several entries supply
# fewer fields than the FORMAT line lists (clinvar, decipher, clingen_cgh_*);
# presumably the trailing fields are optional - confirm against the consumer.
sv_svdb_query_db_files:
  # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER
  grch37_clinvar_sv_pathogenic_-20221004-.bedpe: clinvar_pathogenic|Occ|Frq
  grch37_gnomad_reformated_-r2.1.1_sv-.vcf.gz: gnomad_sv|AF|AC|AF|AC|1
  grch37_loqusdb_sv_variants_export-20230214-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs|1
  grch37_mip_sv_svdb_export_-2018-10-09-.vcf: clinical_genomics_mip|AF|OCC|FRQ|OCC|1
  grch37_svdb_query_decipher_-v1.0.0-.vcf: decipher|AF|OCC|FRQ|OCC
  grch37_svdb_query_clingen_cgh_benign_-v1.0.0-.vcf: clingen_cgh_benign
  grch37_svdb_query_clingen_cgh_pathogenic_-v1.0.0-.vcf: clingen_cgh_pathogenic
  grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1
  grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1
vcf2cytosure_blacklist: grch37_cytosure_blacklist_-1.0-.bed
vcfanno_config: grch37_vcfanno_config_-v1.17-.toml
### Analysis
### Programs
## Parameters
# List of annotation keys; how they are applied is decided by the consumer.
fqf_annotations:
  - GNOMADAF
  - GNOMADAF_popmax
  - SWEGENAF
# The last two entries equal a prefix from `sv_svdb_query_db_files` joined with
# its first field (`gnomad_sv` + `AF`, `clinical_genomics_loqus` + `Frq`);
# presumably they must stay in sync with that mapping - confirm.
sv_fqa_vcfanno_filters:
  - GNOMADAF
  - GNOMADAF_popmax
  - gnomad_svAF
  - clinical_genomics_loqusFrq
# Absolute tool locations.
gatk_path: /usr
picardtools_path: /usr/picard
# Same path as `sample_info_file` in the Output section.
qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml
sv_genmod_models_case_type: cmms
# Plugin settings keyed by plugin name. Each plugin lists the paths that must
# exist (`exist_check`, with the expected `type`) and its `parameters`.
# NOTE(review): nesting reconstructed from a copy whose indentation had been
# stripped; this entry mirrors the `pLI` entry under `vep_plugin`.
sv_vep_plugin:
  pLI:
    exist_check:
      - type: file
        path: cluster_constant_path!/references/gnomad_pli_per_gene_-_r2.1.1-.txt
    parameters:
      - cluster_constant_path!/references/gnomad_pli_per_gene_-_r2.1.1-.txt
# Custom annotation sources keyed by an annotation name. Each entry carries
# its own `annotation_type`, `force_report_coordinates`, `key`, `file_type`
# and `path`; only `clinvar` adds `vcf_fields`.
# NOTE(review): nesting reconstructed from a copy whose indentation had been
# stripped - the repeated keys only make sense as two sibling mappings.
vep_custom_annotation:
  genomic_superdups_frac_match:
    annotation_type: overlap
    force_report_coordinates: 0
    key: genomic_superdups_frac_match
    file_type: bed
    path: cluster_constant_path!/references/grch37_genomic_superdups_reformated_-20181009-.bed.gz
  clinvar:
    annotation_type: exact
    force_report_coordinates: 0
    key: CLINVAR
    file_type: vcf
    path: cluster_constant_path!/references/grch37_clinvar_reformated_-20230218-.vcf.gz
    vcf_fields: CLNSIG,CLNVID,CLNREVSTAT
# `vep_plugins_dir_path` is the `Plugins` directory inside
# `vep_directory_cache`; the LoFtool and MaxEntScan data below live under it.
vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/
vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins
# Plugin settings keyed by plugin name. Each plugin lists the paths that must
# exist (`exist_check`, with the expected `type`: file or directory) and the
# ordered `parameters` for that plugin.
# NOTE(review): nesting reconstructed from a copy whose indentation had been
# stripped - the repeated `exist_check` / `parameters` keys only make sense
# as per-plugin children.
vep_plugin:
  dbNSFP:
    exist_check:
      - type: file
        path: cluster_constant_path!/references/grch37_dbnsfp_-v3.5a-.txt.gz
    parameters:
      # First item is the data file, the rest are field names.
      - cluster_constant_path!/references/grch37_dbnsfp_-v3.5a-.txt.gz
      - GERP++_RS
      - GERP++_NR
      - phyloP100way_vertebrate
      - phastCons100way_vertebrate
      - REVEL_rankscore
      - rs_dbSNP150
  pLI:
    exist_check:
      - type: file
        path: cluster_constant_path!/references/gnomad_pli_per_gene_-_r2.1.1-.txt
    parameters:
      - cluster_constant_path!/references/gnomad_pli_per_gene_-_r2.1.1-.txt
  LoFtool:
    exist_check:
      - type: file
        path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt
    parameters:
      - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt
  MaxEntScan:
    exist_check:
      - type: directory
        path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload
    parameters:
      - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload
      - SWA
      - NCSS
  SpliceAI:
    # Both score files are checked; they are passed as `snv=` / `indel=`.
    exist_check:
      - type: file
        path: cluster_constant_path!/references/grch37_spliceai_scores_raw_indel_-v1.3-.vcf.gz
      - type: file
        path: cluster_constant_path!/references/grch37_spliceai_scores_raw_snv_-v1.3-.vcf.gz
    parameters:
      - snv=cluster_constant_path!/references/grch37_spliceai_scores_raw_snv_-v1.3-.vcf.gz
      - indel=cluster_constant_path!/references/grch37_spliceai_scores_raw_indel_-v1.3-.vcf.gz