This repository has been archived by the owner on Jan 28, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
exomeseq-gatk4-preprocessing.cwl
106 lines (106 loc) · 2.86 KB
/
exomeseq-gatk4-preprocessing.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env cwl-runner
# Top-level workflow: feeds the interval lists produced by the
# `prepare_reference_data` step into the `preprocessing` step and exposes the
# outputs of `preprocessing` as the workflow outputs.
cwlVersion: v1.0
class: Workflow
label: WES GATK4 Preprocessing
doc: |
  Whole Exome Sequence analysis GATK4 Preprocessing
requirements:
  # NOTE(review): no step in this file uses `scatter`; presumably declared for
  # the documents under ../subworkflows/ - confirm before removing.
  ScatterFeatureRequirement: {}
  # Both steps `run` documents from ../subworkflows/ (presumably Workflows).
  SubworkflowFeatureRequirement: {}
  # Imports the custom type referenced by the `read_pair` input.
  SchemaDefRequirement:
    types:
      - $import: ../types/FASTQReadPairType.yml
inputs:
  # Target intervals; should come from the capture kit, BED format.
  target_intervals: File[]?
  # Bait intervals; should come from the capture kit, BED format.
  bait_intervals: File[]?
  # Optional interval padding; passed straight through to the `preprocessing`
  # step.
  interval_padding: int?
  # Named read pair in FASTQ format.
  read_pair:
    type: ../types/FASTQReadPairType.yml#FASTQReadPairType
  # Reference genome (FASTA) together with the companion files listed below.
  reference_genome:
    type: File
    secondaryFiles:
      - .amb
      - .ann
      - .bwt
      - .pac
      - .sa
      - .fai
      # CWL secondaryFiles pattern: each leading `^` removes one extension
      # from the primary file name before the rest is appended.
      - ^.dict
  # Number of threads to use.
  threads: int
  # Read Group annotation.
  # Can be the project name.
  library: string
  # e.g. Illumina
  platform: string
  # VCF files of known sites; each one needs its `.idx` index alongside it.
  known_sites:
    type: File[]
    secondaryFiles:
      - .idx
  # NOTE(review): presumably a dbSNP VCF - confirm. Needs an `.idx` companion.
  resource_dbsnp:
    type: File
    secondaryFiles:
      - .idx
# Every workflow output is taken directly from the `preprocessing` step.
outputs:
  fastqc_reports:
    type: File[]
    outputSource: preprocessing/fastqc_reports
  trim_reports:
    type: File[]
    outputSource: preprocessing/trim_reports
  markduplicates_bam:
    type: File
    outputSource: preprocessing/markduplicates_bam
    doc: "BAM and bai files from markduplicates"
  recalibration_table:
    type: File
    outputSource: preprocessing/recalibration_table
    doc: "Table of recalibration"
  recalibrated_reads:
    type: File
    outputSource: preprocessing/recalibrated_reads
    doc: "BAM file containing recalibrated reads"
  haplotypes_bam:
    type: File
    outputSource: preprocessing/haplotypes_bam
    doc: "BAM file containing assembled haplotypes"
  raw_variants:
    type: File
    outputSource: preprocessing/raw_variants
    doc: "Variants from HaplotypeCaller"
steps:
  # Takes the interval files and the reference genome; its two interval-list
  # outputs are consumed by `preprocessing`.
  prepare_reference_data:
    run: ../subworkflows/exomeseq-00-prepare-reference-data.cwl
    in:
      target_intervals: target_intervals
      bait_intervals: bait_intervals
      reference_genome: reference_genome
    out:
      - target_interval_list
      - bait_interval_list
  # Receives every remaining workflow input plus the interval lists from
  # `prepare_reference_data`; all workflow outputs originate here.
  preprocessing:
    run: ../subworkflows/exomeseq-gatk4-01-preprocessing.cwl
    in:
      # The workflow's `target_intervals` input is passed as `intervals`; the
      # interval lists from `prepare_reference_data` are passed separately.
      intervals: target_intervals
      interval_padding: interval_padding
      target_interval_list: prepare_reference_data/target_interval_list
      bait_interval_list: prepare_reference_data/bait_interval_list
      read_pair: read_pair
      reference_genome: reference_genome
      threads: threads
      library: library
      platform: platform
      known_sites: known_sites
      resource_dbsnp: resource_dbsnp
    out:
      - fastqc_reports
      - trim_reports
      - markduplicates_bam
      - recalibration_table
      - recalibrated_reads
      - raw_variants
      - haplotypes_bam