/
emg-pipeline-v3.cwl
192 lines (170 loc) · 5.35 KB
/
emg-pipeline-v3.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# EMG pipeline v3.0, described as a CWL v1.0 Workflow.
cwlVersion: v1.0
class: Workflow
label: EMG pipeline v3.0 (draft CWL version)

# Optional CWL features this workflow depends on.
requirements:
  # Required for the `${ ... }` JavaScript expression used on a step input.
  - class: InlineJavascriptRequirement
  # Required for `valueFrom` on a workflow step input.
  - class: StepInputExpressionRequirement
  # NOTE(review): presumably some `run:` targets are themselves workflows;
  # confirm against the files under ../tools/.
  - class: SubworkflowFeatureRequirement
  # Required for step inputs that list more than one source.
  - class: MultipleInputFeatureRequirement
  # Custom types imported from the tool directory. `types` is a field of this
  # requirement entry, so it must be nested under it rather than sit at the
  # top level of the document.
  - class: SchemaDefRequirement
    types:
      - $import: ../tools/InterProScan-apps.yaml
      - $import: ../tools/InterProScan-protein_formats.yaml
      - $import: ../tools/trimmomatic-sliding_window.yaml
      - $import: ../tools/trimmomatic-end_mode.yaml
      - $import: ../tools/trimmomatic-phred.yaml
# Workflow input parameters. Every parameter is nested under `inputs`, and the
# `type`/`format` fields are nested under the parameter they describe.
# The `edam:` prefix on `format` values is declared in `$namespaces` at the end
# of this file.
inputs:
  # Paired read files.
  forward_reads:
    type: File
    format: edam:format_1930  # FASTQ
  reverse_reads:
    type: File
    format: edam:format_1930  # FASTQ

  # Files handed unchanged to the `fraggenescan` step (shorthand form: the
  # value is the parameter type).
  fraggenescan_model: File
  fraggenescan_prob_forward: File
  fraggenescan_prob_backward: File
  fraggenescan_prob_noncoding: File
  fraggenescan_prob_start: File
  fraggenescan_prob_stop: File
  fraggenescan_prob_start1: File
  fraggenescan_prob_stop1: File
  fraggenescan_pwm_dist: File

  # Model files consumed by the find_*_matches steps.
  16S_model:
    type: File
    format: edam:format_1370  # HMMER
  5S_model:
    type: File
    format: edam:format_1370  # HMMER
  23S_model:
    type: File
    format: edam:format_1370  # HMMER
  tRNA_model:
    type: File
    format: edam:format_1370  # HMMER
# Workflow outputs. Each `outputSource` has the form `<step id>/<output id>`
# and names the step output that is published under this output's name.
outputs:
  # Sequences produced by the masking step.
  processed_sequences:
    type: File
    outputSource: mask_rRNA_and_tRNA/masked_sequences

  # Output of the `fraggenescan` step.
  pCDS:
    type: File
    outputSource: fraggenescan/predictedCDS

  # Output of the `interproscan` step.
  annotations:
    type: File
    outputSource: interproscan/i5Annotations
# Workflow steps. In each step, `run` names the tool description, `in` maps the
# tool's inputs to a workflow input or to `<step id>/<output id>` of another
# step, and `out` lists the tool outputs made available to other steps.
steps:
  # Passes both workflow read files to the SeqPrep tool description.
  overlap_reads:
    run: ../tools/seqprep.cwl
    in:
      forward_reads: forward_reads
      reverse_reads: reverse_reads
    out: [merged_reads, forward_unmerged_reads, reverse_unmerged_reads]

  # Takes all three outputs of `overlap_reads` and yields one combined output.
  combine_seqprep:
    run: ../tools/seqprep-merge.cwl
    in:
      merged_reads: overlap_reads/merged_reads
      forward_unmerged_reads: overlap_reads/forward_unmerged_reads
      reverse_unmerged_reads: overlap_reads/reverse_unmerged_reads
    out: [merged_with_unmerged_reads]

  # Runs Trimmomatic on the combined reads with fixed parameter defaults.
  trim_quality_control:
    run: ../tools/trimmomatic.cwl
    in:
      reads1: combine_seqprep/merged_with_unmerged_reads
      # Quoted so the value stays the string '33' instead of an integer.
      phred: { default: '33' }
      leading: { default: 3 }
      trailing: { default: 3 }
      end_mode: { default: SE }
      # Record value: both fields belong to the single `default`.
      slidingwindow:
        default:
          windowSize: 4
          requiredQuality: 15
    out: [reads1_trimmed]

  convert_trimmed-reads_to_fasta:
    run: ../tools/fastq_to_fasta.cwl
    in:
      fastq: trim_quality_control/reads1_trimmed
    out: [fasta]

  # The indexed sequences feed every find_*_matches step and the masking step.
  index_reads:
    run: ../tools/esl-sfetch-index.cwl
    in:
      sequences: convert_trimmed-reads_to_fasta/fasta
    out: [sequences_with_index]

  # The three rRNA searches share one tool description and differ only in the
  # model file they receive.
  find_16S_matches:
    run: ../tools/rRNA_selection.cwl
    in:
      indexed_sequences: index_reads/sequences_with_index
      model: 16S_model
    out: [matching_sequences, hmmer_search_results]

  find_23S_matches:
    run: ../tools/rRNA_selection.cwl
    in:
      indexed_sequences: index_reads/sequences_with_index
      model: 23S_model
    out: [matching_sequences, hmmer_search_results]

  find_5S_matches:
    run: ../tools/rRNA_selection.cwl
    in:
      indexed_sequences: index_reads/sequences_with_index
      model: 5S_model
    out: [matching_sequences, hmmer_search_results]

  find_tRNA_matches:
    run: ../tools/tRNA_selection.cwl
    in:
      indexed_sequences: index_reads/sequences_with_index
      model: tRNA_model
    out: [matching_sequences, hmmer_search_results]

  # `hits` lists three sources; this is the multiple-source form that relies on
  # MultipleInputFeatureRequirement.
  collate_unique_rRNA_hmmer_hits:
    run: ../tools/collate_unique_SSU_headers.cwl
    in:
      hits:
        - find_16S_matches/hmmer_search_results
        - find_23S_matches/hmmer_search_results
        - find_5S_matches/hmmer_search_results
    out: [unique_hits]

  # Same tool as above, fed from a single source. The expression wraps that one
  # value in a one-element array so `hits` receives a list here as well.
  collate_unique_tRNA_hmmer_hits:
    run: ../tools/collate_unique_SSU_headers.cwl
    in:
      hits:
        source: find_tRNA_matches/hmmer_search_results
        # Quoted so the braces and brackets are never read as YAML flow syntax.
        valueFrom: '${ return [ self ]; }'
    out: [unique_hits]

  # Consumes the collated hits, the per-model matches and the indexed
  # sequences; its output is also published as `processed_sequences`.
  mask_rRNA_and_tRNA:
    run: ../tools/mask_RNA.cwl
    in:
      unique_rRNA_hits: collate_unique_rRNA_hmmer_hits/unique_hits
      16s_rRNA_hmmer_matches: find_16S_matches/matching_sequences
      23s_rRNA_hmmer_matches: find_23S_matches/matching_sequences
      5s_rRNA_hmmer_matches: find_5S_matches/matching_sequences
      unique_tRNA_hits: collate_unique_tRNA_hmmer_hits/unique_hits
      tRNA_matches: find_tRNA_matches/matching_sequences
      sequences: index_reads/sequences_with_index
    out: [masked_sequences]

  # Runs FragGeneScan on the masked sequences; the model and probability files
  # come straight from the workflow inputs of the same name.
  fraggenescan:
    run: ../tools/FragGeneScan1_20.cwl
    in:
      sequence: mask_rRNA_and_tRNA/masked_sequences
      completeSeq: { default: true }
      model: fraggenescan_model
      prob_forward: fraggenescan_prob_forward
      prob_backward: fraggenescan_prob_backward
      prob_noncoding: fraggenescan_prob_noncoding
      prob_start: fraggenescan_prob_start
      prob_stop: fraggenescan_prob_stop
      prob_start1: fraggenescan_prob_start1
      prob_stop1: fraggenescan_prob_stop1
      pwm_dist: fraggenescan_pwm_dist
    out: [predictedCDS]

  # Runs InterProScan on the FragGeneScan output with a fixed application list.
  interproscan:
    run: ../tools/InterProScan5.21-60.cwl
    in:
      proteinFile: fraggenescan/predictedCDS
      applications:
        default:
          - Pfam
          - TIGRFAM
          - PRINTS
          - ProSitePatterns
          - Gene3d
      # outputFileType: { valueFrom: "TSV" }
    out: [i5Annotations]
# Prefix used by the `edam:format_*` identifiers on the workflow inputs.
$namespaces:
  edam: 'http://edamontology.org/'

# Ontology document that defines those identifiers.
$schemas:
  - 'http://edamontology.org/EDAM_1.16.owl'