# func_ann_and_post_processing-subwf.cwl
# CWL workflow that runs functional annotation on predicted proteins and then
# post-processes the results (GFF, DIAMOND, KEGG pathways, genome properties,
# antiSMASH summary) into output folders. See `steps` for the exact wiring.
class: Workflow
cwlVersion: v1.2

requirements:
  # Minimum RAM reservation; the CWL spec defines ramMin in mebibytes.
  - class: ResourceRequirement
    ramMin: 20000
  # NOTE(review): several steps `run` files under ../../subworkflows/ —
  # presumably nested Workflows, which is what this requirement enables.
  - class: SubworkflowFeatureRequirement
  # Needed for step inputs that list more than one `source`
  # (file_list in move_to_pathways_systems_folder uses linkMerge).
  - class: MultipleInputFeatureRequirement
  - class: InlineJavascriptRequirement
  # Needed for `valueFrom` on step inputs (e.g. $(self.nameroot)).
  - class: StepInputExpressionRequirement
  # NOTE(review): no `scatter` is visible in this workflow's own steps —
  # confirm whether this requirement is still needed.
  - class: ScatterFeatureRequirement
# Workflow parameters, grouped by the step that consumes them.
# Database/config locations are declared as `string`, not File/Directory —
# presumably paths resolved by the tools themselves; confirm with the callers.
inputs:
  # Shared by several steps: filtered_fasta supplies the name root used for
  # output file names; cgc_results_faa is the protein FASTA passed to
  # functional_annotation, gff, diamond and folder_functional_annotation.
  filtered_fasta: File
  cgc_results_faa: File
  rna_prediction_ncRNA: File

  # functional_annotation step: EggNOG settings
  protein_chunk_size_eggnog: int
  EggNOG_db: string
  EggNOG_diamond_db: string
  EggNOG_data_dir: string

  # functional_annotation step: HMM settings
  # (hmmsearch_header is consumed by folder_functional_annotation)
  protein_chunk_size_hmm: int
  func_ann_names_hmmer: string
  HMM_gathering_bit_score: boolean
  HMM_omit_alignment: boolean
  HMM_name_database: string
  hmmsearch_header: string

  # functional_annotation step: InterProScan settings
  # (ips_header is consumed by folder_functional_annotation)
  protein_chunk_size_IPS: int
  func_ann_names_ips: string
  InterProScan_databases: string
  InterProScan_applications: string[]
  InterProScan_outputFormat: string[]
  ips_header: string

  # diamond step
  # (diamond_header is consumed by folder_functional_annotation)
  diamond_maxTargetSeqs: int
  diamond_databaseFile: string
  Uniref90_db_txt: string
  diamond_header: string

  # folder_functional_annotation step
  antismash_geneclusters_txt: File
  go_config: string
  ko_file: string

  # pathways step (KEGG)
  graphs: string
  pathways_names: string
  pathways_classes: string

  # genome_properties step
  gp_flatfiles_path: string
# Workflow results. Every output is an optional Directory (`Directory?`)
# taken directly from a single step output.
outputs:
  # << functional annotation >>
  # Both come from the folder_functional_annotation step.
  functional_annotation_folder:  # [15]
    type: Directory?
    outputSource: folder_functional_annotation/functional_annotation_folder
  stats:  # [6]
    type: Directory?
    outputSource: folder_functional_annotation/stats

  # << pathways and systems >>
  # Two separate Directory outputs; both producing steps are given the
  # folder name `pathways-systems` as a default (see `steps`).
  pathways_systems_folder_antismash_summary:
    type: Directory?
    outputSource: move_antismash_summary_to_pathways_systems_folder/summary_in_folder
  pathways_systems_folder:
    type: Directory?
    outputSource: move_to_pathways_systems_folder/out
# Step graph. Data flow, as wired below:
#   functional_annotation -> gff, folder_functional_annotation, pathways,
#                            genome_properties
#   diamond               -> folder_functional_annotation
#   gff                   -> folder_functional_annotation
#   pathways + genome_properties -> change_formats_and_names
#   pathways + change_formats_and_names -> move_to_pathways_systems_folder
#   folder_functional_annotation -> move_antismash_summary_to_pathways_systems_folder
steps:
  # ----------------------------------- << STEP FUNCTIONAL ANNOTATION >> -----------------------------------
  # Produces the HMM, InterProScan and EggNOG results used by later steps.
  functional_annotation:
    run: ../../subworkflows/assembly/functional_annotation.cwl
    in:
      CGC_predicted_proteins: cgc_results_faa
      chunk_size_eggnog: protein_chunk_size_eggnog
      chunk_size_hmm: protein_chunk_size_hmm
      chunk_size_IPS: protein_chunk_size_IPS
      name_ips: func_ann_names_ips
      name_hmmer: func_ann_names_hmmer
      HMM_gathering_bit_score: HMM_gathering_bit_score
      HMM_omit_alignment: HMM_omit_alignment
      HMM_database: HMM_name_database
      EggNOG_db: EggNOG_db
      EggNOG_diamond_db: EggNOG_diamond_db
      EggNOG_data_dir: EggNOG_data_dir
      InterProScan_databases: InterProScan_databases
      InterProScan_applications: InterProScan_applications
      InterProScan_outputFormat: InterProScan_outputFormat
    out: [hmm_result, ips_result, eggnog_annotations, eggnog_orthologs]

  # ----------------------------------- << STEP GFF >> -----------------------------------
  gff:
    run: ../../../tools/Assembly/GFF/gff_generation.cwl
    in:
      ips_results: functional_annotation/ips_result
      eggnog_results: functional_annotation/eggnog_annotations
      input_faa: cgc_results_faa
      # Output name is the FASTA's name root plus `.annotations.gff`.
      output_name:
        source: filtered_fasta
        valueFrom: $(self.nameroot).annotations.gff
    out: [output_gff_gz, output_gff_index]

  # ----------------------------------- << FUNCTIONAL ANNOTATION FOLDER >> -----------------------------------
  # << DIAMOND >>
  diamond:
    run: ../../../tools/Assembly/Diamond/diamond-subwf.cwl
    in:
      queryInputFile: cgc_results_faa
      # Quoted so the value stays a string rather than the integer 6.
      outputFormat: { default: '6' }
      maxTargetSeqs: diamond_maxTargetSeqs
      strand: { default: 'both' }
      databaseFile: diamond_databaseFile
      # Hard-coded thread count; not exposed as a workflow input.
      threads: { default: 32 }
      Uniref90_db_txt: Uniref90_db_txt
      filename:
        source: filtered_fasta
        valueFrom: $(self.nameroot)
    out: [post-processing_output]

  # << collect folder >>
  # Gathers annotation tables, headers and the GFF outputs; provides the
  # functional_annotation_folder and stats workflow outputs plus the
  # antiSMASH summary used by the last step.
  folder_functional_annotation:
    run: ../../subworkflows/assembly/deal_with_functional_annotation.cwl
    in:
      fasta: filtered_fasta
      IPS_table: functional_annotation/ips_result
      diamond_table: diamond/post-processing_output
      hmmscan_table: functional_annotation/hmm_result
      antismash_geneclusters_txt: antismash_geneclusters_txt
      rna: rna_prediction_ncRNA
      cds: cgc_results_faa
      go_config: go_config
      eggnog_orthologs: functional_annotation/eggnog_orthologs
      eggnog_annotations: functional_annotation/eggnog_annotations
      diamond_header: diamond_header
      hmmsearch_header: hmmsearch_header
      ips_header: ips_header
      output_gff_gz: gff/output_gff_gz
      output_gff_index: gff/output_gff_index
      ko_file: ko_file
    out: [functional_annotation_folder, stats, summary_antismash]

  # ----------------------------------- << PATHWAYS and SYSTEMS >> -----------------------------------
  # << KEGG PATHWAYS >>
  pathways:
    run: ../../subworkflows/assembly/kegg_analysis.cwl
    in:
      input_table_hmmscan: functional_annotation/hmm_result
      filtered_fasta: filtered_fasta
      outputname:
        source: filtered_fasta
        valueFrom: $(self.nameroot)
      graphs: graphs
      pathways_names: pathways_names
      pathways_classes: pathways_classes
    out: [kegg_pathways_summary, kegg_contigs_summary]

  # ---------------------- << GENOME PROPERTIES >> ------------------------
  genome_properties:
    run: ../../../tools/Assembly/Genome_properties/genome_properties.cwl
    in:
      input_tsv_file: functional_annotation/ips_result
      flatfiles_path: gp_flatfiles_path
      GP_txt: { default: genomeProperties.txt }
      # Summary name is the FASTA's name root plus `.summary.gprops.tsv`.
      name:
        source: filtered_fasta
        valueFrom: $(self.nameroot).summary.gprops.tsv
    out: [summary]

  # << change TSV to CSV >>
  change_formats_and_names:
    run: ../../subworkflows/assembly/change_formats_and_names.cwl
    in:
      genome_properties_summary: genome_properties/summary
      kegg_summary: pathways/kegg_pathways_summary
      fasta: filtered_fasta
    out: [gp_summary_csv, kegg_summary_csv]

  # << move PATHWAYS-SYSTEMS >>
  move_to_pathways_systems_folder:
    run: ../../../utils/return_directory.cwl
    in:
      file_list:
        source:
          # kegg contigs.tsv — the original note says "not using", yet the
          # entry is still merged into file_list. NOTE(review): confirm
          # whether it should stay; kept here to preserve behavior.
          - pathways/kegg_contigs_summary
          # kegg pathways.csv
          - change_formats_and_names/kegg_summary_csv
          # genome properties.csv
          - change_formats_and_names/gp_summary_csv
        # Combine the three sources into one flat list.
        linkMerge: merge_flattened
      dir_name: { default: pathways-systems }
    out: [out]

  # << move PATHWAYS-SYSTEMS antismash summary>>
  move_antismash_summary_to_pathways_systems_folder:
    run: ../../../tools/Assembly/antismash/move_antismash_summary/move_antismash_summary.cwl
    in:
      antismash_summary: folder_functional_annotation/summary_antismash
      folder_name: { default: pathways-systems }
    out: [summary_in_folder]