-
Notifications
You must be signed in to change notification settings - Fork 0
/
custom_trimFASTP_mapPYTHON.nf
119 lines (93 loc) · 4.11 KB
/
custom_trimFASTP_mapPYTHON.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
 * Pipeline input parameters (overridable on the command line via --<name>).
 */

// Paired-end FASTQ input: DATA/<run>/<sample>_<lane>_{1,2}.fq.gz
params.reads      = "DATA/*/*_*_{1,2}.fq.gz"

// Output locations
params.multiqc    = "$baseDir/multiqc"
params.outdir     = "results"

// Compute resource caps
params.max_memory = '29.GB'
params.max_cpus   = 4
params.max_time   = '240.h'

// Trimming / mapping parameters
// Anchored linked adapters (5'...3') for cutadapt, forward and reverse-complement
params.adapter    = '^ggcaagtgaccgtgtgtgtaaagagtgaggcgtatgaggctgtgtcggggcagaggcacaacgtttc...gcaggggagataccatgatcacgaaggtggttttcccagggcgaggcttatccattgcactccg$'
params.adapterRV  = '^cggagtgcaatggataagcctcgccctgggaaaaccaccttcgtgatcatggtatctcccctgc...gaaacgttgtgcctctgccccgacacagcctcatacgcctcactctttacacacacggtcacttgcc$'
params.bwt_index  = "/mnt/volume2/reference_seq"
params.overlap    = 6
params.gtf        = "/mnt/volume2/reference_seq.gtf"
params.ref        = "/mnt/volume2/reference_seq.fa"

// Startup banner summarising the run configuration
log.info """\
GERMS-16s - N F P I P E L I N E
===================================
reads : ${params.reads}
outdir : ${params.outdir}
""".stripIndent()
/*
 * Create the input channel: group the FASTQ files into [id, [R1, R2]] pairs,
 * then re-key each pair by sample name — the portion of the file-pair ID
 * before the first underscore — so lane/replicate suffixes are dropped.
 */
reads_ch = Channel
    .fromFilePairs( params.reads )
    .map { pairId, files ->
        // Use locally-scoped closure parameters and `def`: the original
        // assigned to undeclared variables, which Groovy promotes to script
        // globals — unsafe when channel items are mapped concurrently.
        def sampleName = pairId.split('_')[0]
        return [ sampleName, files ]
    }
/*
 * Step 1 — fastp: quality-filter, error-correct and merge each read pair.
 * Emits the merged reads (downstream input), the unmerged pass/fail pairs,
 * and the fastp log. Results are published per-sample under params.outdir.
 *
 * NOTE(review): the original also declared `val overlap` and `val adapter`
 * inputs, but neither was referenced in the script (they are cutadapt
 * parameters) — removed as dead inputs.
 */
process fastp {
    tag "$sample_id"
    label "process_medium"
    publishDir path: "${params.outdir}/${sample_id}" , mode: 'copy' , pattern: '*.fq.gz'
    publishDir path: "${params.outdir}/${sample_id}/logs" , mode: 'copy' , pattern: '*.log'

    input:
    tuple val(sample_id) , path(fastq) from reads_ch

    output:
    // Merged reads feed the cutadapt step
    tuple val(sample_id) , file("${sample_id}.MERGED.fq.gz") into reads_merged_ch
    // Unmerged pairs are kept (published) but not consumed downstream here
    tuple val(sample_id) , file("${sample_id}.unmerged.passQC.1.fq.gz") , file("${sample_id}.unmerged.passQC.2.fq.gz") , file("${sample_id}.unmerged.failQC.1.fq.gz") , file("${sample_id}.unmerged.failQC.2.fq.gz") into reads_unmerged_ch
    file "${sample_id}.fastp.log" into log_ch_1

    script:
    """
    fastp --correction --merge --length_required 10 --thread $task.cpus --merged_out ${sample_id}.MERGED.fq.gz --in1 ${fastq[0]} --in2 ${fastq[1]} --out1 ${sample_id}.unmerged.passQC.1.fq.gz --out2 ${sample_id}.unmerged.passQC.2.fq.gz --unpaired1 ${sample_id}.unmerged.failQC.1.fq.gz --unpaired2 ${sample_id}.unmerged.failQC.2.fq.gz >> ${sample_id}.fastp.log 2>&1
    echo "COMPLETED Step1 (fastp merge and correct reads pair) : ${sample_id}" >> ${sample_id}.fastp.log
    """
}
/*
 * Step 2 — cutadapt: remove the anchored linked adapters (forward and
 * reverse-complement orientation) from the merged reads. Reads in which
 * neither adapter was found go to a separate "untrimmed" output.
 */
process cutadapt {
    tag "$sample_id"
    label "process_medium"
    publishDir path: "${params.outdir}/${sample_id}" , mode: 'copy' , pattern: '*.fq.gz'
    publishDir path: "${params.outdir}/${sample_id}/logs" , mode: 'copy' , pattern: '*.log'

    input:
    tuple val(sample_id) , file(fastq) from reads_merged_ch
    val overlap from params.overlap
    val adapter from params.adapter
    val adapterRV from params.adapterRV

    output:
    // Adapter-trimmed reads feed the counting step
    tuple val(sample_id) , file("${sample_id}.TRIMMED.fq.gz") into reads_trimmed_ch
    tuple val(sample_id) , file("${sample_id}.untrimmed.fq.gz") into reads_untrimmed_ch
    file "${sample_id}.cutadapt.log" into log_ch_2

    script:
    """
    cutadapt -j $task.cpus -a $adapter -a $adapterRV --overlap $overlap -o ${sample_id}.TRIMMED.fq.gz --untrimmed-o ${sample_id}.untrimmed.fq.gz $fastq >> ${sample_id}.cutadapt.log 2>&1
    echo "COMPLETED Step2 (cutadapt 2adapters in merged reads) : ${sample_id}" >> ${sample_id}.cutadapt.log
    """
}
/*
 * Step 3 — count reads: decompress the trimmed reads and run the external
 * counting script against the reference, producing a per-sample count
 * matrix CSV plus a log.
 */
process count_reads {
tag "$sample_id"
label "process_low"
publishDir path: "${params.outdir}/${sample_id}" , mode: 'copy' , pattern: '*.csv'
publishDir path: "${params.outdir}/${sample_id}/logs" , mode: 'copy' , pattern: '*.log'
input:
tuple val(sample_id) , file(fastq) from reads_trimmed_ch
val ref from params.ref
output:
file("${sample_id}.countMatrix.csv") into sample_readCount_ch
file "${sample_id}.counting.log" into log_ch_3
script:
"""
gunzip -c -f $fastq > reads.fq
python /mnt/volume2/src/counting_reads.py --input_file reads.fq --sample_id $sample_id --ref $ref >> ${sample_id}.counting.log 2>&1
rm reads.fq
echo "COMPLETED Step3 (Counting reads) : ${sample_id}" >> ${sample_id}.counting.log
"""
}