Skip to content
Merged

Cram #41

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
${{ runner.os }}-openjdk-11
- name: Install openjdk
if: ${{ steps.cache-openjdk.outputs.cache-hit != 'true' }}
run: sudo apt-get install openjdk-11-jdk
run: sudo apt-get update && sudo apt-get install -y openjdk-11-jdk
# Install/cache nextflow
- name: Cache nextflow
id: cache-nextflow
Expand All @@ -52,13 +52,15 @@ jobs:
${{ runner.os }}-nextflow
- name: Install Nextflox
if: ${{ steps.cache-nextflow.outputs.cache-hit != 'true' }}
run: curl -s https://get.nextflow.io | bash && mv nextflow /usr/local/bin && chmod +x /usr/local/bin/nextflow
run: export NXF_VER=24.10.6 && curl -s https://get.nextflow.io | bash && mv nextflow /usr/local/bin && chmod +x /usr/local/bin/nextflow

# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v3
# Run tests sequentially
- name: test short single
run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf
- name: test short single cram
run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf --cram
- name: test short paired
run: nextflow run -ansi-log -profile docker,test_illumina_paired aline.nf
- name: test ont
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ AliNe (Alignment in Nextflow) - RNAseq DNAseq
AliNe is a pipeline written in Nextflow that aims to efficiently align reads against a reference using the tools of your choice.

Input: file, list of files, folder or csv
Output: Coordinate sorted BAM file.
Output: Coordinate sorted BAM/CRAM file.

## Table of Contents

Expand Down Expand Up @@ -337,6 +337,7 @@ On success you should get a message looking like this:
--reference path to the reference file (fa, fa.gz, fasta or fasta.gz)
--aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong]
--outdir path to the output directory (default: alignment_results)
--cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files.
--annotation [Optional][used by graphmap2, STAR, subread] Absolute path to the annotation file (gtf or gff3)

Type of input reads
Expand All @@ -351,7 +352,7 @@ On success you should get a message looking like this:

Extra steps
--trimming_fastp run fastp for trimming (default: false)
--fastqc run fastqc on raw and aligned reads (default: false)
--fastqc run fastqc on raw and aligned reads (default: false). Note: FastQC will be automatically disabled for alignment files when --cram is enabled.
--samtools_stats run samtools stats on aligned reads (default: false)
--multiqc_config path to the multiqc config file (default: config/multiqc_conf.yml)

Expand Down
278 changes: 176 additions & 102 deletions aline.nf

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion config/softwares.config
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ process {
container = 'quay.io/biocontainers/salmon:1.10.3--h6dccd9a_2'
}
withLabel: 'samtools' {
container = 'quay.io/biocontainers/samtools:1.3.1--h0cf4675_11'
container = 'quay.io/biocontainers/samtools:1.23--h96c455f_0'
}
withLabel: 'seqkit' {
container = 'quay.io/biocontainers/seqkit:2.8.2--h9ee0642_1'
Expand Down
2 changes: 1 addition & 1 deletion modules/bwamem2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ process bwamem2_index {
*/
process bwamem2 {
label 'bwamem2'
tag "$sample"
tag "${meta.id}"
publishDir "${params.outdir}/${outpath}", pattern: "*bwamem2.log", mode: 'copy'

input:
Expand Down
27 changes: 26 additions & 1 deletion modules/fastqc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,30 @@ process fastqc {
mkdir fastqc_${sample_id}${add_suffix}logs
fastqc -t ${task.cpus} -o fastqc_${sample_id}${add_suffix}logs -q ${reads}
"""

}

// FastQC variant for aligned files: the input tuple also carries the index that
// comes along with each alignment file. The index is declared as an input but is
// not referenced by the fastqc command itself.
process fastqc_ali {
    label 'fastqc'
    tag "${meta.id}"
    publishDir "${params.outdir}/${outpath}", mode: 'copy'

    input:
        tuple val(meta), path(reads), path(index)
        val outpath
        val suffix

    output:
        path ("*logs")

    script:
        // Name of the report directory, built once and reused below:
        // fastqc_<id>_logs without suffix, fastqc_<id>_<suffix>_logs with one,
        // so that different runs on the same sample do not collide.
        def separator = suffix ? "_${suffix}_" : '_'
        def report_dir = "fastqc_${meta.id}${separator}logs"

        """
        mkdir ${report_dir}
        fastqc -t ${task.cpus} -o ${report_dir} -q ${reads}
        """
}
3 changes: 2 additions & 1 deletion modules/graphmap2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,12 @@ process graphmap2 {
rm ${fileName}_graphmap2.sam
awk '!/^@HD/ && !/^@SQ/ && !/^@RG/ && !/^@PG/ && !/^@CO/ && NF' ${reads[1].baseName}_graphmap2.sam >> ${fileName}_graphmap2_concatR1R2.sam
rm ${reads[1].baseName}_graphmap2.sam
sed -i '/^\$/d' ${fileName}_graphmap2_concatR1R2.sam # remove empty lines that can be generated by graphmap2 in header
"""
} else {
"""
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${reads[0]} -o ${fileName}_graphmap2.sam 2> ${fileName}_graphmap2.log
sed -i '\$!N; /^\\(.*\\)\\n\\1\$/!P; D' ${fileName}_graphmap2.sam
sed -i '/^\$/d' ${fileName}_graphmap2.sam # remove empty lines that can be generated by graphmap2 in header
"""
}
}
Expand Down
4 changes: 2 additions & 2 deletions modules/kallisto.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ process kallisto_index {
process kallisto {
label 'kallisto'
tag "${meta.id}"
publishDir "${params.outdir}/${outpath}", pattern: "${filename}/*.bam", mode: 'copy'
publishDir "${params.outdir}/${outpath}", mode: 'copy', pattern: "*.log"

input:
tuple val(meta), path(reads)
Expand All @@ -42,7 +42,7 @@ process kallisto {
def kallisto_options = meta.kallisto_options ?: ""

// catch filename
filename = AlineUtils.getCleanName(reads) + "_kallisto_sorted"
filename = AlineUtils.getCleanName(reads) + "_kallisto"

// For paired-end reads, Kallisto automatically estimates the fragment length distribution from the data and does not require you to specify it manually
if (meta.paired){
Expand Down
2 changes: 1 addition & 1 deletion modules/ngmlr.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process ngmlr {
val outpath

output:
tuple val(meta), path ("*ngmlr.sam"), emit: tuple_sample_sam, optional:true
tuple val(meta), path ("*.sam"), emit: tuple_sample_sam, optional:true
path "*.log", emit: ngmlr_summary

script:
Expand Down
61 changes: 56 additions & 5 deletions modules/samtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,26 +70,77 @@ process samtools_merge_bam_if_paired {
/*
http://www.htslib.org/doc/samtools-sort.html
Sort alignments by leftmost coordinates
And convert to cram if needed (when samtools_bam2cram can be avoided) to save disk space
*/
process samtools_sort {
    label 'samtools'
    tag "${meta.id}"
    publishDir "${params.outdir}/${outpath}", mode: 'copy'

    input:
        tuple val(meta), path(bam)
        val outpath
        path(genome_fasta)

    output:
        tuple val(meta), path ("*_sorted.{bam,cram}"), emit: tuple_sample_ali

    script:
        // params.cram picks the command: with it, the sorted output is written as
        // <baseName>_sorted.cram using the reference fasta; without it, as
        // <baseName>_sorted.bam. The reference is only used in the CRAM case.
        def sort_command = params.cram ?
            "samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${bam.baseName}_sorted.cram ${bam}" :
            "samtools sort -@ ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam}"

        """
        ${sort_command}
        """
}


/*
http://www.htslib.org/doc/samtools-view.html
Convert BAM to CRAM format

Inputs : a (meta, bam) tuple and the reference fasta passed to samtools view via -T.
Outputs: (meta, file) tuples matched by the globs declared below.
No publishDir directive is declared in this process.
*/
process samtools_bam2cram {
label 'samtools'
tag "${meta.id}"

input:
tuple val(meta), path(bam)
path(genome_fasta)

output:
// NOTE(review): this "*_sorted.bam" output and the "samtools sort" command in the
// script below look like lines carried over from the previous samtools_sort
// definition (removed lines of a diff view) - confirm against the real file.
tuple val(meta), path ("*_sorted.bam"), emit: tuple_sample_sortedbam
// CRAM written by the "samtools view -C" command below
tuple val(meta), path ("*.cram"), emit: tuple_sample_ali

script:

// As written, two commands run: a sort producing <baseName>_sorted.bam, then
// a view with -C (CRAM output) and -T (reference fasta) producing <baseName>.cram
// from the original input file, not from the sorted one.
"""
samtools sort -@ ${task.cpus} ${bam} -o ${bam.baseName}_sorted.bam
samtools view -C -T ${genome_fasta} -o ${bam.baseName}.cram ${bam}
"""
}

/*
http://www.htslib.org/doc/samtools-index.html
Index BAM or CRAM files
*/
process samtools_index {
    label 'samtools'
    tag "${meta.id}"
    // Publishes both the alignment file and the index created for it
    publishDir "${params.outdir}/${outpath}", mode: 'copy', pattern: "{*.bam,*.cram,*.crai,*.bai}"

    input:
        // alignment: file to index (the publish pattern above covers *.bam and *.cram)
        tuple val(meta), path(alignment)
        // outpath: sub-directory of params.outdir used for publishing
        val outpath

    output:
        // The input alignment is re-emitted next to its index so downstream
        // processes receive both in a single tuple.
        tuple val(meta), path(alignment), path ("*.{bai,crai}"), emit: tuple_sample_ali

    script:
        // -@ passes the CPUs allocated to the task, consistent with samtools_sort;
        // previously the indexing ignored task.cpus.
        """
        samtools index -@ ${task.cpus} ${alignment}
        """
}

/*
http://www.htslib.org/doc/samtools-stats.html
Expand All @@ -101,7 +152,7 @@ process samtools_stats {
publishDir "${params.outdir}/${outpath}", mode: 'copy'

input:
tuple val(meta), path(bam)
tuple val(meta), path(bam), path(index)
path(genome_fasta)
val outpath
val suffix
Expand Down
22 changes: 22 additions & 0 deletions modules/seqkit.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,25 @@ process seqkit_convert {
"""
}
}

/*
 * Clean FASTA headers by removing everything from the first space onwards.
 * The cleaned sequences are written to <baseName>.clean.fa; the script below
 * only rewrites headers and does not build any index.
 */
process seqkit_clean_fasta_headers {
    label 'seqkit'
    tag "${fasta.baseName}"
    publishDir "${params.outdir}/${outpath}", mode: 'copy'

    input:
        path(fasta)
        val outpath

    output:
        path("*.clean.fa"), emit: clean_fasta

    script:
        // Output name derived from the input file name
        def cleaned_fasta = "${fasta.baseName}.clean.fa"

        """
        seqkit replace -p " .*" -r "" ${fasta} > ${cleaned_fasta}
        """
}
6 changes: 4 additions & 2 deletions modules/star.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ process star_index {
process star {
label 'star'
tag "${meta.id}"
publishDir "${params.outdir}/${outpath}", mode: 'copy'
publishDir "${params.outdir}/${outpath}", mode: 'copy', pattern: "{*.out,*SJ.out.tab}"

input:
tuple val(meta), path(reads)
Expand Down Expand Up @@ -137,7 +137,7 @@ For a study with multiple samples, it is recommended to collect 1st pass junctio
process star2pass{
label 'star'
tag "${meta.id}"
publishDir "${params.outdir}/${outpath}", pattern: "*.log", mode: 'copy'
publishDir "${params.outdir}/${outpath}", mode: 'copy', pattern: "{*.out,*SJ.out.tab}"

input:
tuple val(meta), path(reads)
Expand All @@ -149,6 +149,8 @@ process star2pass{
output:
tuple val(meta), path ("*.bam"), emit: tuple_sample_bam
path "*.out", emit: star_summary
path "*SJ.out.tab", emit: splice_junctions


script:
// options for STAR
Expand Down
5 changes: 3 additions & 2 deletions modules/subread.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ process subread_index {

/*
* To align with subread
* Particularity: it is directly sorted by coordinates
*/
process subread {
label 'subread'
tag "${meta.id}"
publishDir "${params.outdir}/${outpath}", pattern: "*subread.vcf", mode: 'copy'
publishDir "${params.outdir}/${outpath}", mode: 'copy', pattern: "*.{vcf,log}"

input:
tuple val(meta), path(fastq)
Expand All @@ -42,7 +43,7 @@ process subread {
output:
tuple val(meta), path ("*.bam"), emit: tuple_sample_bam, optional:true
path "*subread.vcf", emit: subread_vcf, optional:true
path "*.log", emit: sublong_log
path "*.log", emit: subread_log

script:
// options for subread
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ manifest {
description = 'Nextflow alignment pipeline'
mainScript = 'aline.nf'
nextflowVersion = '>=22.04.0'
version = '1.4.0'
version = '1.5.3'
}


Expand Down