Skip to content

Commit

Permalink
feat(salmon): download and build for salmon v1.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jemten committed Apr 12, 2021
1 parent ef0d275 commit d862cd4
Show file tree
Hide file tree
Showing 22 changed files with 500 additions and 67 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -35,6 +35,7 @@ pdfmerger: v1.0
picardtools: 2.23.4 -> 2.25.0
preseq: 2.0.3 -> 3.1.2
rseqc: 3.0.1 -> 4.0.0
salmon: 0.12.0 -> 1.4.0
samtools: 1.10-h9402c20_2 -> 1.12=h9aed4be_1
smncopynumbercaller: 4b2c1ad -> v1.1.1
star: 2.7.4a -> 2.7.8a
Expand Down
19 changes: 0 additions & 19 deletions containers/salmon/Dockerfile

This file was deleted.

9 changes: 9 additions & 0 deletions definitions/download_parameters.yaml
Expand Up @@ -196,6 +196,13 @@ gencode_annotation:
data_type: SCALAR
default: 1
type: recipe
gencode_transcript:
analysis_mode: case
associated_recipe:
- mip
data_type: SCALAR
default: 1
type: recipe
genomic_superdups:
analysis_mode: case
associated_recipe:
Expand Down Expand Up @@ -333,6 +340,7 @@ recipe_core_number:
gatk_mitochondrial_ref: 1
genbank_haplogroup: 1
gencode_annotation: 1
gencode_transcript: 1
genomic_superdups: 1
giab: 1
gnomad: 1
Expand Down Expand Up @@ -384,6 +392,7 @@ recipe_time:
gatk_mitochondrial_ref: 1
genbank_haplogroup: 1
gencode_annotation: 1
gencode_transcript: 1
genomic_superdups: 1
giab: 1
gnomad: 25
Expand Down
15 changes: 14 additions & 1 deletion definitions/rd_rna_parameters.yaml
Expand Up @@ -66,7 +66,6 @@ transcript_annotation:
- gffcompare_ar
- picardtools_collectrnaseqmetrics
- rseqc
- salmon_quant
- star_aln
- star_fusion
- stringtie_ar
Expand All @@ -86,6 +85,14 @@ transcript_annotation_file_endings:
mandatory: no
reference: reference_dir
type: mip
transcript_sequence:
associated_recipe:
- salmon_quant
data_type: SCALAR
exists_check: file
is_reference: 1
reference: reference_dir
type: path
library_type:
associated_recipe:
- genebody_coverage
Expand Down Expand Up @@ -308,6 +315,12 @@ salmon_quant_reference_genome:
mandatory: no
reference: reference_dir
type: path
salmon_quant_gencode_reference:
associated_recipe:
- salmon_quant
data_type: SCALAR
default: 1
type: recipe_argument
## Blobfish
blobfish:
analysis_mode: case
Expand Down
2 changes: 1 addition & 1 deletion documentation/Setup.md
Expand Up @@ -66,7 +66,7 @@ You can speed up, for instance, the Readonly module by also installing the compa
- [Rhocall] (version: 0.5.1)
- [RSeQC] (version: 4.0.0)
- [rtg-tools] (version: 3.10.1)
- [Salmon] (version: 0.12.0)
- [Salmon] (version: 1.4.0)
- [Sambamba] (version: 0.6.8)
- [Samtools] (version: 1.11)
- [SMNCopyNumberCaller] (version: v1.1.1)
Expand Down
17 changes: 17 additions & 0 deletions lib/MIP/Cli/Mip/Analyse/Rd_rna.pm
Expand Up @@ -415,6 +415,14 @@ q{Default: grch37_dbsnp_-138-.vcf, grch37_1000g_indels_-phase1-.vcf, grch37_mill
)
);

option(
q{salmon_quant_gencode_reference} => (
documentation => q{Salmon uses a gencode reference},
is => q{rw},
isa => Bool,
)
);

option(
q{star_aln} => (
cmd_tags => [q{Analysis recipe switch}],
Expand Down Expand Up @@ -924,6 +932,15 @@ q{Regular expression file containing the regular expression to be used for each
)
);

option(
q{transcript_sequence} => (
cmd_tags => [q{Transcript sequence file: Format: fasta}],
documentation => q{Transcript sequences for the rd_rna pipeline},
is => q{rw},
isa => Str,
)
);

option(
q{trim_galore_ar} => (
cmd_tags => [q{Analysis recipe switch}],
Expand Down
23 changes: 17 additions & 6 deletions lib/MIP/Program/Gnu/Coreutils.pm
Expand Up @@ -339,7 +339,8 @@ sub gnu_cut {
## Function : Perl wrapper for writing cut command to already open $filehandle or return commands array. Based on cut 8.4
## Returns : @commands
## Arguments: $filehandle => Filehandle to write to
## : $infile_path => Infile paths {REF}
## : $delimiter => Delimiter
## : $infile_path => Infile path
## : $list => List of specified fields
## : $stderrfile_path => Stderrfile path
## : $stderrfile_path_append => Append stderr info to file path
Expand All @@ -348,6 +349,7 @@ sub gnu_cut {
my ($arg_href) = @_;

## Flatten argument(s)
my $delimiter;
my $filehandle;
my $list;
my $infile_path;
Expand All @@ -359,13 +361,16 @@ sub gnu_cut {
filehandle => {
store => \$filehandle,
},
delimiter => {
store => \$delimiter,
strict_type => 1,
},
list => {
store => \$list,
strict_type => 1,
},
infile_path => {
defined => 1,
required => 1,
store => \$infile_path,
strict_type => 1,
},
Expand All @@ -388,13 +393,20 @@ sub gnu_cut {
## Stores commands depending on input parameters
my @commands = qw{ cut };

if ($delimiter) {

push @commands, q{-d} . $SPACE . $delimiter;
}

if ($list) {

push @commands, q{-f} . $SPACE . $list;
}

## Infiles
push @commands, $infile_path;
if ($infile_path) {

push @commands, $infile_path;
}

push @commands,
unix_standard_streams(
Expand Down Expand Up @@ -505,8 +517,7 @@ sub gnu_echo {
}

## Strings
push @commands,
$string_wrapper . join( $EMPTY_STR, @{$strings_ref} ) . $string_wrapper;
push @commands, $string_wrapper . join( $EMPTY_STR, @{$strings_ref} ) . $string_wrapper;

if ($outfile_path) {
push @commands, q{>} . $SPACE . $outfile_path;
Expand Down
49 changes: 47 additions & 2 deletions lib/MIP/Program/Salmon.pm
Expand Up @@ -32,26 +32,38 @@ Readonly my $BASE_COMMAND => q{salmon};

sub salmon_index {

## Function : Perl wrapper for Salmon index, version 0.9.1.
## Function : Perl wrapper for Salmon index, version 1.4.0.
## Returns : @commands
## Arguments : $fasta_path => Input reference fasta path, note salmon does not use the genome reference fasta, it uses a fasta file of transcripts
## Arguments : $decoy_path => Path to file listing decoy sequence ids
## : $fasta_path => Input reference fasta path, note salmon does not use the genome reference fasta, it uses a fasta file of transcripts
## : $filehandle => Filehandle to write to
## : $gencode => Transcripts are in gencode format
## : $outfile_path => Outfile path
## : $stderrfile_path => Stderrfile path
## : $stderrfile_path_append => Append stderr info to file path
## : $stdoutfile_path => Stdoutfile path
## : $temp_directory => Temporary directory
## : $threads => Threads used for indexing

my ($arg_href) = @_;

## Flatten argument(s)
my $decoy_path;
my $fasta_path;
my $filehandle;
my $gencode;
my $outfile_path;
my $stderrfile_path;
my $stderrfile_path_append;
my $stdoutfile_path;
my $temp_directory;
my $threads;

my $tmpl = {
decoy_path => {
store => \$decoy_path,
strict_type => 1,
},
fasta_path => {
defined => 1,
required => 1,
Expand All @@ -61,6 +73,11 @@ sub salmon_index {
filehandle => {
store => \$filehandle,
},
gencode => {
allow => [ undef, 0, 1 ],
store => \$gencode,
strict_type => 1,
},
outfile_path => {
defined => 1,
required => 1,
Expand All @@ -79,6 +96,15 @@ sub salmon_index {
store => \$stdoutfile_path,
strict_type => 1,
},
temp_directory => {
store => \$temp_directory,
strict_type => 1,
},
threads => {
allow => [ undef, qr/\A \d+ \z/xms ],
store => \$threads,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};
Expand All @@ -90,6 +116,25 @@ sub salmon_index {

push @commands, q{--index} . $SPACE . $outfile_path;

if ($decoy_path) {

push @commands, q{--decoy} . $SPACE . $decoy_path;
}

if ($gencode) {

push @commands, q{--gencode};
}

if ($temp_directory) {

push @commands, q{--tmpdir} . $SPACE . $temp_directory;
}

if ($threads) {
push @commands, q{--threads} . $SPACE . $threads,;
}

push @commands,
unix_standard_streams(
{
Expand Down
62 changes: 50 additions & 12 deletions lib/MIP/Recipes/Build/Salmon_quant_prerequisites.pm
Expand Up @@ -16,7 +16,7 @@ use autodie qw{ :all };
use Readonly;

## MIPs lib/
use MIP::Constants qw{ $DOT $NEWLINE $UNDERSCORE };
use MIP::Constants qw{ $NEWLINE $PIPE $SPACE $UNDERSCORE };

BEGIN {

Expand Down Expand Up @@ -144,7 +144,9 @@ sub build_salmon_quant_prerequisites {
use MIP::Program::Gnu::Coreutils qw{ gnu_mkdir };
use MIP::Language::Shell qw{ check_exist_and_move_file };
use MIP::Processmanagement::Processes qw{ submit_recipe };
use MIP::Program::Star_fusion qw{ star_fusion_gtf_file_to_feature_seqs };
use MIP::Program::Gnu::Coreutils qw{ gnu_cat gnu_cut };
use MIP::Program::Gnu::Software::Gnu_grep qw{ gnu_grep };
use MIP::Program::Gnu::Software::Gnu_sed qw{ gnu_sed };
use MIP::Program::Salmon qw{ salmon_index };
use MIP::Recipe qw{ parse_recipe_prerequisites };
use MIP::Script::Setup_script qw{ setup_script };
Expand Down Expand Up @@ -206,24 +208,60 @@ sub build_salmon_quant_prerequisites {
);
say {$filehandle} $NEWLINE;

## Build cDNA sequence file
star_fusion_gtf_file_to_feature_seqs(
## Make decoy file
my $decoy_file_path = catfile( $salmon_quant_directory_tmp, q{decoys.txt} );
gnu_grep(
{
filehandle => $filehandle,
gtf_path => $active_parameter_href->{transcript_annotation},
referencefile_path => $human_genome_reference,
seq_type => q{cDNA},
stdoutfile_path => catfile( $salmon_quant_directory_tmp, q{cDNA_seqs.fa} ),
filehandle => $filehandle,
infile_path => $active_parameter_href->{transcript_sequence},
pattern => q{'^>'},
}
);
print {$filehandle} $PIPE . $SPACE;

gnu_cut(
{
delimiter => q{' '},
filehandle => $filehandle,
list => 1,
stdoutfile_path => $decoy_file_path,
}
);
say {$filehandle} $NEWLINE;

gnu_sed(
{
filehandle => $filehandle,
infile_path => $decoy_file_path,
inplace_edit => 1,
script => q{'s/>//g'},
}
);
say {$filehandle} $NEWLINE;

## Combine transcriptome and genome
gnu_cat(
{
filehandle => $filehandle,
infile_paths_ref => [
$active_parameter_href->{transcript_sequence},
$active_parameter_href->{human_genome_reference}
],
stdoutfile_path => catfile( $salmon_quant_directory_tmp, q{gentrome.fa} ),
}
);
say {$filehandle} $NEWLINE;

## Build Salmon index file
salmon_index(
{
fasta_path => catfile( $salmon_quant_directory_tmp, q{cDNA_seqs.fa} ),
filehandle => $filehandle,
outfile_path => $salmon_quant_directory_tmp,
fasta_path => catfile( $salmon_quant_directory_tmp, q{gentrome.fa} ),
threads => $NUMBER_OF_CORES,
filehandle => $filehandle,
temp_directory => $temp_directory,
gencode => $active_parameter_href->{salmon_quant_gencode_reference},
decoy_path => $decoy_file_path,
outfile_path => $salmon_quant_directory_tmp,
}
);
say {$filehandle} $NEWLINE;
Expand Down

0 comments on commit d862cd4

Please sign in to comment.