Skip to content

Commit

Permalink
Merge 0299a3e into 3d1f6c6
Browse files Browse the repository at this point in the history
  • Loading branch information
jemten committed Feb 10, 2021
2 parents 3d1f6c6 + 0299a3e commit 1c61549
Show file tree
Hide file tree
Showing 21 changed files with 144 additions and 105 deletions.
11 changes: 10 additions & 1 deletion CHANGELOG.md
Expand Up @@ -2,8 +2,17 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## [9.1.3]
- Fix memory allocation for mip-rna markduplicates.
- Update to repeat expansion calling
- Add possibility to rename arriba fusion report with sample display name

**Tools**
expansionhunter 3.1.2 -> 4.0.2
stranger 0.5.5 -> 0.7

## [9.1.2]
- Increase markduplicates java memory allocation for chromosome 2.
- Increase markduplicates java memory allocation for chromosome 2.
- Turn off chromograph_viz for wes analysis
- Chromograph exits gracefully on empty infile
- Use median coverage instead of expected coverage when evaluating whether expected coverage has been reached or not [#1719](https://github.com/Clinical-Genomics/MIP/issues/1719)
Expand Down
8 changes: 4 additions & 4 deletions containers/expansionhunter/Dockerfile
Expand Up @@ -5,15 +5,15 @@ FROM clinicalgenomics/mip:2.0
################## METADATA ######################

LABEL base_image="clinicalgenomics/mip:2.0"
LABEL version="2"
LABEL version="3"
LABEL software="expansionhunter"
LABEL software.version="3.2.2"
LABEL software.version="4.0.2"
LABEL extra.binaries="expansionhunter"
LABEL maintainer="Clinical-Genomics/MIP"

RUN conda install -c bioconda expansionhunter=3.2.2=hd1df264_0
RUN conda install -c bioconda expansionhunter=4.0.2

## Clean up after conda
RUN /opt/conda/bin/conda clean -tipsy
RUN /opt/conda/bin/conda clean -ya

WORKDIR /data/
5 changes: 3 additions & 2 deletions containers/rseqc/Dockerfile
Expand Up @@ -5,12 +5,13 @@ FROM clinicalgenomics/mip:2.0
################## METADATA ######################

LABEL base_image="clinicalgenomics/mip:2.0"
LABEL version="1"
LABEL version="2"
LABEL software="rseqc"
LABEL software.version="3.0.1"
LABEL extra.binaries="rseqc"
LABEL maintainer="Clinical-Genomics/MIP"

RUN pip install --no-cache-dir rseqc==3.0.1
RUN conda install rseqc=3.0.1 ucsc-wigtobigwig
RUN conda clean -ya

WORKDIR /data/
8 changes: 4 additions & 4 deletions containers/stranger/Dockerfile
Expand Up @@ -5,17 +5,17 @@ FROM clinicalgenomics/mip:2.0
################## METADATA ######################

LABEL base_image="clinicalgenomics/mip:2.0"
LABEL version="1"
LABEL version="2"
LABEL software="stranger"
LABEL software.version="0.5.5"
LABEL software.version="0.7"
LABEL extra.binaries="stranger"
LABEL maintainer="Clinical-Genomics/MIP"

RUN conda install pip python=3.7

## Clean up after conda
RUN /opt/conda/bin/conda clean -tipsy
RUN /opt/conda/bin/conda clean -ya

RUN pip install --no-cache-dir stranger==0.5.5
RUN pip install --no-cache-dir stranger==0.7

WORKDIR /data/
2 changes: 1 addition & 1 deletion definitions/rd_dna_parameters.yaml
Expand Up @@ -773,7 +773,7 @@ expansionhunter_variant_catalog_file_path:
associated_recipe:
- expansionhunter
data_type: SCALAR
default: grch37_expansionhunter_variant_catalog_-3.1.2-.json
default: grch37_expansionhunter_variant_catalog_-4.0.2-.json
exists_check: file
is_reference: 1
reference: reference_dir
Expand Down
10 changes: 8 additions & 2 deletions definitions/rd_rna_parameters.yaml
Expand Up @@ -198,7 +198,7 @@ recipe_core_number:
gffcompare_ar: 1
gzip_fastq: 0
multiqc_ar: 1
markduplicates: 16
markduplicates: 13
picardtools_collectrnaseqmetrics: 1
picardtools_mergesamfiles: 13
preseq_ar: 1
Expand Down Expand Up @@ -455,6 +455,12 @@ arriba_proteindomain_path:
- arriba_ar
data_type: SCALAR
type: path
arriba_use_sample_id_as_display_name:
associated_recipe:
- arriba_ar
data_type: SCALAR
default: 0
type: mip
## Merge bam files
picardtools_mergesamfiles:
analysis_mode: sample
Expand Down Expand Up @@ -609,7 +615,7 @@ rseqc_transcripts_file:
- genebody_coverage
- rseqc
data_type: SCALAR
default: grch37_gencode_annotation_reformated_-v31-.bed
default: grch37_gencode_annotation_reformated_-v34-.bed
exists_check: file
is_reference: 1
reference: reference_dir
Expand Down
11 changes: 10 additions & 1 deletion lib/MIP/Cli/Mip/Analyse/Rd_rna.pm
Expand Up @@ -151,6 +151,15 @@ sub _build_usage {
)
);

option(
q{arriba_use_sample_id_as_display_name} => (
cmd_tags => [q{Default: 0}],
documentation => q{Use sample id as display name for arriba fusion report},
is => q{rw},
isa => Bool,
)
);

option(
q{bcftools_merge} => (
cmd_tags => [q{Analysis recipe switch}],
Expand Down Expand Up @@ -499,7 +508,7 @@ q{Default: grch37_dbsnp_-138-.vcf, grch37_1000g_indels_-phase1-.vcf, grch37_mill

option(
q{rseqc_transcripts_file} => (
cmd_tags => [q{Rseqc transcripts file: Format: GTF}],
cmd_tags => [q{Rseqc transcripts file: Format: bed}],
documentation => q{Input for rseqc to build transcript bed format file},
is => q{rw},
isa => Str,
Expand Down
2 changes: 1 addition & 1 deletion lib/MIP/Constants.pm
Expand Up @@ -80,7 +80,7 @@ Readonly our %ANALYSIS => (
);

## Set MIP version
Readonly our $MIP_VERSION => q{v9.1.2};
Readonly our $MIP_VERSION => q{v9.1.3};

## Cli
Readonly our $MOOSEX_APP_SCEEN_WIDTH => 160;
Expand Down
13 changes: 12 additions & 1 deletion lib/MIP/Program/Stranger.pm
Expand Up @@ -35,7 +35,8 @@ sub stranger {

## Function : Perl wrapper for stranger annotation of repeat expansions.
## Returns : @commands
## Arguments: $filehandle => Filehandle to write to
## Arguments: $family_id => Family id
## : $filehandle => Filehandle to write to
## : $infile_path => Infile
## : $log_level => Log level
## : $repeats_file_path => Path to a file with repeat definitions
Expand All @@ -47,6 +48,7 @@ sub stranger {
my ($arg_href) = @_;

## Flatten argument(s)
my $family_id;
my $filehandle;
my $infile_path;
my $repeats_file_path;
Expand All @@ -59,6 +61,10 @@ sub stranger {
my $log_level;

my $tmpl = {
family_id => {
store => \$family_id,
strict_type => 1,
},
filehandle => {
store => \$filehandle,
},
Expand Down Expand Up @@ -97,6 +103,11 @@ sub stranger {

my @commands = qw{ stranger };

if ($family_id) {

push @commands, q{--family_id} . $SPACE . $family_id;
}

push @commands, q{--loglevel} . $SPACE . $log_level;

if ($repeats_file_path) {
Expand Down
59 changes: 36 additions & 23 deletions lib/MIP/Recipes/Analysis/Arriba.pm
Expand Up @@ -18,8 +18,7 @@ use autodie qw{ :all };
use Readonly;

## MIPs lib/
use MIP::Constants
qw{ $COMMA $DOT $EMPTY_STR $LOG_NAME $NEWLINE $PIPE $SPACE $UNDERSCORE };
use MIP::Constants qw{ $COMMA $DOT $EMPTY_STR $LOG_NAME $NEWLINE $PIPE $SPACE $UNDERSCORE };

BEGIN {

Expand Down Expand Up @@ -147,6 +146,7 @@ sub analysis_arriba {
use MIP::Program::Sambamba qw{ sambamba_index sambamba_sort };
use MIP::Program::Star qw{ star_aln };
use MIP::Sample_info qw{
get_pedigree_sample_id_attributes
get_rg_header_line
set_file_path_to_store
set_recipe_metafile_in_sample_info
Expand Down Expand Up @@ -211,6 +211,8 @@ sub analysis_arriba {
recipe_name => $recipe_name,
}
);
my $use_sample_id_as_display_name =
$active_parameter_href->{arriba_use_sample_id_as_display_name};

## Filehandles
# Create anonymous filehandle
Expand Down Expand Up @@ -321,11 +323,11 @@ sub analysis_arriba {
out_sam_attr_rgline => $out_sam_attr_rgline,
out_sam_type => q{BAM Unsorted},
out_sam_unmapped => q{Within},
pe_overlap_nbases_min => $active_parameter_href->{pe_overlap_nbases_min},
quant_mode => q{-},
stdout_data_type => q{BAM_Unsorted},
thread_number => $recipe_resource{core_number},
two_pass_mode => q{None},
pe_overlap_nbases_min => $active_parameter_href->{pe_overlap_nbases_min},
quant_mode => q{-},
stdout_data_type => q{BAM_Unsorted},
thread_number => $recipe_resource{core_number},
two_pass_mode => q{None},
},
);
push @arriba_commands, $PIPE;
Expand Down Expand Up @@ -397,17 +399,28 @@ sub analysis_arriba {
say {$filehandle} $NEWLINE;

## Visualize the fusions
my $report_path = $outfile_path_prefix . $DOT . q{pdf};
my $report_path = $outfile_path_prefix . $DOT . q{pdf};
my $sample_display_name = get_pedigree_sample_id_attributes(
{
attribute => q{sample_display_name},
sample_id => $sample_id,
sample_info_href => $sample_info_href,
}
);
if ( $sample_display_name and not $use_sample_id_as_display_name ) {

$report_path = catfile( $outsample_directory,
$sample_display_name . $UNDERSCORE . q{arriba_fusions.pdf} );
}
draw_fusions(
{
alignment_file_path => $sorted_bam_file,
annotation_file_path => $active_parameter_href->{transcript_annotation},
cytoband_file_path => $active_parameter_href->{arriba_cytoband_path},
filehandle => $filehandle,
fusion_file_path => $outfile_path,
outfile_path => $report_path,
protein_domain_file_path =>
$active_parameter_href->{arriba_protein_domain_path},
alignment_file_path => $sorted_bam_file,
annotation_file_path => $active_parameter_href->{transcript_annotation},
cytoband_file_path => $active_parameter_href->{arriba_cytoband_path},
filehandle => $filehandle,
fusion_file_path => $outfile_path,
outfile_path => $report_path,
protein_domain_file_path => $active_parameter_href->{arriba_protein_domain_path},
}
);
say {$filehandle} $NEWLINE;
Expand Down Expand Up @@ -460,13 +473,13 @@ sub analysis_arriba {

submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_island},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_island},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down
38 changes: 14 additions & 24 deletions lib/MIP/Recipes/Analysis/Expansionhunter.pm
Expand Up @@ -152,9 +152,8 @@ sub analysis_expansionhunter {
my $max_cores_per_node = $active_parameter_href->{max_cores_per_node};
my $modifier_core_number =
scalar( @{ $active_parameter_href->{sample_ids} } );
my $human_genome_reference =
$arg_href->{active_parameter_href}{human_genome_reference};
my $job_id_chain = get_recipe_attributes(
my $human_genome_reference = $arg_href->{active_parameter_href}{human_genome_reference};
my $job_id_chain = get_recipe_attributes(
{
parameter_href => $parameter_href,
recipe_name => $recipe_name,
Expand Down Expand Up @@ -184,7 +183,6 @@ sub analysis_expansionhunter {
}
);

my $outdir_path_prefix = $io{out}{dir_path_prefix};
my $outfile_path_prefix = $io{out}{file_path_prefix};
my $outfile_suffix = $io{out}{file_constant_suffix};
my $outfile_path = $outfile_path_prefix . $outfile_suffix;
Expand Down Expand Up @@ -233,9 +231,7 @@ sub analysis_expansionhunter {

## Collect infiles for all sample_ids to enable migration to temporary directory
SAMPLE_ID:
while ( my ( $sample_id_index, $sample_id ) =
each @{ $active_parameter_href->{sample_ids} } )
{
while ( my ( $sample_id_index, $sample_id ) = each @{ $active_parameter_href->{sample_ids} } ) {

## Get the io infiles per chain and id
my %sample_io = get_io_files(
Expand Down Expand Up @@ -270,9 +266,7 @@ sub analysis_expansionhunter {
my @vt_outfile_paths;

SAMPLE_ID:
while ( my ( $sample_id_index, $sample_id ) =
each @{ $active_parameter_href->{sample_ids} } )
{
while ( my ( $sample_id_index, $sample_id ) = each @{ $active_parameter_href->{sample_ids} } ) {

$process_batches_count = print_wait(
{
Expand Down Expand Up @@ -314,11 +308,7 @@ sub analysis_expansionhunter {
say {$filehandle} $AMPERSAND, $NEWLINE;
push @vt_infile_paths, $sample_outfile_path_prefix . $outfile_suffix;
push @vt_outfile_paths,
$outfile_path_prefix
. $UNDERSCORE . q{vt}
. $UNDERSCORE
. $sample_id
. $outfile_suffix;
$outfile_path_prefix . $UNDERSCORE . q{vt} . $UNDERSCORE . $sample_id . $outfile_suffix;

}
say {$filehandle} q{wait}, $NEWLINE;
Expand All @@ -344,8 +334,7 @@ sub analysis_expansionhunter {

## Get parameters
## Expansionhunter sample infiles need to be lexicographically sorted for svdb merge
my $svdb_outfile_path =
$outfile_path_prefix . $UNDERSCORE . q{vt_svdbmerge} . $outfile_suffix;
my $svdb_outfile_path = $outfile_path_prefix . $UNDERSCORE . q{vt_svdbmerge} . $outfile_suffix;

svdb_merge(
{
Expand All @@ -363,6 +352,7 @@ sub analysis_expansionhunter {
$outfile_path_prefix . $UNDERSCORE . q{vt_svdbmerge_ann} . $outfile_suffix;
stranger(
{
family_id => $case_id,
filehandle => $filehandle,
infile_path => $svdb_outfile_path,
repeats_file_path => $variant_catalog_file_path,
Expand Down Expand Up @@ -412,13 +402,13 @@ sub analysis_expansionhunter {

submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down

0 comments on commit 1c61549

Please sign in to comment.