From 9b633491aa1b3e5bfd90b32e857a3b31f114f3db Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 11 Jun 2021 09:43:48 +0200 Subject: [PATCH 001/116] feat(download): gnomad version 3.1.1 --- definitions/download_parameters.yaml | 13 + lib/MIP/Recipes/Download/Gnomad.pm | 275 +++++++++++++++++- lib/MIP/Recipes/Pipeline/Download.pm | 3 +- .../test_data/download_active_parameters.yaml | 11 + t/download_gnomad_chrsplit.t | 92 ++++++ .../mip_download_rd_dna_config_-1.0-.yaml | 10 + 6 files changed, 397 insertions(+), 7 deletions(-) create mode 100644 t/download_gnomad_chrsplit.t diff --git a/definitions/download_parameters.yaml b/definitions/download_parameters.yaml index f387432b2..71f62aa09 100644 --- a/definitions/download_parameters.yaml +++ b/definitions/download_parameters.yaml @@ -240,6 +240,17 @@ gnomad: - tabix - vcfanno type: recipe +gnomad_chrsplit: + analysis_mode: case + associated_recipe: + - mip + data_type: SCALAR + default: 1 + program_executables: + - bcftools + - bgzip + - tabix + type: recipe gnomad_pli_per_gene: analysis_mode: case associated_recipe: @@ -371,6 +382,7 @@ recipe_core_number: genomic_superdups: 1 giab: 1 gnomad: 1 + gnomad_chrsplit: 13 gnomad_pli_per_gene: 1 hapmap: 1 human_reference: 1 @@ -423,6 +435,7 @@ recipe_time: genomic_superdups: 1 giab: 1 gnomad: 25 + gnomad_chrsplit: 32 gnomad_pli_per_gene: 1 hapmap: 1 human_reference: 1 diff --git a/lib/MIP/Recipes/Download/Gnomad.pm b/lib/MIP/Recipes/Download/Gnomad.pm index 9092741bc..2cfe4ba31 100644 --- a/lib/MIP/Recipes/Download/Gnomad.pm +++ b/lib/MIP/Recipes/Download/Gnomad.pm @@ -5,7 +5,7 @@ use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; use File::Basename qw{ dirname }; -use File::Spec::Functions qw{ catfile }; +use File::Spec::Functions qw{ catfile devnull }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ allow check last_error }; use utf8; @@ -17,7 +17,14 @@ use autodie qw{ :all }; use Readonly; ## MIPs lib/ -use MIP::Constants qw{ $DASH 
$DOT $FORWARD_SLASH $NEWLINE $PIPE $SINGLE_QUOTE $SPACE $UNDERSCORE }; +use MIP::Constants + qw{ $DASH $DOT $EMPTY_STR $ESCAPE $FORWARD_SLASH $NEWLINE $PIPE $SINGLE_QUOTE $SPACE $UNDERSCORE }; + +## Constants +Readonly my $DOWNLOAD_TRIES => 12; +Readonly my $READ_TIMEOUT_SEC => 20; +Readonly my $TIMEOUT_SEC => 20; +Readonly my $WAIT_RETRY_SEC => 300; BEGIN { @@ -25,7 +32,7 @@ BEGIN { use base qw{ Exporter }; # Functions and variables which can be optionally exported - our @EXPORT_OK = qw{ download_gnomad }; + our @EXPORT_OK = qw{ download_gnomad download_gnomad_xargs}; } @@ -138,7 +145,7 @@ sub download_gnomad { } ); -## Filehandle(s) + ## Filehandle(s) # Create anonymous filehandle my $filehandle = IO::Handle->new(); @@ -258,6 +265,255 @@ sub download_gnomad { return 1; } +sub download_gnomad_xargs { + +## Function : Download gnomad using xargs +## Returns : +## Arguments: $active_parameter_href => Active parameters for this download hash {REF} +## : $genome_version => Human genome version +## : $job_id_href => The job_id hash {REF} +## : $profile_base_command => Submission profile base command +## : $recipe_name => Recipe name +## : $reference_href => Reference hash {REF} +## : $reference_version => Reference version +## : $quiet => Quiet (no output) +## : $temp_directory => Temporary directory for recipe +## : $verbose => Verbosity + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $active_parameter_href; + my $genome_version; + my $job_id_href; + my $recipe_name; + my $reference_href; + my $reference_version; + + ## Default(s) + my $profile_base_command; + my $quiet; + my $temp_directory; + my $verbose; + + my $tmpl = { + active_parameter_href => { + default => {}, + defined => 1, + required => 1, + store => \$active_parameter_href, + strict_type => 1, + }, + genome_version => { + store => \$genome_version, + strict_type => 1, + }, + job_id_href => { + default => {}, + defined => 1, + required => 1, + store => \$job_id_href, + strict_type => 1, + }, + 
profile_base_command => { + default => q{sbatch}, + store => \$profile_base_command, + strict_type => 1, + }, + recipe_name => { + defined => 1, + required => 1, + store => \$recipe_name, + strict_type => 1, + }, + reference_href => { + default => {}, + defined => 1, + required => 1, + store => \$reference_href, + strict_type => 1, + }, + reference_version => { + defined => 1, + required => 1, + store => \$reference_version, + strict_type => 1, + }, + quiet => { + allow => [ undef, 0, 1 ], + default => 1, + store => \$quiet, + strict_type => 1, + }, + temp_directory => { + store => \$temp_directory, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::Processmanagement::Slurm_processes qw{ slurm_submit_job_no_dependency_dead_end }; + use MIP::Program::Bcftools qw{ bcftools_concat }; + use MIP::Program::Gnu::Coreutils qw{ gnu_rm }; + use MIP::Program::Htslib qw{ htslib_tabix }; + use MIP::Program::Wget qw{ wget }; + use MIP::Recipe qw{ parse_recipe_prerequisites }; + use MIP::Recipes::Analysis::Xargs qw{ xargs_command }; + use MIP::Script::Setup_script qw{ setup_script }; + + ### PREPROCESSING: + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger( uc q{mip_download} ); + + ## Unpack parameters + my $reference_dir = $active_parameter_href->{reference_dir}; + + my %recipe = parse_recipe_prerequisites( + { + active_parameter_href => $active_parameter_href, + recipe_name => $recipe_name, + } + ); + + ## Filehandle(s) + # Create anonymous filehandle + my $filehandle = IO::Handle->new(); + my $xargsfilehandle = IO::Handle->new(); + + ## Creates recipe directories (info & data & script), recipe script filenames and writes sbatch header + my ( $recipe_file_path, $recipe_info_path ) = setup_script( + { + active_parameter_href => $active_parameter_href, + core_number => $recipe{core_number}, + directory_id => q{mip_download}, + filehandle => $filehandle, + info_file_id => $genome_version . 
$UNDERSCORE . $reference_version, + job_id_href => $job_id_href, + memory_allocation => $recipe{memory}, + outdata_dir => $reference_dir, + outscript_dir => $reference_dir, + process_time => $recipe{time}, + recipe_data_directory_path => $active_parameter_href->{reference_dir}, + recipe_directory => $recipe_name . $UNDERSCORE . $reference_version, + recipe_name => $recipe_name, + source_environment_commands_ref => $recipe{load_env_ref}, + } + ); + + ### SHELL: + + say {$filehandle} q{## } . $recipe_name; + + ## Create file commands for xargs + my ( $xargs_file_counter, $xargs_file_path_prefix ) = xargs_command( + { + core_number => $recipe{core_number}, + filehandle => $filehandle, + file_path => $recipe_file_path, + recipe_info_path => $recipe_info_path, + xargsfilehandle => $xargsfilehandle, + } + ); + + ## Expand to list + my @gnomad_files = glob $reference_href->{file}; + my @gnomad_file_paths; + + GNOMAD_FILE: + foreach my $gnomad_file (@gnomad_files) { + + my $gnomad_url = $reference_href->{url_prefix} . $gnomad_file; + + wget( + { + filehandle => $xargsfilehandle, + outfile_path => catfile( dirname( devnull() ), q{stdout} ), + quiet => 1, + read_timeout => $READ_TIMEOUT_SEC, + retry_connrefused => 1, + timeout => $TIMEOUT_SEC, + tries => $DOWNLOAD_TRIES, + url => $gnomad_url, + verbose => 0, + wait_retry => $WAIT_RETRY_SEC, + } + ); + print {$xargsfilehandle} $PIPE . 
$SPACE; + + my $gnomad_file_path = catfile( $reference_dir, $gnomad_file ); + _annotate( + { + escape => $ESCAPE, + filehandle => $xargsfilehandle, + info_keys_ref => [qw{ INFO/AF INFO/AF_popmax }], + infile_path => $DASH, + outfile_path => $gnomad_file_path, + } + ); + push @gnomad_file_paths, $gnomad_file_path; + } + + my $outfile_path = catfile( $reference_dir, $reference_href->{outfile} ); + bcftools_concat( + { + filehandle => $filehandle, + rm_dups => 0, + output_type => q{z}, + threads => $recipe{core_number}, + outfile_path => $outfile_path, + infile_paths_ref => \@gnomad_file_paths, + } + ); + say {$filehandle} $NEWLINE; + + htslib_tabix( + { + filehandle => $filehandle, + preset => q{vcf}, + infile_path => $outfile_path, + } + ); + say {$filehandle} $NEWLINE; + + ## Create AF file for bcftools roh + _build_af_file( + { + filehandle => $filehandle, + file_name => $reference_href->{outfile}, + infile_path => $outfile_path, + reference_dir => $reference_dir, + reference_version => $reference_version, + } + ); + + gnu_rm( + { + filehandle => $filehandle, + infile_path => catfile( $reference_dir, $reference_href->{file} ), + } + ); + + ## Close filehandles + close $filehandle or $log->logcroak(q{Could not close filehandle}); + close $xargsfilehandle or $log->logcroak(q{Could not close xargsfilehandle}); + + if ( $recipe{mode} == 1 ) { + + ## No upstream or downstream dependencies + slurm_submit_job_no_dependency_dead_end( + { + base_command => $profile_base_command, + job_id_href => $job_id_href, + log => $log, + sbatch_file_name => $recipe_file_path, + } + ); + } + return 1; +} + sub _build_af_file { ## Function : Build allele frequency file for bcftools roh @@ -301,7 +557,7 @@ sub _build_af_file { strict_type => 1, }, reference_version => { - allow => [qw{ r2.0.1 r2.1.1 r2.1.1_sv r3.0 }], + allow => [qw{ r2.0.1 r2.1.1 r2.1.1_sv r3.0 r3.1.1 }], required => 1, store => \$reference_version, strict_type => 1, @@ -374,7 +630,14 @@ sub _annotate { my 
$info_keys_ref; my $outfile_path; + ## Default + my $escape; + my $tmpl = { + escape => { + default => $EMPTY_STR, + store => \$escape, + }, filehandle => { required => 1, store => \$filehandle, @@ -409,7 +672,7 @@ sub _annotate { bcftools_annotate( { filehandle => $filehandle, - include => $SINGLE_QUOTE . $include_record . $SINGLE_QUOTE, + include => $escape . $SINGLE_QUOTE . $include_record . $escape . $SINGLE_QUOTE, infile_path => $infile_path, outfile_path => $outfile_path, output_type => q{z}, diff --git a/lib/MIP/Recipes/Pipeline/Download.pm b/lib/MIP/Recipes/Pipeline/Download.pm index 69cef2b0e..28d5c642c 100644 --- a/lib/MIP/Recipes/Pipeline/Download.pm +++ b/lib/MIP/Recipes/Pipeline/Download.pm @@ -96,7 +96,7 @@ sub pipeline_download { use MIP::Recipes::Download::Genomic_superdups qw{ download_genomic_superdups }; use MIP::Recipes::Download::Get_reference qw{ get_reference }; use MIP::Recipes::Download::Giab qw{ download_giab }; - use MIP::Recipes::Download::Gnomad qw{ download_gnomad }; + use MIP::Recipes::Download::Gnomad qw{ download_gnomad download_gnomad_xargs }; use MIP::Recipes::Download::Gnomad_pli_per_gene qw{ download_gnomad_pli_per_gene }; use MIP::Recipes::Download::Hapmap qw{ download_hapmap }; use MIP::Recipes::Download::Human_reference qw{ download_human_reference }; @@ -138,6 +138,7 @@ sub pipeline_download { giab => \&download_giab, gnomad => \&download_gnomad, gnomad_pli_per_gene => \&download_gnomad_pli_per_gene, + gnomad_chrsplit => \&download_gnomad_xargs, hapmap => \&download_hapmap, human_reference => \&download_human_reference, manta_call_regions => \&download_manta_call_regions, diff --git a/t/data/test_data/download_active_parameters.yaml b/t/data/test_data/download_active_parameters.yaml index 78b53f4d5..17d956804 100644 --- a/t/data/test_data/download_active_parameters.yaml +++ b/t/data/test_data/download_active_parameters.yaml @@ -92,6 +92,8 @@ reference: - na24631_v3.3.2_wgs gnomad: - r2.0.1 + gnomad_chrsplit: + - r3.1.1 
gnomad_pli_per_gene: - r2.1.1 hapmap: @@ -559,6 +561,15 @@ reference_feature: outfile_index: grch37_gnomad.genomes_-r2.0.1-.vcf.gz.tbi url_prefix: ftp://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh37/variation_genotype/ grch38: {} + gnomad_chrsplit: + grch37: {} + grch38: + r3.1.1: + file: gnomad.genomes.v3.1.1.sites.chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y}.vcf.bgz + file_index: gnomad.genomes.v3.1.1.sites.chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y}.vcf.bgz.tbi + outfile: grch38_gnomad_reformated_-r3.1.1-.vcf.gz + outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi + url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ gnomad_pli_per_gene: grch37: r2.1.1: diff --git a/t/download_gnomad_chrsplit.t b/t/download_gnomad_chrsplit.t new file mode 100644 index 000000000..cbe5b235c --- /dev/null +++ b/t/download_gnomad_chrsplit.t @@ -0,0 +1,92 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile }; +use File::Temp; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; +use Readonly; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COMMA $SPACE }; +use MIP::Test::Fixtures qw{ test_log test_mip_hashes }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( + q{MIP::Recipes::Download::Gnomad} => [qw{ download_gnomad_xargs }], + q{MIP::Test::Fixtures} => [qw{ test_log test_mip_hashes }], + ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Recipes::Download::Gnomad qw{ 
download_gnomad_xargs }; + +diag( q{Test download_gnomad_xargs from Gnomad.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . $EXECUTABLE_NAME ); + +my $test_dir = File::Temp->newdir(); +my $file_path = catfile( $test_dir, q{recipe_script.sh} ); +test_log( { log_name => uc q{mip_download}, no_screen => 1, } ); + +## Given download parameters for recipe +my $genome_version = q{grch38}; +my $recipe_name = q{gnomad_chrsplit}; +my $reference_version = q{r3.1.1}; +my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); + +my %active_parameter = test_mip_hashes( + { + mip_hash_name => q{download_active_parameter}, + } +); +$active_parameter{$recipe_name} = 1; +$active_parameter{project_id} = q{test}; +$active_parameter{reference_dir} = catfile($test_dir); +$active_parameter{recipe_core_number}{$recipe_name} = 1; +$active_parameter{recipe_time}{$recipe_name} = 1; +my $reference_href = + $active_parameter{reference_feature}{$recipe_name}{$genome_version}{$reference_version}; + +my %job_id; + +my $is_ok = download_gnomad_xargs( + { + active_parameter_href => \%active_parameter, + genome_version => $genome_version, + job_id_href => \%job_id, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + reference_href => $reference_href, + reference_version => $reference_version, + temp_directory => catfile($test_dir), + } +); + +## Then +ok( $is_ok, q{ Executed download recipe } . 
$recipe_name ); + +done_testing(); diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index e65379f51..419c208c7 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -86,6 +86,8 @@ reference: - r2.1.1 - r2.1.1_sv - r3.0 + gnomad_chrsplit: + - r3.1.1 gnomad_pli_per_gene: - r2.1.1 hapmap: @@ -709,6 +711,14 @@ reference_feature: outfile: grch38_gnomad.genomes_-r3.0-.vcf.bgz outfile_index: grch38_gnomad.genomes_-r3.0-.vcf.bgz.tbi url_prefix: https://storage.googleapis.com/gnomad-public/release/3.0/vcf/genomes/ + gnomad_chrsplit: + grch38: + r3.1.1: + file: gnomad.genomes.v3.1.1.sites.chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y}.vcf.bgz + file_index: gnomad.genomes.v3.1.1.sites.chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y}.vcf.bgz.tbi + outfile: grch38_gnomad_reformated_-r3.1.1-.vcf.gz + outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi + url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ gnomad_pli_per_gene: grch37: r2.1.1: From fbf58d94bf4917977818c6b497727382fc1216b2 Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 11 Jun 2021 10:04:13 +0200 Subject: [PATCH 002/116] updating test data for gnomad --- .../grch38_gnomad_reformated_-r3.0-.vcf.gz | 3389 ----------------- ...grch38_gnomad_reformated_-r3.0-.vcf.gz.tbi | 0 ... 
grch38_gnomad_reformated_-r3.1.1-.tab.gz} | 0 ...h38_gnomad_reformated_-r3.1.1-.tab.gz.tbi} | 0 .../grch38_gnomad_reformated_-r3.1.1-.vcf.gz | Bin 0 -> 1227 bytes ...ch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi | Bin 0 -> 72 bytes .../grch38_vcfanno_config_template-v1.0-.toml | 2 +- templates/grch38_mip_rd_dna_config.yaml | 2 +- 8 files changed, 2 insertions(+), 3391 deletions(-) delete mode 100644 t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz delete mode 100644 t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz.tbi rename t/data/references/{grch38_gnomad_reformated_-r3.0-.tab.gz => grch38_gnomad_reformated_-r3.1.1-.tab.gz} (100%) rename t/data/references/{grch38_gnomad_reformated_-r3.0-.tab.gz.tbi => grch38_gnomad_reformated_-r3.1.1-.tab.gz.tbi} (100%) create mode 100644 t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz create mode 100644 t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi diff --git a/t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz b/t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz deleted file mode 100644 index 8792da727..000000000 --- a/t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz +++ /dev/null @@ -1,3389 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##FILTER= -##FILTER= -##FILTER= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##hailversion=0.2.24-9cd88d97bedd -##bcftools_annotateVersion=1.10.2+htslib-1.10.2 -##bcftools_annotateCommand=annotate --output-type v --include 'INFO/AF>0 || INFO/AF_afr>0 || INFO/AF_amr>0 || INFO/AF_ami>0 || INFO/AF_eas>0 || INFO/AF_nfe>0 || INFO/AF_sas>0' --remove ^INFO/AF,^INFO/AF_afr,^INFO/AF_amr,^INFO/AF_ami,^INFO/AF_eas,^INFO/AF_nfe,^INFO/AF_sas /home/proj/development/rare-disease/references_9.0/grch38_gnomad.genomes_-r3.0-.vcf.bgz; Date=Fri Sep 4 13:31:52 2020 -##bcftools_viewVersion=1.10.2+htslib-1.10.2 -##bcftools_viewCommand=view --output-type z --output-file /home/proj/development/rare-disease/references_9.0/grch38_gnomad_reformated_-r3.0-.vcf.gz /dev/stdin; Date=Fri Sep 4 13:31:52 2020 -##bcftools_viewCommand=view --output-type v /home/proj/development/rare-disease/references_9.0/grch38_gnomad_reformated_-r3.0-.vcf.gz; Date=Tue Sep 22 15:43:13 2020 -##INFO= -##INFO= -##bcftools_normCommand=view -h /home/proj/development/rare-disease/references_9.0/grch38_gnomad_reformated_-r3.0-.vcf.gz; Date=Tue Oct 20 16:24:45 2020 -#CHROM POS ID REF ALT QUAL FILTER INFO diff --git a/t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz.tbi b/t/data/references/grch38_gnomad_reformated_-r3.0-.vcf.gz.tbi deleted file mode 100644 index e69de29bb..000000000 diff --git a/t/data/references/grch38_gnomad_reformated_-r3.0-.tab.gz b/t/data/references/grch38_gnomad_reformated_-r3.1.1-.tab.gz similarity index 100% rename from 
t/data/references/grch38_gnomad_reformated_-r3.0-.tab.gz rename to t/data/references/grch38_gnomad_reformated_-r3.1.1-.tab.gz diff --git a/t/data/references/grch38_gnomad_reformated_-r3.0-.tab.gz.tbi b/t/data/references/grch38_gnomad_reformated_-r3.1.1-.tab.gz.tbi similarity index 100% rename from t/data/references/grch38_gnomad_reformated_-r3.0-.tab.gz.tbi rename to t/data/references/grch38_gnomad_reformated_-r3.1.1-.tab.gz.tbi diff --git a/t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz b/t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..4351903d86233b7598929704146542a0aa209fd3 GIT binary patch literal 1227 zcmV;+1T^~}iwFb&00000{{{d;LjnM<1f`g5Z`(Ey$6vdjf@ofJ!z_hQBBiVf;5l`c zEziz^7R74lfwnbBqNfM9Df;1il5IJOi&n0JKsZwT9gnBKbO#?COruy$b(VOAdp{g4 zxk+Cf9E{G+@7~%n;3KT&y*MT;WRsrzye#|WrGWjR`Sd9K2j$H2_JC|<%` zwFaBi6x<+cDlxHT7)bl6*TE3)I90JifiBYmMLGJcGL5_`Y_i@fN@qxy1&Z}YBhcwI z3RM~?G*hWA9u_J`zntEn*NEcZ(aAL!1Pm_?(cljlKs*QziRP+6ffvX5uXV_e0~$~6 zZ^pMzgBrn+REVk@kD@5%nabxn4s)cZ$e^Z_%&FavFR$}@Q-hdTU{Oy-X{cfdZ;@%= z^P@N_B2|;Vs=Rrq$V!D4I)@ueg_d3xd1Q-xAk#U>NH5CRtJdcA9!s^DY_zl_n7~x%=jnFRCl-fEZ>A<;lowg# zmo@GBQ94oKOy%x-hE3c@%&M-uws~z^wQbe5sBKZ(vbJSy+hG6ON0qlKSob~Q)b1k! z4;8Ksw!ks1X1FCK*be1z6&BoO!~vTUi@{22GjNkm@}xR?<}xX8HIBV(218z-0_0po zvpT22JR|*BrL$u0Qf@nz;FR{Omn`w)2X~h0WNQEb6pI*#;$! zO&!a!+M|4nVoEr$tx>B%ZOE5R_t;f5sKhq59N8X~4Qe@z1Ik*Xc7t+@Fk3Lu8g&|! 
z+05aR$kr&qTb4;j?xP7pwycG1A@*VEqkXBg7;c|rvQ-q}OmGHoOKWy3DJ(H(lI+25 zHHC@e5Mu8Ub0aDTGk8k8y^={IDw|PDa!%T#ji_wQB;;w2T^mtZwjffF_Hv|+rUZvS zBHGK5Hj?5(V#l%Dzdy856ljsyT(p0%Z?|u)z#PN3u{HXx*;G<0IcZN`G@HV={d+?4 zgK434oKL(I8WJd;_v=bUOak4-* zDGm#j7l+m6UT9XWPoi>j$xLu-LjM4RN)+ZYn413VGcpY5m=qk#!(u?WvI}r zs)`>DM#mU^`GVGibvZxAuV7`S5)JLhhxNVwho_5>0eu#P>E6vt^jDcezlE@Ugz*vQ z(1cO)_>7R#z$@BjWVL#ekJadV*~kKT29s4%CfT}~RpG?Q>ckjzI=-2Ws+04ejU%jc zs(+5B9eLJ~=N);gBQHAgvLkPI8e7~RfeRi4oO!b zlCDamU6n{Xe+#=R@oh)`-G<+-Y?K^fam2+=Wm`t-9j+oI+?U$v=cm5q$6s{YK8 zY3^6U){%{}XcZYj*g3K&{I4S${&{- literal 0 HcmV?d00001 diff --git a/t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi b/t/data/references/grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..b9e0990fab2fca9dab7a72ca96023b80994bf088 GIT binary patch literal 72 zcmb2|=3rp}f&Xj_PR>jW+6>% Date: Mon, 14 Jun 2021 17:34:39 +0200 Subject: [PATCH 003/116] updating glnexus recipe --- lib/MIP/Recipes/Analysis/Glnexus.pm | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index 2e014d3e1..f01c28dfb 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -111,6 +111,7 @@ sub analysis_glnexus { }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + use MIP::File::Path qw{ remove_file_path_suffix }; use MIP::File_info qw{ get_io_files parse_io_outfiles }; use MIP::Program::Bcftools qw{ bcftools_norm }; use MIP::Program::Glnexus qw{ glnexus_merge }; @@ -190,8 +191,7 @@ sub analysis_glnexus { say {$filehandle} q{## } . 
$recipe_name; - ## Set infile for bcftools norm for single sample cases - my $bcftools_norm_infile_path = $genotype_infile_paths[0]; + my $bcftools_norm_infile_path; if ( scalar @{ $active_parameter_href->{sample_ids} } > 1 ) { @@ -209,6 +209,17 @@ sub analysis_glnexus { $bcftools_norm_infile_path = $DASH; } + else { + + ## Set infile for bcftools norm for single sample cases + $bcftools_norm_infile_path = remove_file_path_suffix( + { + file_path => $genotype_infile_paths[0], + file_suffixes_ref => [q{.g.vcf.gz}], + } + ); + $bcftools_norm_infile_path .= q{.vcf.gz}; + } bcftools_norm( { From cdd0fce428bfd4277a527061e8a58f77640b08f0 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 15 Jun 2021 11:58:29 +0200 Subject: [PATCH 004/116] changes to glnexus recipe --- lib/MIP/Recipes/Analysis/Glnexus.pm | 13 ++++--------- t/analysis_glnexus.t | 17 ++++++----------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index f01c28dfb..7eae91b4a 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -111,7 +111,6 @@ sub analysis_glnexus { }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - use MIP::File::Path qw{ remove_file_path_suffix }; use MIP::File_info qw{ get_io_files parse_io_outfiles }; use MIP::Program::Bcftools qw{ bcftools_norm }; use MIP::Program::Glnexus qw{ glnexus_merge }; @@ -138,6 +137,7 @@ sub analysis_glnexus { ## Get the io infiles per chain and id my @genotype_infile_paths; + my @genotype_infile_path_prefixes; SAMPLE_ID: foreach my $sample_id ( @{ $active_parameter_href->{sample_ids} } ) { @@ -152,7 +152,8 @@ sub analysis_glnexus { stream => q{in}, } ); - push @genotype_infile_paths, $sample_io{in}{file_path}; + push @genotype_infile_paths, $sample_io{in}{file_path}; + push @genotype_infile_path_prefixes, $sample_io{in}{file_path_prefix}; } my %io = parse_io_outfiles( @@ -212,13 +213,7 @@ sub analysis_glnexus 
{ else { ## Set infile for bcftools norm for single sample cases - $bcftools_norm_infile_path = remove_file_path_suffix( - { - file_path => $genotype_infile_paths[0], - file_suffixes_ref => [q{.g.vcf.gz}], - } - ); - $bcftools_norm_infile_path .= q{.vcf.gz}; + $bcftools_norm_infile_path = $genotype_infile_path_prefixes[0] . q{.vcf.gz}; } bcftools_norm( diff --git a/t/analysis_glnexus.t b/t/analysis_glnexus.t index ed1786cd4..96fba75cc 100644 --- a/t/analysis_glnexus.t +++ b/t/analysis_glnexus.t @@ -21,8 +21,7 @@ use Test::Trap; ## MIPs lib/ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COLON $COMMA $SPACE }; -use MIP::Test::Fixtures - qw{ test_add_io_for_recipe test_log test_mip_hashes }; +use MIP::Test::Fixtures qw{ test_add_io_for_recipe test_log test_mip_hashes }; BEGIN { @@ -32,8 +31,7 @@ BEGIN { ## Modules with import my %perl_module = ( q{MIP::Recipes::Analysis::Glnexus} => [qw{ analysis_glnexus }], - q{MIP::Test::Fixtures} => - [qw{ test_add_io_for_recipe test_log test_mip_hashes }], + q{MIP::Test::Fixtures} => [qw{ test_add_io_for_recipe test_log test_mip_hashes }], ); test_import( { perl_module_href => \%perl_module, } ); @@ -49,10 +47,10 @@ diag( q{Test analysis_glnexus from Glnexus.pm} . $SPACE . $EXECUTABLE_NAME ); -my $log = test_log( { log_name => q{MIP}, no_screen => 1, } ); +test_log( { no_screen => 1, } ); ## Given analysis parameters -my $recipe_name = q{Glnexus}; +my $recipe_name = q{glnexus_merge}; my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); my %active_parameter = test_mip_hashes( @@ -118,15 +116,12 @@ foreach my $analysis_type (qw { mixed panel wgs wes }) { } ); ## Then return TRUE - ok( $is_ok, - q{ Executed analysis recipe } . $recipe_name . q{ with type } . $analysis_type ); + ok( $is_ok, q{ Executed analysis recipe } . $recipe_name . q{ with type } . 
$analysis_type ); } # Given a single sample $active_parameter{sample_ids} = [ $active_parameter{sample_ids}[0] ]; -$parameter{Glnexus}{chain} = q{TEST}; -$parameter{deepvariant}{chain} = q{TEST}; my $is_ok = analysis_glnexus( { active_parameter_href => \%active_parameter, @@ -135,7 +130,7 @@ my $is_ok = analysis_glnexus( job_id_href => \%job_id, parameter_href => \%parameter, profile_base_command => $slurm_mock_cmd, - recipe_name => q{Glnexus}, + recipe_name => $recipe_name, sample_info_href => \%sample_info, } ); From fa5f030ab538ee09d5b57b77553f7c7366bf9e0e Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 15 Jun 2021 12:01:50 +0200 Subject: [PATCH 005/116] version bump and changelog update --- CHANGELOG.md | 4 ++++ lib/MIP/Constants.pm | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 619daefad..ad20eab1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). 
+## [10.0.2] + +- Don't use gvcf for single sample cases in glnexus recipe + ## [10.0.1] - Fix to gene panel regexp diff --git a/lib/MIP/Constants.pm b/lib/MIP/Constants.pm index 602af9250..b79aaf105 100644 --- a/lib/MIP/Constants.pm +++ b/lib/MIP/Constants.pm @@ -81,7 +81,7 @@ Readonly our %ANALYSIS => ( ); ## Set MIP version -Readonly our $MIP_VERSION => q{10.0.1}; +Readonly our $MIP_VERSION => q{10.0.2}; ## Cli Readonly our $MOOSEX_APP_SCEEN_WIDTH => 160; diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 88ad7e262..ba128caa3 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -117,7 +117,7 @@ container: mip: executable: mip: - uri: docker.io/clinicalgenomics/mip:v10.0.1 + uri: docker.io/clinicalgenomics/mip:v10.0.2 multiqc: executable: multiqc: From eaddbcded6a1f648ff3b0c729c544a2752799821 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 16 Jun 2021 10:20:34 +0200 Subject: [PATCH 006/116] use glnexus for single sampple cases --- CHANGELOG.md | 4 +-- lib/MIP/Recipes/Analysis/Glnexus.pm | 40 ++++++++++------------------- t/analysis_glnexus.t | 18 ------------- 3 files changed, 15 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad20eab1d..6603f9782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [10.0.2] -- Don't use gvcf for single sample cases in glnexus recipe +- Glnexus are used regardless of how many samples that are analysed. ## [10.0.1] @@ -14,8 +14,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- Increased memory allocation for version_collect ### Tools -stranger: 0.7.1 -> 0.8.0 +stranger: 0.7.1 -> 0.8.0 ## [10.0.0] diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index 7eae91b4a..f4fd02a24 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -137,7 +137,6 @@ sub analysis_glnexus { ## Get the io infiles per chain and id my @genotype_infile_paths; - my @genotype_infile_path_prefixes; SAMPLE_ID: foreach my $sample_id ( @{ $active_parameter_href->{sample_ids} } ) { @@ -152,8 +151,7 @@ sub analysis_glnexus { stream => q{in}, } ); - push @genotype_infile_paths, $sample_io{in}{file_path}; - push @genotype_infile_path_prefixes, $sample_io{in}{file_path_prefix}; + push @genotype_infile_paths, $sample_io{in}{file_path}; } my %io = parse_io_outfiles( @@ -192,34 +190,22 @@ sub analysis_glnexus { say {$filehandle} q{## } . $recipe_name; - my $bcftools_norm_infile_path; - - if ( scalar @{ $active_parameter_href->{sample_ids} } > 1 ) { - - glnexus_merge( - { - config => q{DeepVariant_unfiltered}, - dir => catdir( $active_parameter_href->{temp_directory}, q{glnexus} ), - filehandle => $filehandle, - infile_paths_ref => \@genotype_infile_paths, - memory => $memory, - threads => $core_number, - } - ); - print {$filehandle} $PIPE . $SPACE; - - $bcftools_norm_infile_path = $DASH; - } - else { - - ## Set infile for bcftools norm for single sample cases - $bcftools_norm_infile_path = $genotype_infile_path_prefixes[0] . q{.vcf.gz}; - } + glnexus_merge( + { + config => q{DeepVariant_unfiltered}, + dir => catdir( $active_parameter_href->{temp_directory}, q{glnexus} ), + filehandle => $filehandle, + infile_paths_ref => \@genotype_infile_paths, + memory => $memory, + threads => $core_number, + } + ); + print {$filehandle} $PIPE . 
$SPACE; bcftools_norm( { filehandle => $filehandle, - infile_path => $bcftools_norm_infile_path, + infile_path => $DASH, multiallelic => q{-}, outfile_path => $outfile_path, output_type => q{z}, diff --git a/t/analysis_glnexus.t b/t/analysis_glnexus.t index 96fba75cc..9f5f6228c 100644 --- a/t/analysis_glnexus.t +++ b/t/analysis_glnexus.t @@ -119,22 +119,4 @@ foreach my $analysis_type (qw { mixed panel wgs wes }) { ok( $is_ok, q{ Executed analysis recipe } . $recipe_name . q{ with type } . $analysis_type ); } -# Given a single sample -$active_parameter{sample_ids} = [ $active_parameter{sample_ids}[0] ]; - -my $is_ok = analysis_glnexus( - { - active_parameter_href => \%active_parameter, - case_id => $case_id, - file_info_href => \%file_info, - job_id_href => \%job_id, - parameter_href => \%parameter, - profile_base_command => $slurm_mock_cmd, - recipe_name => $recipe_name, - sample_info_href => \%sample_info, - } -); -## Then return TRUE -ok( $is_ok, q{ Executed analysis recipe } . $recipe_name . 
q{ with a single sample } ); - done_testing(); From 37ea74acf5cfd22bb850fe9926d6332405b9c793 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 17 Jun 2021 11:25:50 +0200 Subject: [PATCH 007/116] installing hmtnote --- containers/hmtnote/Dockerfile | 21 +++++++++++++++++++++ definitions/install_parameters.yaml | 1 + lib/MIP/Cli/Mip/Install.pm | 4 ++-- templates/mip_install_config.yaml | 4 ++++ 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 containers/hmtnote/Dockerfile diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile new file mode 100644 index 000000000..d01bbe763 --- /dev/null +++ b/containers/hmtnote/Dockerfile @@ -0,0 +1,21 @@ +################## BASE IMAGE ###################### + +FROM clinicalgenomics/mip_base:2.1 + +################## METADATA ###################### + +LABEL base_image="clinicalgenomics/mip_base:2.1" +LABEL version="1" +LABEL software="HmtNote" +LABEL software.version="0.7.2" +LABEL extra.binaries="HmtNote" +LABEL maintainer="Clinical-Genomics/MIP" + +RUN conda install pip python=3.7 + +## Clean up after conda +RUN /opt/conda/bin/conda clean -tipsy + +RUN pip install --no-cache-dir hmtnote==0.7.2 + +WORKDIR /data/ diff --git a/definitions/install_parameters.yaml b/definitions/install_parameters.yaml index 811b06f5a..59b8493aa 100644 --- a/definitions/install_parameters.yaml +++ b/definitions/install_parameters.yaml @@ -81,6 +81,7 @@ rd_dna: - gatk4 - genmod - glnexus + - hmtnote - htslib - manta - mip diff --git a/lib/MIP/Cli/Mip/Install.pm b/lib/MIP/Cli/Mip/Install.pm index faf75f180..c605712b7 100644 --- a/lib/MIP/Cli/Mip/Install.pm +++ b/lib/MIP/Cli/Mip/Install.pm @@ -132,7 +132,7 @@ sub _build_usage { [ qw{ arriba bedtools blobfish bootstrapann bwa bwakit bwa-mem2 cadd chanjo chromograph cnvnator cyrius deeptrio deepvariant delly expansionhunter fastqc gatk - gatk4 genmod gffcompare glnexus htslib manta megafusion mip mip_scripts multiqc + gatk4 genmod gffcompare glnexus hmtnote htslib manta 
megafusion mip mip_scripts multiqc pdfmerger perl peddy picard plink preseq python rhocall rseqc rtg-tools salmon sambamba smncopynumbercaller star star-fusion stranger stringtie svdb telomerecat tiddit trim-galore ucsc upd utilities varg vcf2cytosure vcfanno vep vts } @@ -154,7 +154,7 @@ sub _build_usage { [ qw{ arriba bedtools blobfish bootstrapann bwa bwakit bwa-mem2 cadd chanjo chromograph cnvnator cyrius deeptrio deepvariant delly expansionhunter fastqc gatk - gatk4 genmod gffcompare glnexus htslib manta megafusion mip mip_scripts multiqc + gatk4 genmod gffcompare glnexus hmtnote htslib manta megafusion mip mip_scripts multiqc pdfmerger perl peddy picard plink preseq python rhocall rseqc rtg-tools salmon sambamba smncopynumbercaller star star-fusion stranger stringtie svdb telomerecat tiddit trim-galore ucsc upd utilities varg vcf2cytosure vcfanno vep vts } diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 88ad7e262..6778e19ae 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -98,6 +98,10 @@ container: executable: glnexus_cli: uri: quay.io/mlin/glnexus:v1.3.1 + hmtnote: + executable: + hmtnote: + uri: docker.io/clinicalgenomics/hmtnote:0.7.2 htslib: executable: bcftools: From 8ece2e470e08caeced5e31554e6383795399338f Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 17 Jun 2021 13:35:37 +0200 Subject: [PATCH 008/116] module added --- containers/hmtnote/Dockerfile | 2 + lib/MIP/Program/HmtNote.pm | 131 ++++++++++++++++++++++++++++++++++ t/hmtnote_annotate.t | 116 ++++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 lib/MIP/Program/HmtNote.pm create mode 100644 t/hmtnote_annotate.t diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index d01bbe763..1eec118b1 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -18,4 +18,6 @@ RUN /opt/conda/bin/conda clean -tipsy RUN pip install --no-cache-dir 
hmtnote==0.7.2 +RUN hmtnote dump + WORKDIR /data/ diff --git a/lib/MIP/Program/HmtNote.pm b/lib/MIP/Program/HmtNote.pm new file mode 100644 index 000000000..32637720c --- /dev/null +++ b/lib/MIP/Program/HmtNote.pm @@ -0,0 +1,131 @@ +package MIP::Program::HmtNote; + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use utf8; +use warnings; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw{ :all }; +use Readonly; + +## MIPs lib/ +use MIP::Constants qw{ $SPACE }; +use MIP::Environment::Executable qw{ get_executable_base_command }; +use MIP::Unix::Standard_streams qw{ unix_standard_streams }; +use MIP::Unix::Write_to_file qw{ unix_write_to_file }; + +BEGIN { + require Exporter; + use base qw{ Exporter }; + + # Set the version for version checking + # Functions and variables which can be optionally exported + our @EXPORT_OK = qw{ hmtnote_annotate }; +} + +Readonly my $BASE_COMMAND => q{hmtnote}; + +sub hmtnote_annotate { + +## Function : Perl wrapper for hmtnote version 0.7.2 +## Returns : @commands +## Arguments: $filehandle => Filehandle to write to +## : $infile_path => Input.vcf +## : $offline => An argument if running the command offline +## : $outfile_path => Annotate.vcf +## : $stderrfile_path => Stderrfile path +## : $stderrfile_path_append => Append stderr info to file path +## : $stdinfile_path => Stdinfile path +## : $stdoutfile_path => Stdoutfile path + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $filehandle; + my $infile_path; + my $offline; + my $outfile_path; + my $stderrfile_path; + my $stderrfile_path_append; + my $stdinfile_path; + my $stdoutfile_path; + + ## Default(s) + + my $tmpl = { + filehandle => { + store => \$filehandle, + }, + infile_path => { + defined => 1, + required => 1, + store => \$infile_path, + strict_type => 1, + }, + offline => { + allow => [ undef, 0, 1 ], + store => \$offline, + 
strict_type => 1, + }, + outfile_path => { + defined => 1, + required => 1, + store => \$outfile_path, + strict_type => 1, + }, + stderrfile_path => { + store => \$stderrfile_path, + strict_type => 1, + }, + stderrfile_path_append => { + store => \$stderrfile_path_append, + strict_type => 1, + }, + stdinfile_path => { store => \$stdinfile_path, strict_type => 1, }, + stdoutfile_path => { + store => \$stdoutfile_path, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + my @commands = + ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), qw{ annotate } ); + + push @commands, $infile_path; + push @commands, $outfile_path; + + if ($offline) { + + push @commands, q{--offline}; + } + + push @commands, + unix_standard_streams( + { + stderrfile_path => $stderrfile_path, + stderrfile_path_append => $stderrfile_path_append, + stdinfile_path => $stdinfile_path, + stdoutfile_path => $stdoutfile_path, + } + ); + + unix_write_to_file( + { + commands_ref => \@commands, + filehandle => $filehandle, + separator => $SPACE, + + } + ); + return @commands; +} + +1; diff --git a/t/hmtnote_annotate.t b/t/hmtnote_annotate.t new file mode 100644 index 000000000..fba74699a --- /dev/null +++ b/t/hmtnote_annotate.t @@ -0,0 +1,116 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw{ :all }; +use Modern::Perl qw{ 2018 }; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COMMA $SPACE }; +use MIP::Test::Commands qw{ test_function }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## 
Modules with import + my %perl_module = ( q{MIP::Program::HmtNote} => [qw{ hmtnote_annotate }], ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Program::HmtNote qw{ hmtnote_annotate }; + +diag( q{Test hmtnote_annotate from HmtNote.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . $EXECUTABLE_NAME ); + +## Base arguments +my @function_base_commands = qw{ hmtnote }; + +my %base_argument = ( + filehandle => { + input => undef, + expected_output => \@function_base_commands, + }, + stderrfile_path => { + input => q{stderrfile.test}, + expected_output => q{2> stderrfile.test}, + }, + stderrfile_path_append => { + input => q{stderrfile.test}, + expected_output => q{2>> stderrfile.test}, + }, + stdoutfile_path => { + input => q{stdoutfile.test}, + expected_output => q{1> stdoutfile.test}, + }, +); + +## Can be duplicated with %base_argument and/or %specific_argument +## to enable testing of each individual argument +my %required_argument = ( + infile_path => { + input => q{myfile.vcf}, + expected_output => q{myfile.vcf}, + }, + outfile_path => { + input => q{outfile.vcf}, + expected_output => q{outfile.vcf}, + }, +); + +my %specific_argument = ( + offline => { + input => 1, + expected_output => q{--offline}, + }, + infile_path => { + input => q{myfile.vcf}, + expected_output => q{myfile.vcf}, + }, + outfile_path => { + input => q{outfile.vcf}, + expected_output => q{outfile.vcf}, + }, +); + +## Coderef - enables generalized use of generate call +my $module_function_cref = \&hmtnote_annotate; + +## Test both base and function specific arguments +my @arguments = ( \%base_argument, \%specific_argument ); + +ARGUMENT_HASH_REF: +foreach my $argument_href (@arguments) { + + my @commands = test_function( + { + argument_href => $argument_href, + do_test_base_command => 1, + function_base_commands_ref => \@function_base_commands, + module_function_cref => $module_function_cref, + required_argument_href => 
\%required_argument, + } + ); +} + +done_testing(); From 6e4bea2e96dd02edb8214c10a27488394a7cc9f5 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 17 Jun 2021 13:46:42 +0200 Subject: [PATCH 009/116] docker run statement fix --- containers/hmtnote/Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index 1eec118b1..c55801bc8 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -14,10 +14,8 @@ LABEL maintainer="Clinical-Genomics/MIP" RUN conda install pip python=3.7 ## Clean up after conda -RUN /opt/conda/bin/conda clean -tipsy - -RUN pip install --no-cache-dir hmtnote==0.7.2 - -RUN hmtnote dump +RUN /opt/conda/bin/conda clean -tipsy && \ + pip install --no-cache-dir hmtnote==0.7.2 && \ + hmtnote dump WORKDIR /data/ From 53d9cd6068f469099c37a2d1fa313f6ef7693577 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 17 Jun 2021 14:02:27 +0200 Subject: [PATCH 010/116] fix docker run command --- containers/hmtnote/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index c55801bc8..52609e507 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -11,11 +11,11 @@ LABEL software.version="0.7.2" LABEL extra.binaries="HmtNote" LABEL maintainer="Clinical-Genomics/MIP" -RUN conda install pip python=3.7 +## Conda env installation + clean up +RUN conda install pip python=3.7 && \ + /opt/conda/bin/conda clean -tipsy -## Clean up after conda -RUN /opt/conda/bin/conda clean -tipsy && \ - pip install --no-cache-dir hmtnote==0.7.2 && \ +RUN pip install --no-cache-dir hmtnote==0.7.2 && \ hmtnote dump WORKDIR /data/ From 25bf59bf6589ff9e82ee9b464f48d3fcf90f788f Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 17 Jun 2021 18:17:38 +0200 Subject: [PATCH 011/116] adds gnomad mt as reference for vcf annotation --- CHANGELOG.md | 7 ++ ...l => 
grch37_sv_vcfanno_config_-v1.4-.toml} | 0 ...l => grch38_sv_vcfanno_config_-v0.2-.toml} | 0 templates/grch38_mip_rd_dna_config.yaml | 4 +- .../mip_download_rd_dna_config_-1.0-.yaml | 76 +++++++++++-------- templates/mip_dragen_rd_dna_config.yaml | 4 +- templates/mip_rd_dna_config.yaml | 4 +- templates/mip_rd_dna_panel_config.yaml | 2 +- templates/mip_rd_dna_vcf_rerun_config.yaml | 4 +- 9 files changed, 59 insertions(+), 42 deletions(-) rename t/data/references/{grch37_sv_vcfanno_config_-v1.3-.toml => grch37_sv_vcfanno_config_-v1.4-.toml} (100%) rename t/data/references/{grch38_sv_vcfanno_config_-v0.1-.toml => grch38_sv_vcfanno_config_-v0.2-.toml} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 619daefad..6e704a34b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [Develop] + +### References + +- gnomad: r3.0 -> r3.1.1 +- [NEW] gnomad mt: r3.1 + ## [10.0.1] - Fix to gene panel regexp diff --git a/t/data/references/grch37_sv_vcfanno_config_-v1.3-.toml b/t/data/references/grch37_sv_vcfanno_config_-v1.4-.toml similarity index 100% rename from t/data/references/grch37_sv_vcfanno_config_-v1.3-.toml rename to t/data/references/grch37_sv_vcfanno_config_-v1.4-.toml diff --git a/t/data/references/grch38_sv_vcfanno_config_-v0.1-.toml b/t/data/references/grch38_sv_vcfanno_config_-v0.2-.toml similarity index 100% rename from t/data/references/grch38_sv_vcfanno_config_-v0.1-.toml rename to t/data/references/grch38_sv_vcfanno_config_-v0.2-.toml diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index 71c3c1650..71f39299b 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -60,9 +60,9 @@ sv_rank_model_file: grch38_sv_rank_model_-v0.1-.ini sv_svdb_query_db_files: # FORMAT: 
filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER grch38_swegen_concat_sv_-2020-.vcf: swegen|AF|OCC|FRQ|OCC|1 -sv_vcfanno_config: grch38_sv_vcfanno_config_-v0.1-.toml +sv_vcfanno_config: grch38_sv_vcfanno_config_-v0.2-.toml vcf2cytosure_exclude_filter: "'swegenAF > 0.05'" -vcfanno_config: grch38_vcfanno_config_-v0.1-.toml +vcfanno_config: grch38_vcfanno_config_-v0.2-.toml ### Analysis ## Programs ## Parameters diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 419c208c7..98946a151 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -88,6 +88,8 @@ reference: - r3.0 gnomad_chrsplit: - r3.1.1 + gnomad_mt: + - r3.1 gnomad_pli_per_gene: - r2.1.1 hapmap: @@ -110,18 +112,18 @@ reference: - 200917 - 201701 sv_vcfanno_config: - - v0.1 - - v1.2 + - v0.2 - v1.3 + - v1.4 svrank_model: - v0.1 - v1.8 vcf2cytosure_blacklist_regions: - 1.0 vcfanno_config: - - v0.1 - - v1.14 + - v0.2 - v1.15 + - v1.16 vcfanno_functions: - v1.0 reference_feature: @@ -719,6 +721,14 @@ reference_feature: outfile: grch38_gnomad_reformated_-r3.1.1-.vcf.gz outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ + gnomad_mt: + grch38: + r3.1: + file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz + file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi + outfile: gnomad_genomes_mt_-r3.1-.bgz + outfile_index: gnomad_genomes_mt_-r3.1-.bgz.tbi + url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: r2.1.1: @@ -803,11 +813,11 @@ reference_feature: outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/rank_model/ grch38: - v0.1: - file: grch38_rank_model_-v0.1-.ini - file_check: 
grch38_rank_model_-v0.1-.ini.md5 - outfile: grch38_rank_model_-v0.1-.ini - outfile_check: grch38_rank_model_-v0.1-.ini.md5 + v0.2: + file: grch38_rank_model_-v0.2-.ini + file_check: grch38_rank_model_-v0.2-.ini.md5 + outfile: grch38_rank_model_-v0.2-.ini + outfile_check: grch38_rank_model_-v0.2-.ini.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/rank_model/ reduced_penetrance: @@ -835,13 +845,6 @@ reference_feature: url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/gene_panel/ sv_vcfanno_config: grch37: - v1.2: - file: grch37_sv_frequency_vcfanno_filter_config_-v1.2-.toml - file_check: grch37_sv_frequency_vcfanno_filter_config_-v1.2-.toml.md5 - outfile: grch37_sv_frequency_vcfanno_filter_config_-v1.2-.toml - outfile_check: grch37_sv_frequency_vcfanno_filter_config_-v1.2-.toml.md5 - outfile_check_method: md5sum - url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ v1.3: file: grch37_sv_vcfanno_config_-v1.3-.toml file_check: grch37_sv_vcfanno_config_-v1.3-.toml.md5 @@ -849,12 +852,19 @@ reference_feature: outfile_check: grch37_sv_vcfanno_config_-v1.3-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ + v1.4: + file: grch37_sv_vcfanno_config_-v1.4-.toml + file_check: grch37_sv_vcfanno_config_-v1.4-.toml.md5 + outfile: grch37_sv_vcfanno_config_-v1.4-.toml + outfile_check: grch37_sv_vcfanno_config_-v1.4-.toml.md5 + outfile_check_method: md5sum + url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ grch38: - v0.1: - file: grch38_sv_vcfanno_config_-v0.1-.toml - file_check: grch38_sv_vcfanno_config_-v0.1-.toml.md5 - outfile: grch38_sv_vcfanno_config_-v0.1-.toml - outfile_check: grch38_sv_vcfanno_config_-v0.1-.toml.md5 + v0.2: + file: grch38_sv_vcfanno_config_-v0.2-.toml + file_check: grch38_sv_vcfanno_config_-v0.2-.toml.md5 + outfile:
grch38_sv_vcfanno_config_-v0.1-.toml - outfile_check: grch38_sv_vcfanno_config_-v0.1-.toml.md5 + v0.2: + file: grch38_sv_vcfanno_config_-v0.2-.toml + file_check: grch38_sv_vcfanno_config_-v0.2-.toml.md5 + outfile: grch38_sv_vcfanno_config_-v0.2-.toml + outfile_check: grch38_sv_vcfanno_config_-v0.2-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ svrank_model: @@ -885,13 +895,6 @@ reference_feature: url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/region/ vcfanno_config: grch37: - v1.14: - file: grch37_vcfanno_config_-v1.14-.toml - file_check: grch37_vcfanno_config_-v1.14-.toml.md5 - outfile: grch37_vcfanno_config_-v1.14-.toml - outfile_check: grch37_vcfanno_config_-v1.14-.toml.md5 - outfile_check_method: md5sum - url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ v1.15: file: grch37_vcfanno_config_-v1.15-.toml file_check: grch37_vcfanno_config_-v1.15-.toml.md5 @@ -899,12 +902,19 @@ reference_feature: outfile_check: grch37_vcfanno_config_-v1.15-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ + v1.16: + file: grch37_vcfanno_config_-v1.16-.toml + file_check: grch37_vcfanno_config_-v1.16-.toml.md5 + outfile: grch37_vcfanno_config_-v1.16-.toml + outfile_check: grch37_vcfanno_config_-v1.16-.toml.md5 + outfile_check_method: md5sum + url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ grch38: - v0.1: - file: grch38_vcfanno_config_-v0.1-.toml - file_check: grch38_vcfanno_config_-v0.1-.toml.md5 - outfile: grch38_vcfanno_config_-v0.1-.toml - outfile_check: grch38_vcfanno_config_-v0.1-.toml.md5 + v0.2: + file: grch38_vcfanno_config_-v0.2-.toml + file_check: grch38_vcfanno_config_-v0.2-.toml.md5 + outfile: 
grch38_vcfanno_config_-v0.2-.toml + outfile_check: grch38_vcfanno_config_-v0.2-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ vcfanno_functions: diff --git a/templates/mip_dragen_rd_dna_config.yaml b/templates/mip_dragen_rd_dna_config.yaml index d8ef21bf2..3fcba1f0f 100644 --- a/templates/mip_dragen_rd_dna_config.yaml +++ b/templates/mip_dragen_rd_dna_config.yaml @@ -34,8 +34,8 @@ sv_svdb_query_db_files: grch37_svdb_query_clingen_cgh_pathogenic_-v1.0.0-.vcf: clingen_cgh_pathogenic grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1 grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1 -sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.3-.toml -vcfanno_config: grch37_vcfanno_config_-v1.15-.toml +sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml +vcfanno_config: grch37_vcfanno_config_-v1.16-.toml ### Analysis ## Programs ## Parameters diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 5a408fbd2..5745445b7 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -25,7 +25,7 @@ genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta rank_model_file: rank_model_-v1.32-.ini sambamba_depth_bed: grch37_scout_exons_-2017-01-.bed -sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.3-.toml +sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml sv_genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv sv_rank_model_file: svrank_model_-v1.8-.ini sv_svdb_query_db_files: @@ -39,7 +39,7 @@ sv_svdb_query_db_files: grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1 grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1 vcf2cytosure_blacklist: grch37_cytosure_blacklist_-1.0-.bed -vcfanno_config: grch37_vcfanno_config_-v1.15-.toml +vcfanno_config: 
grch37_vcfanno_config_-v1.16-.toml ### Analysis ### Programs ## Parameters diff --git a/templates/mip_rd_dna_panel_config.yaml b/templates/mip_rd_dna_panel_config.yaml index a8b28e3cb..7a2802008 100755 --- a/templates/mip_rd_dna_panel_config.yaml +++ b/templates/mip_rd_dna_panel_config.yaml @@ -24,7 +24,7 @@ genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta rank_model_file: rank_model_-v1.32-.ini sambamba_depth_bed: grch37_scout_exons_-2017-01-.bed -vcfanno_config: grch37_vcfanno_config_v1.15-.toml +vcfanno_config: grch37_vcfanno_config_-v1.16-.toml ### Analysis ### Programs ## Parameters diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 11da5e451..791a86aae 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -35,8 +35,8 @@ sv_svdb_query_db_files: grch37_svdb_query_clingen_cgh_pathogenic_-v1.0.0-.vcf: clingen_cgh_pathogenic grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1 grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1 -sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.3-.toml -vcfanno_config: grch37_vcfanno_config_-v1.15-.toml +sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml +vcfanno_config: grch37_vcfanno_config_-v1.16-.toml ### Analysis ## Programs ## Parameters From 8fb653d85e150c79c18a8351cf444c19f1c831de Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Fri, 18 Jun 2021 12:33:56 +0200 Subject: [PATCH 012/116] added mt_annotation to analysis --- definitions/rd_dna_initiation_map.yaml | 1 + definitions/rd_dna_parameters.yaml | 22 ++ lib/MIP/Cli/Mip/Analyse/Rd_dna.pm | 17 ++ lib/MIP/Recipes/Analysis/Mt_annotation.pm | 298 +++++++++++++++++++++ lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm | 2 + 5 files changed, 340 insertions(+) create mode 100644 lib/MIP/Recipes/Analysis/Mt_annotation.pm diff --git
a/definitions/rd_dna_initiation_map.yaml b/definitions/rd_dna_initiation_map.yaml index 5160f4364..1e4bea00e 100644 --- a/definitions/rd_dna_initiation_map.yaml +++ b/definitions/rd_dna_initiation_map.yaml @@ -75,6 +75,7 @@ CHAIN_ALL: - rhocall_ar - bcftools_norm - variant_annotation + - mt_annotation - CHAIN_RHOVIZ: - rhocall_viz - chromograph_rhoviz diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 3e8d08353..3f93ec31f 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -175,6 +175,7 @@ recipe_core_number: gzip_fastq: 0 manta: 36 markduplicates: 13 + mt_annotation: 1 multiqc_ar: 1 peddy_ar: 4 picardtools_collecthsmetrics: 1 @@ -245,6 +246,7 @@ recipe_memory: gatk_variantrecalibration: 30 glnexus_merge: 10 markduplicates: 10 + mt_annotation: 2 picardtools_collecthsmetrics: 8 picardtools_collectmultiplemetrics: 8 plink: 10 @@ -305,6 +307,7 @@ recipe_time: gzip_fastq: 2 manta: 30 markduplicates: 20 + mt_annotation: 1 multiqc_ar: 5 peddy_ar: 1 picardtools_collecthsmetrics: 10 @@ -1548,6 +1551,25 @@ bcftools_core: - bgzip - tabix type: recipe +mt_annotation: + analysis_mode: case + associated_recipe: + - mip + data_type: SCALAR + default: 1 + outfile_suffix: ".vcf.gz" + file_tag: _mt + program_executables: + - hmtnote + - bgzip + - tabix + type: recipe +mt_offline: + associated_recipe: + - mt_annotation + data_type: SCALAR + default: 1 + type: recipe_argument variant_annotation: analysis_mode: case associated_recipe: diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm index ca832fef2..26e7b0ea3 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm @@ -168,6 +168,23 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); + option( + q{mt_annotation} => ( + cmd_tags => [q{Analysis recipe switch}], + documentation => q{MT annotation vcf}, + is => q{rw}, + isa => enum( [ 0, 1, 2 ] ), + ) + ); + + 
option( + q{mt_annotation_offline} => ( + documentation => q{MT variants using offline-mode}, + is => q{rw}, + isa => Bool, + ) + ); + option( q{gatk_disable_auto_index_and_file_lock} => ( cmd_flag => q{gatk_dis_auto_ind_fl}, diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm new file mode 100644 index 000000000..4cd276c0b --- /dev/null +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -0,0 +1,298 @@ +package MIP::Recipes::Analysis::Mt_annotation; + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile devnull }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use utf8; +use warnings; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw{ :all }; +use Readonly; + +## MIPs lib/ +use MIP::Constants qw{ $LOG_NAME $NEWLINE $UNDERSCORE }; + +BEGIN { + + require Exporter; + use base qw{ Exporter }; + + # Set the version for version checking + # Functions and variables which can be optionally exported + our @EXPORT_OK = qw{ analysis_mt_annotation }; + +} + +sub analysis_mt_annotation { + +## Function : Annotate your mitochondrial variants here, add'l info field +## Returns : +## Arguments: $active_parameter_href => Active parameters for this analysis hash {REF} +## : $case_id => Family id +## : $file_info_href => File_info hash {REF} +## : $job_id_href => Job id hash {REF} +## : $parameter_href => Parameter hash {REF} +## : $profile_base_command => Submission profile base command +## : $recipe_name => Recipe name +## : $sample_info_href => Info on samples and case hash {REF} + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $active_parameter_href; + my $file_info_href; + my $job_id_href; + my $parameter_href; + my $recipe_name; + my $sample_info_href; + + ## Default(s) + my $case_id; + my $profile_base_command; + + my $tmpl = { + active_parameter_href 
=> { + default => {}, + defined => 1, + required => 1, + store => \$active_parameter_href, + strict_type => 1, + }, + case_id => { + default => $arg_href->{active_parameter_href}{case_id}, + store => \$case_id, + strict_type => 1, + }, + file_info_href => { + default => {}, + defined => 1, + required => 1, + store => \$file_info_href, + strict_type => 1, + }, + job_id_href => { + default => {}, + defined => 1, + required => 1, + store => \$job_id_href, + strict_type => 1, + }, + parameter_href => { + default => {}, + defined => 1, + required => 1, + store => \$parameter_href, + strict_type => 1, + }, + profile_base_command => { + default => q{sbatch}, + store => \$profile_base_command, + strict_type => 1, + }, + recipe_name => { + defined => 1, + required => 1, + store => \$recipe_name, + strict_type => 1, + }, + sample_info_href => { + default => {}, + defined => 1, + required => 1, + store => \$sample_info_href, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::File_info qw{ get_io_files parse_io_outfiles }; + use MIP::File::Path qw { remove_file_path_suffix }; + use MIP::Program::Gnu::Coreutils qw { gnu_cp }; + use MIP::Program::HmtNote qw{ hmtnote_annotate }; + use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; + use MIP::Processmanagement::Processes qw{ submit_recipe }; + use MIP::Recipe qw{ parse_recipe_prerequisites }; + use MIP::Sample_info qw{ set_recipe_outfile_in_sample_info }; + use MIP::Script::Setup_script qw{ setup_script }; + + ### PREPROCESSING: + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger($LOG_NAME); + + ## Unpack parameters + ## Get the io infiles per chain and id + my %io = get_io_files( + { + id => $case_id, + file_info_href => $file_info_href, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + stream => q{in}, + } + ); + my $infile_name_prefix = $io{in}{file_name_prefix}; + my %infile_path = %{ $io{in}{file_path_href} }; + + my 
@contigs_size_ordered = @{ $file_info_href->{contigs_size_ordered} }; + my %recipe = parse_recipe_prerequisites( + { + active_parameter_href => $active_parameter_href, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + } + ); + + ## Set and get the io files per chain, id and stream + %io = ( + %io, + parse_io_outfiles( + { + chain_id => $recipe{job_id_chain}, + id => $case_id, + file_info_href => $file_info_href, + file_name_prefix => $infile_name_prefix, + iterators_ref => \@contigs_size_ordered, + outdata_dir => $active_parameter_href->{outdata_dir}, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + } + ) + ); + + my @outfile_paths = @{ $io{out}{file_paths} }; + my $outfile_path_prefix = $io{out}{file_path_prefix}; + my %outfile_path = %{ $io{out}{file_path_href} }; + my $outfile_suffix = $io{out}{file_suffix}; + + ## Filehandles + # Create anonymous filehandle + my $filehandle = IO::Handle->new(); + + ## Creates recipe directories (info & data & script), recipe script filenames and writes sbatch header + my ( $recipe_file_path, $recipe_info_path ) = setup_script( + { + active_parameter_href => $active_parameter_href, + core_number => $recipe{core_number}, + directory_id => $case_id, + filehandle => $filehandle, + job_id_href => $job_id_href, + memory_allocation => $recipe{memory}, + process_time => $recipe{time}, + recipe_directory => $recipe_name, + recipe_name => $recipe_name, + } + ); + + ### SHELL: + + say {$filehandle} q{## } . 
$recipe_name; + + say {$filehandle} q{## Copying non-MT variant files and annotating mt vcfs}; + foreach my $contig ( keys %infile_path ) { + + if ( $contig =~ / MT|M /xsm ) { + + hmtnote_annotate( + { + filehandle => $filehandle, + infile_path => $infile_path{$contig}, + offline => $active_parameter_href->{hmtnote_offline}, + outfile_path => $outfile_path{$contig}, + } + ); + print {$filehandle} $NEWLINE; + + my $outfile_no_suffix = remove_file_path_suffix( + { + file_path => $outfile_path{$contig}, + file_suffixes_ref => [qw{.gz}], + } + ); + + $outfile_path{$contig} = $outfile_no_suffix; + + htslib_bgzip( + { + filehandle => $filehandle, + infile_path => $outfile_path{$contig} . q{.gz}, + } + ); + print {$filehandle} $NEWLINE; + + htslib_tabix( + { + filehandle => $filehandle, + infile_path => $outfile_path{$contig} . q{.gz.tbi}, + preset => q{vcf}, + + } + ); + print {$filehandle} $NEWLINE; + } + else { + + gnu_cp( + { + filehandle => $filehandle, + infile_path => $infile_path{$contig}, + outfile_path => $outfile_path{$contig}, + } + ); + print {$filehandle} $NEWLINE; + + gnu_cp( + { + filehandle => $filehandle, + infile_path => $infile_path{$contig} . q{.tbi}, + outfile_path => $outfile_path{$contig} . 
q{.tbi}, + } + ); + print {$filehandle} $NEWLINE; + + } + } + + ## Close filehandleS + close $filehandle or $log->logcroak(q{Could not close filehandle}); + + if ( $recipe{mode} == 1 ) { + + ## Collect QC metadata info for later use + set_recipe_outfile_in_sample_info( + { + path => $outfile_paths[0], + recipe_name => $recipe_name, + sample_info_href => $sample_info_href, + } + ); + + submit_recipe( + { + base_command => $profile_base_command, + case_id => $case_id, + dependency_method => q{sample_to_case}, + job_id_chain => $recipe{job_id_chain}, + job_id_href => $job_id_href, + job_reservation_name => $active_parameter_href->{job_reservation_name}, + log => $log, + max_parallel_processes_count_href => + $file_info_href->{max_parallel_processes_count}, + recipe_file_path => $recipe_file_path, + sample_ids_ref => \@{ $active_parameter_href->{sample_ids} }, + submission_profile => $active_parameter_href->{submission_profile}, + } + ); + } + return 1; +} + +1; diff --git a/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm b/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm index a24395531..6dd2715b4 100644 --- a/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm +++ b/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm @@ -474,6 +474,7 @@ sub pipeline_analyse_rd_dna { use MIP::Recipes::Analysis::Mip_qccollect qw{ analysis_mip_qccollect }; use MIP::Recipes::Analysis::Mip_vcfparser qw{ analysis_mip_vcfparser }; use MIP::Recipes::Analysis::Mip_vercollect qw{ analysis_mip_vercollect }; + use MIP::Recipes::Analysis::Mt_annotation qw{ analysis_mt_annotation }; use MIP::Recipes::Analysis::Multiqc qw{ analysis_multiqc }; use MIP::Recipes::Analysis::Peddy qw{ analysis_peddy }; use MIP::Recipes::Analysis::Picardtools_collecthsmetrics @@ -577,6 +578,7 @@ sub pipeline_analyse_rd_dna { gzip_fastq => \&analysis_gzip_fastq, manta => \&analysis_manta, markduplicates => \&analysis_markduplicates, + mt_annotation => \&analysis_mt_annotation, multiqc_ar => \&analysis_multiqc, peddy_ar => \&analysis_peddy, 
picardtools_collecthsmetrics => \&analysis_picardtools_collecthsmetrics, From 19fad4fcd0172390e9a34a6b86bf9a9cdd03871c Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Fri, 18 Jun 2021 12:40:23 +0200 Subject: [PATCH 013/116] trying to fix RUN directive --- containers/hmtnote/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index 52609e507..6948cf346 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -12,10 +12,10 @@ LABEL extra.binaries="HmtNote" LABEL maintainer="Clinical-Genomics/MIP" ## Conda env installation + clean up -RUN conda install pip python=3.7 && \ - /opt/conda/bin/conda clean -tipsy +RUN conda install pip python=3.7 -RUN pip install --no-cache-dir hmtnote==0.7.2 && \ +RUN /opt/conda/bin/conda clean -tipsy && \ + pip install --no-cache-dir hmtnote==0.7.2 && \ hmtnote dump WORKDIR /data/ From 71187b75d70dc4c1a3d82288f77929eea1536a39 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Fri, 18 Jun 2021 12:43:07 +0200 Subject: [PATCH 014/116] Added comments --- containers/hmtnote/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index 6948cf346..dbac2ce6a 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -12,10 +12,11 @@ LABEL extra.binaries="HmtNote" LABEL maintainer="Clinical-Genomics/MIP" ## Conda env installation + clean up -RUN conda install pip python=3.7 +RUN conda install pip python=3.7 && \ + /opt/conda/bin/conda clean -tipsy -RUN /opt/conda/bin/conda clean -tipsy && \ - pip install --no-cache-dir hmtnote==0.7.2 && \ +# Install HmtNote +RUN pip install --no-cache-dir hmtnote==0.7.2 && \ hmtnote dump WORKDIR /data/ From 39b11fd6930c42d0413753994e1563d8db78b858 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Fri, 18 Jun 2021 13:14:00 +0200 Subject: [PATCH 015/116] remove .gz extn the right way --- 
lib/MIP/Recipes/Analysis/Mt_annotation.pm | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index 4cd276c0b..96290538a 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -201,29 +201,28 @@ sub analysis_mt_annotation { if ( $contig =~ / MT|M /xsm ) { - hmtnote_annotate( + my $outfile_no_suffix = remove_file_path_suffix( { - filehandle => $filehandle, - infile_path => $infile_path{$contig}, - offline => $active_parameter_href->{hmtnote_offline}, - outfile_path => $outfile_path{$contig}, + file_path => $outfile_path{$contig}, + file_suffixes_ref => [qw{.gz}], } ); print {$filehandle} $NEWLINE; - my $outfile_no_suffix = remove_file_path_suffix( + hmtnote_annotate( { - file_path => $outfile_path{$contig}, - file_suffixes_ref => [qw{.gz}], + filehandle => $filehandle, + infile_path => $infile_path{$contig}, + offline => $active_parameter_href->{hmtnote_offline}, + outfile_path => $outfile_no_suffix, } ); - - $outfile_path{$contig} = $outfile_no_suffix; + print {$filehandle} $NEWLINE; htslib_bgzip( { filehandle => $filehandle, - infile_path => $outfile_path{$contig} . q{.gz}, + infile_path => $outfile_no_suffix, } ); print {$filehandle} $NEWLINE; @@ -231,7 +230,7 @@ sub analysis_mt_annotation { htslib_tabix( { filehandle => $filehandle, - infile_path => $outfile_path{$contig} . 
q{.gz.tbi}, + infile_path => $outfile_path{$contig}, preset => q{vcf}, } From bde2d4522ba360604c50e096f396ff60d35f911c Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Fri, 18 Jun 2021 13:25:58 +0200 Subject: [PATCH 016/116] remove .gz extn the right way --- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index 96290538a..a34473ae3 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -207,7 +207,6 @@ sub analysis_mt_annotation { file_suffixes_ref => [qw{.gz}], } ); - print {$filehandle} $NEWLINE; hmtnote_annotate( { From a963ea2849d677b50eaf6eb71e142b2e1f445c72 Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 18 Jun 2021 16:41:30 +0200 Subject: [PATCH 017/116] fixing mt download and adding test --- definitions/download_parameters.yaml | 9 + lib/MIP/Recipes/Download/Gnomad.pm | 169 +++++++++++++++++- lib/MIP/Recipes/Pipeline/Download.pm | 4 +- .../test_data/download_active_parameters.yaml | 11 ++ t/download_gnomad_mt.t | 92 ++++++++++ .../mip_download_rd_dna_config_-1.0-.yaml | 4 +- 6 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 t/download_gnomad_mt.t diff --git a/definitions/download_parameters.yaml b/definitions/download_parameters.yaml index 71f62aa09..9564958d9 100644 --- a/definitions/download_parameters.yaml +++ b/definitions/download_parameters.yaml @@ -251,6 +251,13 @@ gnomad_chrsplit: - bgzip - tabix type: recipe +gnomad_mt: + analysis_mode: case + associated_recipe: + - mip + data_type: SCALAR + default: 1 + type: recipe gnomad_pli_per_gene: analysis_mode: case associated_recipe: @@ -383,6 +390,7 @@ recipe_core_number: giab: 1 gnomad: 1 gnomad_chrsplit: 13 + gnomad_mt: 1 gnomad_pli_per_gene: 1 hapmap: 1 human_reference: 1 @@ -436,6 +444,7 @@ recipe_time: giab: 1 gnomad: 25 gnomad_chrsplit: 32 + gnomad_mt: 1 gnomad_pli_per_gene: 1 hapmap: 1 human_reference: 1 
diff --git a/lib/MIP/Recipes/Download/Gnomad.pm b/lib/MIP/Recipes/Download/Gnomad.pm index 2cfe4ba31..ec18e2062 100644 --- a/lib/MIP/Recipes/Download/Gnomad.pm +++ b/lib/MIP/Recipes/Download/Gnomad.pm @@ -32,7 +32,7 @@ BEGIN { use base qw{ Exporter }; # Functions and variables which can be optionally exported - our @EXPORT_OK = qw{ download_gnomad download_gnomad_xargs}; + our @EXPORT_OK = qw{ download_gnomad download download_gnomad_mt download_gnomad_xargs}; } @@ -265,6 +265,173 @@ sub download_gnomad { return 1; } +sub download_gnomad_mt { + +## Function : Download gnomad mitochondrial +## Returns : +## Arguments: $active_parameter_href => Active parameters for this download hash {REF} +## : $genome_version => Human genome version +## : $job_id_href => The job_id hash {REF} +## : $profile_base_command => Submission profile base command +## : $quiet => Quiet (no output) +## : $recipe_name => Recipe name +## : $reference_href => Reference hash {REF} +## : $reference_version => Reference version +## : $temp_directory => Temporary directory for recipe +## : $verbose => Verbosity + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $active_parameter_href; + my $genome_version; + my $job_id_href; + my $recipe_name; + my $reference_href; + my $reference_version; + + ## Default(s) + my $profile_base_command; + my $quiet; + my $temp_directory; + my $verbose; + + my $tmpl = { + active_parameter_href => { + default => {}, + defined => 1, + required => 1, + store => \$active_parameter_href, + strict_type => 1, + }, + genome_version => { + store => \$genome_version, + strict_type => 1, + }, + job_id_href => { + default => {}, + defined => 1, + required => 1, + store => \$job_id_href, + strict_type => 1, + }, + profile_base_command => { + default => q{sbatch}, + store => \$profile_base_command, + strict_type => 1, + }, + quiet => { + allow => [ undef, 0, 1 ], + default => 1, + store => \$quiet, + strict_type => 1, + }, + recipe_name => { + defined => 1, + required => 1, + 
store => \$recipe_name, + strict_type => 1, + }, + reference_href => { + default => {}, + defined => 1, + required => 1, + store => \$reference_href, + strict_type => 1, + }, + reference_version => { + defined => 1, + required => 1, + store => \$reference_version, + strict_type => 1, + }, + temp_directory => { + store => \$temp_directory, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::Processmanagement::Slurm_processes qw{ slurm_submit_job_no_dependency_dead_end }; + use MIP::Recipes::Download::Get_reference qw{ get_reference }; + use MIP::Recipe qw{ parse_recipe_prerequisites }; + use MIP::Script::Setup_script qw{ setup_script }; + + ### PREPROCESSING: + + ### PREPROCESSING: + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger( uc q{mip_download} ); + + ## Unpack parameters + my $reference_dir = $active_parameter_href->{reference_dir}; + + my %recipe = parse_recipe_prerequisites( + { + active_parameter_href => $active_parameter_href, + recipe_name => $recipe_name, + } + ); + + ## Filehandle(s) + # Create anonymous filehandle + my $filehandle = IO::Handle->new(); + + ## Creates recipe directories (info & data & script), recipe script filenames and writes sbatch header + my ( $recipe_file_path, $recipe_info_path ) = setup_script( + { + active_parameter_href => $active_parameter_href, + core_number => $recipe{core_number}, + directory_id => q{mip_download}, + filehandle => $filehandle, + info_file_id => $genome_version . $UNDERSCORE . $reference_version, + job_id_href => $job_id_href, + memory_allocation => $recipe{memory}, + outdata_dir => $reference_dir, + outscript_dir => $reference_dir, + process_time => $recipe{time}, + recipe_data_directory_path => $active_parameter_href->{reference_dir}, + recipe_directory => $recipe_name . $UNDERSCORE . 
$reference_version, + recipe_name => $recipe_name, + source_environment_commands_ref => $recipe{load_env_ref}, + } + ); + + ### SHELL: + + say {$filehandle} q{## } . $recipe_name; + + get_reference( + { + filehandle => $filehandle, + recipe_name => $recipe_name, + reference_dir => $reference_dir, + reference_href => $reference_href, + quiet => $quiet, + verbose => $verbose, + } + ); + + ## Close filehandleS + close $filehandle or $log->logcroak(q{Could not close filehandle}); + + if ( $recipe{mode} == 1 ) { + + ## No upstream or downstream dependencies + slurm_submit_job_no_dependency_dead_end( + { + base_command => $profile_base_command, + job_id_href => $job_id_href, + log => $log, + sbatch_file_name => $recipe_file_path, + } + ); + } + return 1; +} + sub download_gnomad_xargs { ## Function : Download gnomad using xargs diff --git a/lib/MIP/Recipes/Pipeline/Download.pm b/lib/MIP/Recipes/Pipeline/Download.pm index 28d5c642c..760a4327a 100644 --- a/lib/MIP/Recipes/Pipeline/Download.pm +++ b/lib/MIP/Recipes/Pipeline/Download.pm @@ -96,7 +96,8 @@ sub pipeline_download { use MIP::Recipes::Download::Genomic_superdups qw{ download_genomic_superdups }; use MIP::Recipes::Download::Get_reference qw{ get_reference }; use MIP::Recipes::Download::Giab qw{ download_giab }; - use MIP::Recipes::Download::Gnomad qw{ download_gnomad download_gnomad_xargs }; + use MIP::Recipes::Download::Gnomad + qw{ download_gnomad download_gnomad_mt download_gnomad_xargs }; use MIP::Recipes::Download::Gnomad_pli_per_gene qw{ download_gnomad_pli_per_gene }; use MIP::Recipes::Download::Hapmap qw{ download_hapmap }; use MIP::Recipes::Download::Human_reference qw{ download_human_reference }; @@ -139,6 +140,7 @@ sub pipeline_download { gnomad => \&download_gnomad, gnomad_pli_per_gene => \&download_gnomad_pli_per_gene, gnomad_chrsplit => \&download_gnomad_xargs, + gnomad_mt => \&download_gnomad_mt, hapmap => \&download_hapmap, human_reference => \&download_human_reference, manta_call_regions => 
\&download_manta_call_regions, diff --git a/t/data/test_data/download_active_parameters.yaml b/t/data/test_data/download_active_parameters.yaml index 17d956804..520b5786d 100644 --- a/t/data/test_data/download_active_parameters.yaml +++ b/t/data/test_data/download_active_parameters.yaml @@ -94,6 +94,8 @@ reference: - r2.0.1 gnomad_chrsplit: - r3.1.1 + gnomad_mt: + - r3.1 gnomad_pli_per_gene: - r2.1.1 hapmap: @@ -570,6 +572,15 @@ reference_feature: outfile: grch38_gnomad_reformated_-r3.1.1-.vcf.gz outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ + gnomad_mt: + grch37: {} + grch38: + r3.1: + file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz + file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi + outfile: gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: gnomad_genomes_mt_-r3.1-.vcf.gz.tbi + url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: r2.1.1: diff --git a/t/download_gnomad_mt.t b/t/download_gnomad_mt.t new file mode 100644 index 000000000..f671edc88 --- /dev/null +++ b/t/download_gnomad_mt.t @@ -0,0 +1,92 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile }; +use File::Temp; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; +use Readonly; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COMMA $SPACE }; +use MIP::Test::Fixtures qw{ test_log test_mip_hashes }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( + 
q{MIP::Recipes::Download::Gnomad} => [qw{ download_gnomad_mt }], + q{MIP::Test::Fixtures} => [qw{ test_log test_mip_hashes }], + ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Recipes::Download::Gnomad qw{ download_gnomad_mt }; + +diag( q{Test download_gnomad_mt from Gnomad.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . $EXECUTABLE_NAME ); + +my $test_dir = File::Temp->newdir(); +my $file_path = catfile( $test_dir, q{recipe_script.sh} ); +test_log( { log_name => uc q{mip_download}, no_screen => 1, } ); + +## Given download parameters for recipe +my $genome_version = q{grch38}; +my $recipe_name = q{gnomad_mt}; +my $reference_version = q{r3.1}; +my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); + +my %active_parameter = test_mip_hashes( + { + mip_hash_name => q{download_active_parameter}, + } +); +$active_parameter{$recipe_name} = 1; +$active_parameter{project_id} = q{test}; +$active_parameter{reference_dir} = catfile($test_dir); +$active_parameter{recipe_core_number}{$recipe_name} = 1; +$active_parameter{recipe_time}{$recipe_name} = 1; +my $reference_href = + $active_parameter{reference_feature}{$recipe_name}{$genome_version}{$reference_version}; + +my %job_id; + +my $is_ok = download_gnomad_mt( + { + active_parameter_href => \%active_parameter, + genome_version => $genome_version, + job_id_href => \%job_id, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + reference_href => $reference_href, + reference_version => $reference_version, + temp_directory => catfile($test_dir), + } +); + +## Then +ok( $is_ok, q{ Executed download recipe } . 
$recipe_name ); + +done_testing(); diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 98946a151..7c404816c 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -726,8 +726,8 @@ reference_feature: r3.1: file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi - outfile: gnomad_genomes_mt_-r3.1-.bgz - outfile_index: gnomad_genomes_mt_-r3.1-.bgz.tbi + outfile: gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: gnomad_genomes_mt_-r3.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: From b860ee2fce54e6f83d76207b16513e22ccc90cb1 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 22 Jun 2021 10:42:40 +0200 Subject: [PATCH 018/116] increase recipe time for fusion_report --- CHANGELOG.md | 3 ++- definitions/rd_rna_parameters.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6603f9782..bd162e612 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [10.0.2] -- Glnexus are used regardless of how many samples that are analysed. +- Glnexus are used to genotype the gvcf regardless of how many samples that are analysed. 
+- Resource bump to the MIP RNA recipe fusion_report ## [10.0.1] diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 2c8ba5ed3..ee7be7a46 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -212,7 +212,7 @@ recipe_core_number: build_sj_tracks: 1 dna_vcf_reformat: 1 fastqc_ar: 0 - fusion_report: 1 + fusion_report: 2 gatk_asereadcounter: 1 gatk_baserecalibration: 13 gatk_haplotypecaller: 13 From cead0b753004779314b30ccd342728c55d23c0d6 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 23 Jun 2021 17:09:23 +0200 Subject: [PATCH 019/116] postprocess mt file for grch37 --- lib/MIP/Recipes/Download/Gnomad.pm | 90 +++++++++++++++++++ .../test_data/download_active_parameters.yaml | 12 ++- t/download_gnomad_mt.t | 2 +- .../mip_download_rd_dna_config_-1.0-.yaml | 11 ++- 4 files changed, 109 insertions(+), 6 deletions(-) diff --git a/lib/MIP/Recipes/Download/Gnomad.pm b/lib/MIP/Recipes/Download/Gnomad.pm index ec18e2062..92e028935 100644 --- a/lib/MIP/Recipes/Download/Gnomad.pm +++ b/lib/MIP/Recipes/Download/Gnomad.pm @@ -414,6 +414,16 @@ sub download_gnomad_mt { } ); + if ( $genome_version eq q{grch37} ) { + + _reformat_for_grch37( + { + filehandle => $filehandle, + infile_path => catfile( $reference_dir, $reference_href->{outfile} ) + } + ); + } + ## Close filehandleS close $filehandle or $log->logcroak(q{Could not close filehandle}); @@ -968,4 +978,84 @@ sub _annotate_and_calculate_afpopmax { return; } +sub _reformat_for_grch37 { + +## Function : Rename chrM to MT for grch37 +## Returns : +## Arguments: $filehandle => Filehandle +## : $infile_path => Path to reformatted file + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $filehandle; + my $infile_path; + + my $tmpl = { + filehandle => { + required => 1, + store => \$filehandle, + }, + infile_path => { + defined => 1, + required => 1, + store => \$infile_path, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak 
q{Could not parse arguments!}; + + use MIP::Program::Gnu::Coreutils qw{ gnu_mv }; + use MIP::Language::Perl qw{ perl_nae_oneliners }; + use MIP::Program::Bcftools qw{ bcftools_view }; + use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; + + bcftools_view( + { + filehandle => $filehandle, + infile_path => $infile_path, + } + ); + print {$filehandle} $PIPE . $SPACE; + + perl_nae_oneliners( + { + filehandle => $filehandle, + oneliner_name => q{synonyms_grch38_to_grch37}, + } + ); + print {$filehandle} $PIPE . $SPACE; + + my $temp_outfile_path = $infile_path . q{_temp.vcf.gz}; + htslib_bgzip( + { + filehandle => $filehandle, + write_to_stdout => 1, + stdoutfile_path => $temp_outfile_path, + } + ); + say {$filehandle} $NEWLINE; + + gnu_mv( + { + filehandle => $filehandle, + infile_path => $temp_outfile_path, + outfile_path => $infile_path, + } + ); + say {$filehandle} $NEWLINE; + + htslib_tabix( + { + filehandle => $filehandle, + preset => q{vcf}, + infile_path => $infile_path, + } + ); + say {$filehandle} $NEWLINE; + + return; +} + 1; diff --git a/t/data/test_data/download_active_parameters.yaml b/t/data/test_data/download_active_parameters.yaml index 520b5786d..7e7dfdb2b 100644 --- a/t/data/test_data/download_active_parameters.yaml +++ b/t/data/test_data/download_active_parameters.yaml @@ -573,13 +573,19 @@ reference_feature: outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ gnomad_mt: - grch37: {} + grch37: + r3.1: + file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz + file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi + outfile: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi + url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ grch38: r3.1: file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi - outfile: 
gnomad_genomes_mt_-r3.1-.vcf.gz - outfile_index: gnomad_genomes_mt_-r3.1-.vcf.gz.tbi + outfile: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: diff --git a/t/download_gnomad_mt.t b/t/download_gnomad_mt.t index f671edc88..408cdf5a6 100644 --- a/t/download_gnomad_mt.t +++ b/t/download_gnomad_mt.t @@ -53,7 +53,7 @@ my $file_path = catfile( $test_dir, q{recipe_script.sh} ); test_log( { log_name => uc q{mip_download}, no_screen => 1, } ); ## Given download parameters for recipe -my $genome_version = q{grch38}; +my $genome_version = q{grch37}; my $recipe_name = q{gnomad_mt}; my $reference_version = q{r3.1}; my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 7c404816c..c37e412b7 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -722,12 +722,19 @@ reference_feature: outfile_index: grch38_gnomad_reformated_-r3.1.1-.vcf.gz.tbi url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.1/vcf/genomes/ gnomad_mt: + grch37: + r3.1: + file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz + file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi + outfile: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi + url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ grch38: r3.1: file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi - outfile: gnomad_genomes_mt_-r3.1-.vcf.gz - outfile_index: gnomad_genomes_mt_-r3.1-.vcf.gz.tbi + outfile: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz + outfile_index: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi url_prefix: 
https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: From 177b5682a1fdd7a5b6c7e10d8e9e59ffc1f97388 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 29 Jun 2021 16:28:01 +0200 Subject: [PATCH 020/116] sorting AB and aligning fat commas --- lib/MIP/Recipes/Download/Gnomad.pm | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/MIP/Recipes/Download/Gnomad.pm b/lib/MIP/Recipes/Download/Gnomad.pm index 92e028935..f4c222c39 100644 --- a/lib/MIP/Recipes/Download/Gnomad.pm +++ b/lib/MIP/Recipes/Download/Gnomad.pm @@ -354,14 +354,12 @@ sub download_gnomad_mt { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; use MIP::Processmanagement::Slurm_processes qw{ slurm_submit_job_no_dependency_dead_end }; - use MIP::Recipes::Download::Get_reference qw{ get_reference }; use MIP::Recipe qw{ parse_recipe_prerequisites }; + use MIP::Recipes::Download::Get_reference qw{ get_reference }; use MIP::Script::Setup_script qw{ setup_script }; ### PREPROCESSING: - ### PREPROCESSING: - ## Retrieve logger object my $log = Log::Log4perl->get_logger( uc q{mip_download} ); @@ -982,8 +980,8 @@ sub _reformat_for_grch37 { ## Function : Rename chrM to MT for grch37 ## Returns : -## Arguments: $filehandle => Filehandle -## : $infile_path => Path to reformatted file +## Arguments: $filehandle => Filehandle +## : $infile_path => Path to reformatted file my ($arg_href) = @_; @@ -1006,9 +1004,9 @@ sub _reformat_for_grch37 { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - use MIP::Program::Gnu::Coreutils qw{ gnu_mv }; use MIP::Language::Perl qw{ perl_nae_oneliners }; use MIP::Program::Bcftools qw{ bcftools_view }; + use MIP::Program::Gnu::Coreutils qw{ gnu_mv }; use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; bcftools_view( @@ -1031,8 +1029,8 @@ sub _reformat_for_grch37 { htslib_bgzip( { filehandle => $filehandle, - write_to_stdout => 1, stdoutfile_path => $temp_outfile_path, + 
write_to_stdout => 1, } ); say {$filehandle} $NEWLINE; @@ -1049,8 +1047,8 @@ sub _reformat_for_grch37 { htslib_tabix( { filehandle => $filehandle, - preset => q{vcf}, infile_path => $infile_path, + preset => q{vcf}, } ); say {$filehandle} $NEWLINE; From ddc795a62c11de0c2987afbc557ee47bdd60bbde Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 30 Jun 2021 11:44:43 +0200 Subject: [PATCH 021/116] testing suite COMPLETED --- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 20 ++-- t/analysis_mt_annotation.t | 111 ++++++++++++++++++++++ 2 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 t/analysis_mt_annotation.t diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index a34473ae3..73c88df83 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -234,7 +234,7 @@ sub analysis_mt_annotation { } ); - print {$filehandle} $NEWLINE; + say {$filehandle} $NEWLINE; } else { @@ -254,12 +254,12 @@ sub analysis_mt_annotation { outfile_path => $outfile_path{$contig} . 
q{.tbi}, } ); - print {$filehandle} $NEWLINE; + say {$filehandle} $NEWLINE; } } - ## Close filehandleS + ## Close filehandle close $filehandle or $log->logcroak(q{Could not close filehandle}); if ( $recipe{mode} == 1 ) { @@ -275,13 +275,13 @@ sub analysis_mt_annotation { submit_recipe( { - base_command => $profile_base_command, - case_id => $case_id, - dependency_method => q{sample_to_case}, - job_id_chain => $recipe{job_id_chain}, - job_id_href => $job_id_href, - job_reservation_name => $active_parameter_href->{job_reservation_name}, - log => $log, + base_command => $profile_base_command, + case_id => $case_id, + dependency_method => q{sample_to_case}, + job_id_chain => $recipe{job_id_chain}, + job_id_href => $job_id_href, + job_reservation_name => $active_parameter_href->{job_reservation_name}, + log => $log, max_parallel_processes_count_href => $file_info_href->{max_parallel_processes_count}, recipe_file_path => $recipe_file_path, diff --git a/t/analysis_mt_annotation.t b/t/analysis_mt_annotation.t new file mode 100644 index 000000000..53584c7d3 --- /dev/null +++ b/t/analysis_mt_annotation.t @@ -0,0 +1,111 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; +use Test::Trap; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COLON $COMMA $SPACE }; +use MIP::Test::Fixtures qw{ test_add_io_for_recipe test_log test_mip_hashes }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( + q{MIP::Recipes::Analysis::Mt_annotation} => [qw{ 
analysis_mt_annotation }], + q{MIP::Test::Fixtures} => [qw{ test_add_io_for_recipe test_log test_mip_hashes }], + ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Recipes::Analysis::Mt_annotation qw{ analysis_mt_annotation }; + +diag( q{Test analysis_mt_annotation from Mt_annotation.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . $EXECUTABLE_NAME ); + +test_log( { log_name => q{MIP}, no_screen => 1, } ); + +## Given analysis parameters +my $recipe_name = q{mt_annotation}; +my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); + +my %active_parameter = test_mip_hashes( + { + mip_hash_name => q{active_parameter}, + recipe_name => $recipe_name, + } +); +$active_parameter{$recipe_name} = 1; +$active_parameter{recipe_core_number}{$recipe_name} = 1; +$active_parameter{recipe_time}{$recipe_name} = 1; +my $case_id = $active_parameter{case_id}; + +my %file_info = test_mip_hashes( + { + mip_hash_name => q{file_info}, + recipe_name => $recipe_name, + } +); + +my %job_id; +my %parameter = test_mip_hashes( + { + mip_hash_name => q{recipe_parameter}, + recipe_name => $recipe_name, + } +); +test_add_io_for_recipe( + { + file_info_href => \%file_info, + id => $case_id, + parameter_href => \%parameter, + recipe_name => $recipe_name, + step => q{vcf}, + } +); + +$parameter{$recipe_name}{outfile_suffix} .= q{.gz}; + +my %sample_info; + +my $is_ok = analysis_mt_annotation( + { + active_parameter_href => \%active_parameter, + case_id => $case_id, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_info_href => \%sample_info, + } +); + +## Then return TRUE +ok( $is_ok, q{ Executed analysis recipe } . 
$recipe_name ); + +done_testing(); From 27b031e579cba8ce9ad5c94755243fecaeef8df4 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 30 Jun 2021 11:48:54 +0200 Subject: [PATCH 022/116] maybe this fixes the docker label fail --- containers/hmtnote/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index dbac2ce6a..0d4f9e4e5 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -4,7 +4,7 @@ FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.1" +LABEL base_image.label="clinicalgenomics/mip_base:2.1" LABEL version="1" LABEL software="HmtNote" LABEL software.version="0.7.2" From 140d3afa11db338b5cdc1bc7675bececb0325712 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 30 Jun 2021 11:50:00 +0200 Subject: [PATCH 023/116] maybe this fixes the docker label fail-try 2 --- containers/hmtnote/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index 0d4f9e4e5..dbac2ce6a 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -4,7 +4,7 @@ FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image.label="clinicalgenomics/mip_base:2.1" +LABEL base_image="clinicalgenomics/mip_base:2.1" LABEL version="1" LABEL software="HmtNote" LABEL software.version="0.7.2" From 86c7ee06d3c962adf203a05de84f90afb42ab95c Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 30 Jun 2021 11:56:07 +0200 Subject: [PATCH 024/116] updated changelog to include new module --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d22ba5785..439a9dca1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
## [Develop] +- HmtNote: annotate mitochondrial variants in VCF file + +### Tools + +HmtNote: 0.7.2 + ### References - gnomad: r3.0 -> r3.1.1 From 16704a6294724242b1829e4386ea3e775382d23a Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Wed, 30 Jun 2021 12:00:44 +0200 Subject: [PATCH 025/116] Update Dockerfile Green check marks are just so much nicer than red crosses --- containers/hmtnote/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/hmtnote/Dockerfile b/containers/hmtnote/Dockerfile index dbac2ce6a..ebade96d7 100644 --- a/containers/hmtnote/Dockerfile +++ b/containers/hmtnote/Dockerfile @@ -4,7 +4,7 @@ FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.1" +LABEL base-image="clinicalgenomics/mip_base:2.1" LABEL version="1" LABEL software="HmtNote" LABEL software.version="0.7.2" From 0de9ea0f7ad3447a079d1f3dcb7338d784676dfb Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 30 Jun 2021 16:32:34 +0200 Subject: [PATCH 026/116] set rna resources via cli --- definitions/analyse_parameters.yaml | 20 ++++++++++++++++- definitions/rd_dna_panel_parameters.yaml | 18 --------------- definitions/rd_dna_parameters.yaml | 18 --------------- lib/MIP/Cli/Mip/Analyse.pm | 28 ++++++++++++++++++++++++ lib/MIP/Cli/Mip/Analyse/Rd_dna.pm | 27 ----------------------- lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm | 26 ---------------------- 6 files changed, 47 insertions(+), 90 deletions(-) diff --git a/definitions/analyse_parameters.yaml b/definitions/analyse_parameters.yaml index 38aa296d4..ce6e3e545 100644 --- a/definitions/analyse_parameters.yaml +++ b/definitions/analyse_parameters.yaml @@ -168,6 +168,12 @@ project_id: - mip data_type: SCALAR type: mip +recipe_bind_path: + associated_recipe: + - mip + data_type: HASH + mandatory: no + type: mip reference_dir: associated_recipe: - mip @@ -192,7 +198,19 @@ sample_info_file: data_type: SCALAR type: path update_path: 
absolute_path -recipe_bind_path: +set_recipe_core_number: + associated_recipe: + - mip + data_type: HASH + mandatory: no + type: mip +set_recipe_memory: + associated_recipe: + - mip + data_type: HASH + mandatory: no + type: mip +set_recipe_time: associated_recipe: - mip data_type: HASH diff --git a/definitions/rd_dna_panel_parameters.yaml b/definitions/rd_dna_panel_parameters.yaml index d9f1c9d38..6b36a0b55 100755 --- a/definitions/rd_dna_panel_parameters.yaml +++ b/definitions/rd_dna_panel_parameters.yaml @@ -153,12 +153,6 @@ recipe_core_number: vcfparser_ar: 1 version_collect_ar: 1 type: mip -set_recipe_core_number: - associated_recipe: - - mip - data_type: HASH - mandatory: no - type: mip ## Memory per parallel process/core recipe_memory: associated_recipe: @@ -180,12 +174,6 @@ recipe_memory: vcfparser_ar: 7 version_collect_ar: 8 type: mip -set_recipe_memory: - associated_recipe: - - mip - data_type: HASH - mandatory: no - type: mip recipe_time: associated_recipe: - mip @@ -224,12 +212,6 @@ recipe_time: vcfparser_ar: 1 version_collect_ar: 1 type: mip -set_recipe_time: - associated_recipe: - - mip - data_type: HASH - mandatory: no - type: mip infile_dirs: associated_recipe: - mip diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 3e8d08353..d3cdc2cea 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -203,12 +203,6 @@ recipe_core_number: version_collect_ar: 1 vcf2cytosure_ar: 0 type: mip -set_recipe_core_number: - associated_recipe: - - mip - data_type: HASH - mandatory: no - type: mip ## GPU number recipe_gpu_number: associated_recipe: @@ -262,12 +256,6 @@ recipe_memory: vcf2cytosure_ar: 6 version_collect_ar: 8 type: mip -set_recipe_memory: - associated_recipe: - - mip - data_type: HASH - mandatory: no - type: mip recipe_time: associated_recipe: - mip @@ -338,12 +326,6 @@ recipe_time: vcfparser_ar: 5 version_collect_ar: 1 type: mip -set_recipe_time: - associated_recipe: - - mip - 
data_type: HASH - mandatory: no - type: mip infile_dirs: associated_recipe: - mip diff --git a/lib/MIP/Cli/Mip/Analyse.pm b/lib/MIP/Cli/Mip/Analyse.pm index ffa001059..50da33766 100644 --- a/lib/MIP/Cli/Mip/Analyse.pm +++ b/lib/MIP/Cli/Mip/Analyse.pm @@ -311,6 +311,34 @@ q{Default: jobid, jobname%50, account, partition, alloccpus, TotalCPU, elapsed, ) ); + option( + q{set_recipe_core_number} => ( + cmd_tags => [q{recipe_name=X(cores)}], + documentation => q{Set the number of cores for specific recipe(s)}, + is => q{rw}, + isa => HashRef, + ) + ); + + option( + q{set_recipe_memory} => ( + cmd_aliases => [qw{ srm }], + cmd_tags => [q{recipe_name=X(G)}], + documentation => q{Set the memory for specific recipe(s)}, + is => q{rw}, + isa => HashRef, + ) + ); + + option( + q{set_recipe_time} => ( + cmd_tags => [q{recipe_name=time(hours)}], + documentation => q{Set the time allocation for specific recipe(s)}, + is => q{rw}, + isa => HashRef, + ) + ); + option( q{slurm_quality_of_service} => ( cmd_aliases => [qw{ qos }], diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm index ca832fef2..37957a4a6 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm @@ -203,15 +203,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_core_number} => ( - cmd_tags => [q{recipe_name=X(cores)}], - documentation => q{Set the number of cores for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); - has( q{recipe_gpu_number} => ( cmd_tags => [q{recipe_name=X(gpus)}], @@ -248,16 +239,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_memory} => ( - cmd_aliases => [qw{ srm }], - cmd_tags => [q{recipe_name=X(G)}], - documentation => q{Set the memory for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); - has( q{recipe_time} => ( cmd_tags => [q{recipe_name=time(hours)}], @@ -267,14 
+248,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_time} => ( - cmd_tags => [q{recipe_name=time(hours)}], - documentation => q{Set the time allocation for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); option( q{infile_dirs} => ( cmd_aliases => [qw{ ifd }], diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm index 3bc7d45d1..d0a38a4cc 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm @@ -206,15 +206,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_core_number} => ( - cmd_tags => [q{recipe_name=X(cores)}], - documentation => q{Set the number of cores for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); - has( q{recipe_memory} => ( cmd_tags => [q{recipe_name=X(G)}], @@ -224,15 +215,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_memory} => ( - cmd_tags => [q{recipe_name=X(G)}], - documentation => q{Set the memory for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); - has( q{recipe_time} => ( cmd_tags => [q{recipe_name=time(hours)}], @@ -242,14 +224,6 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ) ); - option( - q{set_recipe_time} => ( - cmd_tags => [q{recipe_name=time(hours)}], - documentation => q{Set the time allocation for specific recipe(s)}, - is => q{rw}, - isa => HashRef, - ) - ); option( q{infile_dirs} => ( cmd_aliases => [qw{ ifd }], From 86cda9c7114f1da04d177a2e9f0f59ffdc16562c Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 30 Jun 2021 17:25:33 +0200 Subject: [PATCH 027/116] fixing download paths --- templates/mip_download_rd_dna_config_-1.0-.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml 
b/templates/mip_download_rd_dna_config_-1.0-.yaml index c37e412b7..45b794dd0 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -728,14 +728,14 @@ reference_feature: file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi outfile: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz outfile_index: grch37_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi - url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ + url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1/vcf/genomes/ grch38: r3.1: file: gnomad.genomes.v3.1.sites.chrM.vcf.bgz file_index: gnomad.genomes.v3.1.sites.chrM.vcf.bgz.tbi outfile: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz outfile_index: grch38_gnomad_genomes_mt_-r3.1-.vcf.gz.tbi - url_prefix: https://storage.googleapis.com/gnomad-public/release/3.1/vcf/genomes/ + url_prefix: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1/vcf/genomes/ gnomad_pli_per_gene: grch37: r2.1.1: @@ -860,10 +860,10 @@ reference_feature: outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ v1.4: - file: grch37_sv_frequency_vcfanno_filter_config_-v1.4-.toml - file_check: grch37_sv_frequency_vcfanno_filter_config_-v1.4-.toml.md5 - outfile: grch37_sv_frequency_vcfanno_filter_config_-v1.4-.toml - outfile_check: grch37_sv_frequency_vcfanno_filter_config_-v1.4-.toml.md5 + file: grch37_sv_vcfanno_config_-v1.4-.toml + file_check: grch37_sv_vcfanno_config_-v1.4-.toml.md5 + outfile: grch37_sv_vcfanno_config_-v1.4-.toml + outfile_check: grch37_sv_vcfanno_config_-v1.4-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ grch38: From c334c45a6421c9683797e2a8095f64c51c9b419e Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 1 Jul 2021 14:46:58 +0200 Subject: [PATCH 028/116] Update 
lib/MIP/Program/HmtNote.pm update format Co-authored-by: Anders Jemt --- lib/MIP/Program/HmtNote.pm | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/MIP/Program/HmtNote.pm b/lib/MIP/Program/HmtNote.pm index 32637720c..be6ab52b1 100644 --- a/lib/MIP/Program/HmtNote.pm +++ b/lib/MIP/Program/HmtNote.pm @@ -56,8 +56,6 @@ sub hmtnote_annotate { my $stdinfile_path; my $stdoutfile_path; - ## Default(s) - my $tmpl = { filehandle => { store => \$filehandle, From 6e8aa2c44004e996d498faf7809c9aabe8c46dcb Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 1 Jul 2021 14:48:43 +0200 Subject: [PATCH 029/116] Update lib/MIP/Recipes/Analysis/Mt_annotation.pm Co-authored-by: Anders Jemt --- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index 73c88df83..ae4979a1c 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -24,7 +24,6 @@ BEGIN { require Exporter; use base qw{ Exporter }; - # Set the version for version checking # Functions and variables which can be optionally exported our @EXPORT_OK = qw{ analysis_mt_annotation }; From 193fbaaa2fef3484f139a5103fb7eac7d032529c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 23 Aug 2021 18:44:35 +0200 Subject: [PATCH 030/116] Add mitodel recipe (#1933) * Add mitodel recipe * Update test * Update tag * Update with suggestions from code review * Refactor * Remove insert size defaults for samtools stat * Remove insert size defaults for samtools stats * Replace contig iterators with infile name prefix * Write scripts and outputs under sample directory * Update test for coverage --- definitions/rd_dna_initiation_map.yaml | 2 + definitions/rd_dna_parameters.yaml | 14 + lib/MIP/Program/Samtools.pm | 63 ++--- lib/MIP/Recipes/Analysis/Mitodel.pm | 299 +++++++++++++++++++++ 
lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm | 4 +- t/analysis_mitodel.t | 126 +++++++++ t/samtools_stats.t | 9 +- 7 files changed, 482 insertions(+), 35 deletions(-) create mode 100644 lib/MIP/Recipes/Analysis/Mitodel.pm create mode 100644 t/analysis_mitodel.t diff --git a/definitions/rd_dna_initiation_map.yaml b/definitions/rd_dna_initiation_map.yaml index 1e4bea00e..91e57f7a3 100644 --- a/definitions/rd_dna_initiation_map.yaml +++ b/definitions/rd_dna_initiation_map.yaml @@ -18,6 +18,8 @@ CHAIN_ALL: - CHAIN_TCOV: - tiddit_coverage - chromograph_cov + - CHAIN_MTDEL: + - mitodel - CHAIN_SBCOV: - sambamba_depth - CHAIN_SMN: diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index c8a08c25b..d92fc3ff9 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -175,6 +175,7 @@ recipe_core_number: gzip_fastq: 0 manta: 36 markduplicates: 13 + mitodel: 1 mt_annotation: 1 multiqc_ar: 1 peddy_ar: 4 @@ -240,6 +241,7 @@ recipe_memory: gatk_variantrecalibration: 30 glnexus_merge: 10 markduplicates: 10 + mitodel: 2 mt_annotation: 2 picardtools_collecthsmetrics: 8 picardtools_collectmultiplemetrics: 8 @@ -295,6 +297,7 @@ recipe_time: gzip_fastq: 2 manta: 30 markduplicates: 20 + mitodel: 2 mt_annotation: 1 multiqc_ar: 5 peddy_ar: 1 @@ -1533,6 +1536,17 @@ bcftools_core: - bgzip - tabix type: recipe +mitodel: + analysis_mode: sample + associated_recipe: + - mip + data_type: SCALAR + default: 1 + outfile_suffix: ".txt" + file_tag: _mitodel + program_executables: + - samtools + type: recipe mt_annotation: analysis_mode: case associated_recipe: diff --git a/lib/MIP/Program/Samtools.pm b/lib/MIP/Program/Samtools.pm index 956198307..855218def 100644 --- a/lib/MIP/Program/Samtools.pm +++ b/lib/MIP/Program/Samtools.pm @@ -325,11 +325,10 @@ sub samtools_faidx { store => \$infile_path, strict_type => 1, }, - outfile_path => { store => \$outfile_path, strict_type => 1, }, - regions_ref => { default => [], store => 
\$regions_ref, strict_type => 1, }, - stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, - stderrfile_path_append => - { store => \$stderrfile_path_append, strict_type => 1, }, + outfile_path => { store => \$outfile_path, strict_type => 1, }, + regions_ref => { default => [], store => \$regions_ref, strict_type => 1, }, + stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, + stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; @@ -412,10 +411,8 @@ sub samtools_flagstat { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - my @commands = ( - get_executable_base_command( { base_command => $BASE_COMMAND, } ), - qw{ flagstat } - ); + my @commands = + ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), qw{ flagstat } ); push @commands, $infile_path; @@ -484,10 +481,8 @@ sub samtools_idxstats { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - my @commands = ( - get_executable_base_command( { base_command => $BASE_COMMAND, } ), - qw{ idxstats } - ); + my @commands = + ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), qw{ idxstats } ); push @commands, $infile_path; @@ -541,10 +536,9 @@ sub samtools_index { store => \$infile_path, strict_type => 1, }, - stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, - stderrfile_path_append => - { store => \$stderrfile_path_append, strict_type => 1, }, - stdoutfile_path => { store => \$stdoutfile_path, strict_type => 1, }, + stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, + stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, + stdoutfile_path => { store => \$stdoutfile_path, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; @@ -643,10 +637,9 @@ sub samtools_merge { store => \$region, strict_type => 1, }, - 
stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, - stderrfile_path_append => - { store => \$stderrfile_path_append, strict_type => 1, }, - stdoutfile_path => { + stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, + stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, + stdoutfile_path => { strict_type => 1, store => \$stdoutfile_path, }, @@ -775,10 +768,9 @@ sub samtools_sort { store => \$referencefile_path, strict_type => 1, }, - stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, - stderrfile_path_append => - { store => \$stderrfile_path_append, strict_type => 1, }, - stdoutfile_path => { + stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, + stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, + stdoutfile_path => { strict_type => 1, store => \$stdoutfile_path, }, @@ -855,6 +847,7 @@ sub samtools_stats { ## : $auto_detect_input_format => Ignored (input format is auto-detected) ## : $filehandle => Sbatch filehandle to write to ## : $infile_path => Infile path +## : $insert_size => Maximum insert size ## : $outfile_path => Outfile path ## : $regions_ref => Regions to process {REF} ## : $remove_overlap => Remove overlaps of paired-end reads from coverage and base count computations @@ -867,6 +860,7 @@ sub samtools_stats { ## Flatten argument(s) my $filehandle; my $infile_path; + my $insert_size; my $outfile_path; my $regions_ref; my $stderrfile_path; @@ -891,6 +885,11 @@ sub samtools_stats { store => \$infile_path, strict_type => 1, }, + insert_size => { + allow => qr/ ^\d+$ /sxm, + store => \$insert_size, + strict_type => 1, + }, outfile_path => { store => \$outfile_path, strict_type => 1, }, remove_overlap => { allow => [ undef, 0, 1 ], @@ -898,11 +897,10 @@ sub samtools_stats { store => \$remove_overlap, strict_type => 1, }, - regions_ref => { default => [], store => \$regions_ref, strict_type => 1, }, - stderrfile_path => { 
store => \$stderrfile_path, strict_type => 1, }, - stderrfile_path_append => - { store => \$stderrfile_path_append, strict_type => 1, }, - stdoutfile_path => { + regions_ref => { default => [], store => \$regions_ref, strict_type => 1, }, + stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, + stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, + stdoutfile_path => { strict_type => 1, store => \$stdoutfile_path, }, @@ -918,6 +916,11 @@ sub samtools_stats { push @commands, q{-s}; } + if ($insert_size) { + + push @commands, q{--insert-size} . $SPACE . $insert_size; + } + if ($remove_overlap) { push @commands, q{--remove-overlaps}; diff --git a/lib/MIP/Recipes/Analysis/Mitodel.pm b/lib/MIP/Recipes/Analysis/Mitodel.pm new file mode 100644 index 000000000..0729f16f3 --- /dev/null +++ b/lib/MIP/Recipes/Analysis/Mitodel.pm @@ -0,0 +1,299 @@ +package MIP::Recipes::Analysis::Mitodel; + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile devnull }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use utf8; +use warnings; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw{ :all }; +use List::MoreUtils qw{ first_value }; +use Readonly; + +## MIPs lib/ +use MIP::Constants qw{ $DASH $LOG_NAME $NEWLINE $PIPE $SPACE $UNDERSCORE }; + +BEGIN { + + require Exporter; + use base qw{ Exporter }; + + # Functions and variables which can be optionally exported + our @EXPORT_OK = qw{ analysis_mitodel }; + +} + +sub analysis_mitodel { + +## Function : Report mitochondria deletion signatures from WGS data +## Returns : +## Arguments: $active_parameter_href => Active parameters for this analysis hash {REF} +## : $file_info_href => File_info hash {REF} +## : $job_id_href => Job id hash {REF} +## : $parameter_href => Parameter hash {REF} +## : $profile_base_command => Submission 
profile base command +## : $recipe_name => Recipe name +## : $sample_id => Sample id +## : $sample_info_href => Info on samples and case hash {REF} + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $active_parameter_href; + my $file_info_href; + my $job_id_href; + my $parameter_href; + my $recipe_name; + my $sample_id; + my $sample_info_href; + + ## Default(s) + my $case_id; + my $profile_base_command; + + my $tmpl = { + active_parameter_href => { + default => {}, + defined => 1, + required => 1, + store => \$active_parameter_href, + strict_type => 1, + }, + case_id => { + default => $arg_href->{active_parameter_href}{case_id}, + store => \$case_id, + strict_type => 1, + }, + file_info_href => { + default => {}, + defined => 1, + required => 1, + store => \$file_info_href, + strict_type => 1, + }, + job_id_href => { + default => {}, + defined => 1, + required => 1, + store => \$job_id_href, + strict_type => 1, + }, + parameter_href => { + default => {}, + defined => 1, + required => 1, + store => \$parameter_href, + strict_type => 1, + }, + profile_base_command => { + default => q{sbatch}, + store => \$profile_base_command, + strict_type => 1, + }, + recipe_name => { + defined => 1, + required => 1, + store => \$recipe_name, + strict_type => 1, + }, + sample_id => { + defined => 1, + required => 1, + store => \$sample_id, + strict_type => 1, + }, + sample_info_href => { + default => {}, + defined => 1, + required => 1, + store => \$sample_info_href, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::File_info qw{ get_io_files parse_io_outfiles }; + use MIP::File::Path qw { remove_file_path_suffix }; + use MIP::Language::Awk qw{ awk }; + use MIP::Program::Gnu::Software::Gnu_grep qw{ gnu_grep }; + use MIP::Program::Samtools qw{ samtools_stats }; + use MIP::Processmanagement::Processes qw{ submit_recipe }; + use MIP::Recipe qw{ parse_recipe_prerequisites }; + use MIP::Sample_info qw{ 
set_file_path_to_store set_recipe_outfile_in_sample_info }; + use MIP::Script::Setup_script qw{ setup_script }; + + ### PREPROCESSING: + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger($LOG_NAME); + + ## Unpack parameters + ## Get the io infiles per chain and id + my %io = get_io_files( + { + id => $sample_id, + file_info_href => $file_info_href, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + stream => q{in}, + } + ); + + my $infile_name_prefix = $io{in}{file_name_prefix}; + my @infile_paths = @{ $io{in}{file_paths} }; + + my $infile_path = first_value { / $infile_name_prefix [.]M|chrM /sxm } @infile_paths; + + if ( not $infile_path ) { + + $log->warn( + qq{Mitochondrial contig is not part of analysis contig set - skipping $recipe_name}); + return 1; + } + + my %recipe = parse_recipe_prerequisites( + { + active_parameter_href => $active_parameter_href, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + } + ); + + ## Set and get the io files per chain, id and stream + %io = ( + %io, + parse_io_outfiles( + { + chain_id => $recipe{job_id_chain}, + id => $sample_id, + file_info_href => $file_info_href, + file_name_prefixes_ref => [$infile_name_prefix], + outdata_dir => $active_parameter_href->{outdata_dir}, + parameter_href => $parameter_href, + recipe_name => $recipe_name, + } + ) + ); + + my $outfile_path = $io{out}{file_path}; + + ## Filehandles + # Create anonymous filehandle + my $filehandle = IO::Handle->new(); + + ## Creates recipe directories (info & data & script), recipe script filenames and writes sbatch header + my ( $recipe_file_path, $recipe_info_path ) = setup_script( + { + active_parameter_href => $active_parameter_href, + core_number => $recipe{core_number}, + directory_id => $sample_id, + filehandle => $filehandle, + job_id_href => $job_id_href, + memory_allocation => $recipe{memory}, + process_time => $recipe{time}, + recipe_directory => $recipe_name, + recipe_name => $recipe_name, + } + ); 
+ + ### SHELL: + + say {$filehandle} q{## } . $recipe_name; + + samtools_stats( + { + auto_detect_input_format => 1, + filehandle => $filehandle, + infile_path => $infile_path, + insert_size => 16_000, + remove_overlap => 0, + } + ); + print {$filehandle} $PIPE . $SPACE; + + gnu_grep( + { + filehandle => $filehandle, + infile_path => $DASH, + pattern => q{^IS}, + } + ); + print {$filehandle} $PIPE . $SPACE; + + my $awk_statement = + + # identification of read pairs that are separated by >1.2 kb but <15 kb + q?($2>=1200 && $2<=15000) {sum=sum+$3}? + + # identification of normal read pairs which are <1.2 kb but >15 kb + . q?($2<1200 || $2>15000) {sum_norm=sum_norm+$3}? + + # Add end rule + . q?END? + + # ratio of discordant to normal read pairs + . q?{print "intermediate discordant ", sum, "normal ", sum_norm, "ratio ppk", sum*1000/(sum_norm+sum)}?; + + awk( + { + filehandle => $filehandle, + statement => $awk_statement, + stdoutfile_path => $outfile_path, + + } + ); + print {$filehandle} $NEWLINE; + + ## Close filehandle + close $filehandle or $log->logcroak(q{Could not close filehandle}); + + if ( $recipe{mode} == 1 ) { + + ## Collect QC metadata info for later use + set_recipe_outfile_in_sample_info( + { + path => $outfile_path, + recipe_name => $recipe_name, + sample_info_href => $sample_info_href, + } + ); + + set_file_path_to_store( + { + format => q{meta}, + id => $sample_id, + path => $outfile_path, + recipe_name => $recipe_name, + sample_info_href => $sample_info_href, + } + ); + + submit_recipe( + { + base_command => $profile_base_command, + case_id => $case_id, + dependency_method => q{sample_to_island}, + job_id_chain => $recipe{job_id_chain}, + job_id_href => $job_id_href, + job_reservation_name => $active_parameter_href->{job_reservation_name}, + log => $log, + max_parallel_processes_count_href => + $file_info_href->{max_parallel_processes_count}, + recipe_file_path => $recipe_file_path, + sample_id => $sample_id, + submission_profile => 
$active_parameter_href->{submission_profile}, + } + ); + } + return 1; +} + +1; diff --git a/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm b/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm index 6dd2715b4..8efc5d425 100644 --- a/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm +++ b/lib/MIP/Recipes/Pipeline/Analyse_rd_dna.pm @@ -129,7 +129,7 @@ sub parse_rd_dna { Readonly my @MIP_VEP_PLUGINS => qw{ sv_vep_plugin vep_plugin }; Readonly my @ONLY_WGS_VARIANT_CALLER_RECIPES => qw{ cnvnator_ar delly_reformat tiddit }; Readonly my @ONLY_WGS_RECIPIES => - qw{ chromograph_rhoviz cnvnator_ar delly_call delly_reformat expansionhunter + qw{ chromograph_rhoviz cnvnator_ar delly_call delly_reformat expansionhunter mitodel samtools_subsample_mt smncopynumbercaller star_caller telomerecat_ar tiddit }; Readonly my @REMOVE_CONFIG_KEYS => qw{ associated_recipe }; @@ -474,6 +474,7 @@ sub pipeline_analyse_rd_dna { use MIP::Recipes::Analysis::Mip_qccollect qw{ analysis_mip_qccollect }; use MIP::Recipes::Analysis::Mip_vcfparser qw{ analysis_mip_vcfparser }; use MIP::Recipes::Analysis::Mip_vercollect qw{ analysis_mip_vercollect }; + use MIP::Recipes::Analysis::Mitodel qw{ analysis_mitodel }; use MIP::Recipes::Analysis::Mt_annotation qw{ analysis_mt_annotation }; use MIP::Recipes::Analysis::Multiqc qw{ analysis_multiqc }; use MIP::Recipes::Analysis::Peddy qw{ analysis_peddy }; @@ -578,6 +579,7 @@ sub pipeline_analyse_rd_dna { gzip_fastq => \&analysis_gzip_fastq, manta => \&analysis_manta, markduplicates => \&analysis_markduplicates, + mitodel => \&analysis_mitodel, mt_annotation => \&analysis_mt_annotation, multiqc_ar => \&analysis_multiqc, peddy_ar => \&analysis_peddy, diff --git a/t/analysis_mitodel.t b/t/analysis_mitodel.t new file mode 100644 index 000000000..e48b8bf6b --- /dev/null +++ b/t/analysis_mitodel.t @@ -0,0 +1,126 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use 
File::Spec::Functions qw{ catdir catfile }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; +use Test::Trap; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COLON $COMMA $SPACE }; +use MIP::Test::Fixtures qw{ test_add_io_for_recipe test_log test_mip_hashes }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( + q{MIP::Recipes::Analysis::Mitodel} => [qw{ analysis_mitodel }], + q{MIP::Test::Fixtures} => [qw{ test_add_io_for_recipe test_log test_mip_hashes }], + ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Recipes::Analysis::Mitodel qw{ analysis_mitodel }; + +diag( q{Test analysis_mitodel from Mitodel.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . 
$EXECUTABLE_NAME ); + +test_log( { log_name => q{MIP}, no_screen => 1, } ); + +## Given analysis parameters +my $recipe_name = q{mitodel}; +my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); + +my %active_parameter = test_mip_hashes( + { + mip_hash_name => q{active_parameter}, + recipe_name => $recipe_name, + } +); +$active_parameter{$recipe_name} = 1; +$active_parameter{recipe_core_number}{$recipe_name} = 1; +$active_parameter{recipe_time}{$recipe_name} = 1; +my $sample_id = $active_parameter{sample_ids}[0]; + +my %file_info = test_mip_hashes( + { + mip_hash_name => q{file_info}, + recipe_name => $recipe_name, + } +); + +my %job_id; +my %parameter = test_mip_hashes( + { + mip_hash_name => q{recipe_parameter}, + recipe_name => $recipe_name, + } +); +test_add_io_for_recipe( + { + file_info_href => \%file_info, + id => $sample_id, + parameter_href => \%parameter, + recipe_name => $recipe_name, + step => q{bam}, + } +); + +my %sample_info; + +my $is_ok = analysis_mitodel( + { + active_parameter_href => \%active_parameter, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_id => $sample_id, + sample_info_href => \%sample_info, + } +); + +## Then return TRUE +ok( $is_ok, q{ Executed analysis recipe } . 
$recipe_name ); + +delete $file_info{io}{TEST}{ADM1059A1}{mitodel}{in}{file_paths}[-1]; + +$is_ok = analysis_mitodel( + { + active_parameter_href => \%active_parameter, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_id => $sample_id, + sample_info_href => \%sample_info, + } +); + +ok( $is_ok, qq{ Skipped analysis recipe $recipe_name when no Mitochondrial contig} ); + +done_testing(); diff --git a/t/samtools_stats.t b/t/samtools_stats.t index 8b141df82..1ebdbc6a3 100644 --- a/t/samtools_stats.t +++ b/t/samtools_stats.t @@ -23,16 +23,13 @@ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; use MIP::Test::Commands qw{ test_function }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; ### Check all internal dependency modules and imports ## Modules with import - my %perl_module = ( - q{MIP::Program::Samtools} => [qw{ samtools_stats }], -); + my %perl_module = ( q{MIP::Program::Samtools} => [qw{ samtools_stats }], ); test_import( { perl_module_href => \%perl_module, } ); } @@ -89,6 +86,10 @@ my %specific_argument = ( input => 1, expected_output => q{-s}, }, + insert_size => { + input => q{16000}, + expected_output => q{--insert-size 16000}, + }, outfile_path => { input => q{outpath}, expected_output => q{> outpath}, From 19f1ee4efb1f03b93afa49cdc67b5b804ab5796c Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 24 Aug 2021 13:19:38 +0200 Subject: [PATCH 031/116] sed command for adding INFO tag to vcf --- lib/MIP/Recipes/Analysis/Glnexus.pm | 49 ++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index c3b68507a..4e8fd379d 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -17,7 +17,7 @@ use autodie qw{ :all }; use Readonly; ## MIPs lib/ -use MIP::Constants 
qw{ $DASH $LOG_NAME $NEWLINE $PIPE $SPACE }; +use MIP::Constants qw{ $DASH $LOG_NAME $NEWLINE $PIPE $SINGLE_QUOTE $SPACE }; BEGIN { @@ -114,7 +114,8 @@ sub analysis_glnexus { use MIP::File_info qw{ get_io_files parse_io_outfiles }; use MIP::Program::Bcftools qw{ bcftools_norm }; use MIP::Program::Glnexus qw{ glnexus_merge }; - use MIP::Program::Htslib qw{ htslib_tabix }; + use MIP::Program::Gnu::Software::Gnu_sed qw{ gnu_sed }; + use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; use MIP::Processmanagement::Processes qw{ submit_recipe }; use MIP::Recipe qw{ parse_recipe_prerequisites }; use MIP::Sample_info qw{ set_file_path_to_store set_recipe_outfile_in_sample_info }; @@ -220,13 +221,32 @@ sub analysis_glnexus { { filehandle => $filehandle, infile_path => $DASH, - outfile_path => $outfile_path, - output_type => q{z}, + output_type => q{v}, reference_path => $active_parameter_href->{human_genome_reference}, remove_duplicates => 1, threads => $core_number, } ); + print {$filehandle} $PIPE . $SPACE; + + ## Add to info filed so that scout can identify the caller + my $sed_script = _build_sed_script( {} ); + gnu_sed( + { + filehandle => $filehandle, + script => $sed_script, + } + ); + print {$filehandle} $PIPE . $SPACE; + + htslib_bgzip( + { + filehandle => $filehandle, + stdoutfile_path => $outfile_path, + threads => $core_number, + + } + ); say {$filehandle} $NEWLINE; htslib_tabix { @@ -280,4 +300,25 @@ sub analysis_glnexus { return 1; } +sub _build_sed_script { + + ## Function : Build sed script to add caller information to vcf + + my $header_info = + q{##INFO=}; + my $info_tag = q{FOUND_IN=deepvariant}; + + my $sed_script = $SINGLE_QUOTE + ## Find first occurence of ##INFO + . q{0,/^##INFO.*/} + + ## Prepend header to line + . q{s//} . $header_info . q{\n&/; } + + ## Append new info tag to all INFO columns (8th) + . q{s/[^\t]*/&;} . $info_tag . q{/8} . 
$SINGLE_QUOTE; + + return $sed_script; +} + 1; From 7ec0a75c15cff17e6067bf4083e36dbf3b165786 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 24 Aug 2021 13:58:29 +0200 Subject: [PATCH 032/116] fixed info type --- lib/MIP/Recipes/Analysis/Glnexus.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index 4e8fd379d..868061d01 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -305,7 +305,7 @@ sub _build_sed_script { ## Function : Build sed script to add caller information to vcf my $header_info = - q{##INFO=}; + q{##INFO=}; my $info_tag = q{FOUND_IN=deepvariant}; my $sed_script = $SINGLE_QUOTE From 7b73620ad002207c2d9a3dc6418f64105c73a366 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 25 Aug 2021 13:37:52 +0200 Subject: [PATCH 033/116] turning off haplotypecaller --- CHANGELOG.md | 2 ++ definitions/rd_dna_parameters.yaml | 13 ++++++++----- lib/MIP/Cli/Mip/Analyse/Rd_dna.pm | 1 + templates/grch38_mip_rd_dna_config.yaml | 4 ---- templates/mip_rd_dna_config.yaml | 4 ---- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b773341e6..03cbb905f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
## [Develop] - HmtNote: annotate mitochondrial variants in VCF file +- Mitochondrial deletion analysis +- GATK Haplotypecaller has been turned off in favour of Deepvariant ### Tools diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index d92fc3ff9..9ebf11f6e 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -523,7 +523,7 @@ gatk_baserecalibration: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _brecal outfile_suffix: ".bam" program_executables: @@ -1192,7 +1192,7 @@ gatk_haplotypecaller: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _haptc outfile_suffix: ".vcf" program_executables: @@ -1263,7 +1263,7 @@ gatk_genotypegvcfs: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _gent outfile_suffix: ".vcf" program_executables: @@ -1288,7 +1288,7 @@ gatk_gathervcfs: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: "" outfile_suffix: ".vcf" program_executables: @@ -1306,7 +1306,7 @@ gatk_variantrecalibration: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _vrecal program_executables: - bcftools @@ -1459,12 +1459,15 @@ gatk_combinevariants_prioritize_caller: associated_recipe: - gatk_combinevariantcallsets data_type: SCALAR + default: deepvariant mandatory: no type: recipe_argument gatk_combinevariants_callers_to_combine: associated_recipe: - gatk_combinevariantcallsets data_type: ARRAY + default: + - glnexus_merge mandatory: no type: recipe_argument prepareforvariantannotationblock: diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm index 964e7fe8a..97b31e119 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm @@ -1397,6 +1397,7 @@ q{Number of hom-ref genotypes to infer at sites not present in a panel. 
Connecte q{gatk_combinevariants_callers_to_combine} => ( cmd_flag => q{gatk_combinevar_use_callers}, documentation => q{Combine vcf output from these recipes}, + cmd_tags => [q{Defaults: glnexus_merge}], is => q{rw}, isa => ArrayRef [ enum( [qw{ gatk_variantrecalibration glnexus_merge }] ), ], ) diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index 71f39299b..f5134c3ed 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -66,10 +66,6 @@ vcfanno_config: grch38_vcfanno_config_-v0.2-.toml ### Analysis ## Programs ## Parameters -gatk_combinevariants_prioritize_caller: deepvariant,haplotypecaller -gatk_combinevariants_callers_to_combine: - - gatk_variantrecalibration - - glnexus_merge gatk_path: /opt/conda/opt/gatk-3.8 qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml picardtools_path: /usr/picard/ diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 5745445b7..96a0b9f79 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -47,10 +47,6 @@ fqf_annotations: - GNOMADAF - GNOMADAF_popmax - SWEGENAF -gatk_combinevariants_prioritize_caller: deepvariant,haplotypecaller -gatk_combinevariants_callers_to_combine: - - gatk_variantrecalibration - - glnexus_merge gatk_path: /usr picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml From c87e7e3ea242b466f68cf2fe2e705eb4f7c66d53 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 16:32:22 +0200 Subject: [PATCH 034/116] updated htslib --- CHANGELOG.md | 3 +++ containers/htslib/Dockerfile | 10 +++++----- documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 3 +-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b773341e6..a9c07b9c9 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -6,10 +6,13 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Develop] - HmtNote: annotate mitochondrial variants in VCF file +- Updating to latest and greatest versions ### Tools HmtNote: 0.7.2 +htslib: 1.10.2 -> 1.13 + ### References diff --git a/containers/htslib/Dockerfile b/containers/htslib/Dockerfile index ebf93e7a5..8c120e75f 100644 --- a/containers/htslib/Dockerfile +++ b/containers/htslib/Dockerfile @@ -1,17 +1,17 @@ ################## BASE IMAGE ###################### -FROM clinicalgenomics/mip_base:2.0 +FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.0" -LABEL version="5" +LABEL base_image="clinicalgenomics/mip_base:2.1" +LABEL version="6" LABEL software="htslib" -LABEL software.version="1.10.2" +LABEL software.version="1.13" LABEL extra.binaries="bcftools, bgzip, samtools, tabix" LABEL maintainer="Clinical-Genomics/MIP" -RUN conda install bcftools=1.10.2=hd2cd319_0 htslib=1.10.2=h78d89cc_0 samtools=1.10=h9402c20_2 +RUN conda install bcftools=1.13=h3a49de5_0 htslib=1.13=h9093b5e_0 samtools=1.13=h8c37831_0 ## Clean up after conda RUN /opt/conda/bin/conda clean -ya diff --git a/documentation/Setup.md b/documentation/Setup.md index b9ce86bf0..505d27dd1 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -54,7 +54,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [GENMOD] (version: 3.7.3) - [Gffcompare] (version: 0.11.2) - [Glnexus] (version: 1.3.1) -- [Htslib] (version: 1.10.2) +- [Htslib] (version: 1.13) - [Manta] (version: 1.6.0) - [Megafusion] (version: 66a3a80) - [MultiQC] (version: 1.10.1) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 822bd9e3a..5f87c2f24 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -108,7 +108,7 @@ container: bgzip: samtools: tabix: - uri: 
docker.io/clinicalgenomics/htslib:1.10.2 + uri: docker.io/clinicalgenomics/htslib:1.13 manta: executable: configManta.py: @@ -253,4 +253,3 @@ container: executable: vcf2cytosure: uri: docker.io/jemten/vcf2cytosure:0.5.1 - From f55d02cba88797581a1aa1be617f5996382ea3c9 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 16:39:15 +0200 Subject: [PATCH 035/116] fixed Dockerfile new naming conventions --- containers/htslib/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/htslib/Dockerfile b/containers/htslib/Dockerfile index 8c120e75f..bc7081ada 100644 --- a/containers/htslib/Dockerfile +++ b/containers/htslib/Dockerfile @@ -4,7 +4,7 @@ FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.1" +LABEL base-image="clinicalgenomics/mip_base:2.1" LABEL version="6" LABEL software="htslib" LABEL software.version="1.13" From 18c3f86351b09fb0c84e1563cf6a676cc9e7b728 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 16:57:25 +0200 Subject: [PATCH 036/116] updated cyrius --- CHANGELOG.md | 1 + containers/cyrius/Dockerfile | 22 +++++++++++----------- documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9c07b9c9..366c9a039 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
HmtNote: 0.7.2 htslib: 1.10.2 -> 1.13 +cyrius v1.1 -> v1.1.1 ### References diff --git a/containers/cyrius/Dockerfile b/containers/cyrius/Dockerfile index 2820e5102..ff7160c72 100644 --- a/containers/cyrius/Dockerfile +++ b/containers/cyrius/Dockerfile @@ -4,28 +4,28 @@ FROM clinicalgenomics/mip_base:2.1 ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.1" -LABEL version="2" +LABEL base-image="clinicalgenomics/mip_base:2.1" +LABEL version="3" LABEL software="Cyrius" -LABEL software.version="v1.1" +LABEL software.version="v1.1.1" LABEL extra.binaries="star_caller.py" LABEL maintainer="Clinical-Genomics/MIP" WORKDIR /opt/conda/share ## Pysam errors when pip installing -RUN conda install pysam=0.16.0 && \ +RUN conda install pysam=0.16.0.1 && \ conda clean -ya ## Download and extract -RUN wget --no-verbose --no-check-certificate https://github.com/Illumina/Cyrius/archive/v1.1.zip && \ - unzip v1.1.zip && \ - rm v1.1.zip +RUN wget --no-verbose --no-check-certificate https://github.com/Illumina/Cyrius/archive/v1.1.1.zip && \ + unzip v1.1.1.zip && \ + rm v1.1.1.zip ## Move to directory and install requirements -RUN cd Cyrius-1.1 && \ - python -m pip install --no-cache-dir -r requirements.txt +RUN cd Cyrius-1.1.1 && \ + python -m pip install --no-cache-dir -r requirements.txt -RUN chmod a+x /opt/conda/share/Cyrius-1.1/star_caller.py +RUN chmod a+x /opt/conda/share/Cyrius-1.1.1/star_caller.py -ENV PATH ${PATH}:/opt/conda/share/Cyrius-1.1 +ENV PATH ${PATH}:/opt/conda/share/Cyrius-1.1.1 diff --git a/documentation/Setup.md b/documentation/Setup.md index 505d27dd1..d7748e34f 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -45,7 +45,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Chanjo] (version: 4.6.0) - [Chromograph] (version: 1.1) - [Cnvnator] (version: 0.4.1) -- [Cyrius] (version: v1.1) +- [Cyrius] (version: v1.1.1) - [Expansionhunter] (version 4.0.2) - [FastQC] 
(version: 0.11.9) - [Deepvariant] (version: 1.1.0) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 5f87c2f24..cc844283d 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -52,7 +52,7 @@ container: cyrius: executable: star_caller.py: - uri: docker.io/clinicalgenomics/cyrius:v1.1 + uri: docker.io/clinicalgenomics/cyrius:v1.1.1 deepvariant: executable: run_deepvariant: /opt/deepvariant/bin/run_deepvariant From 6183380d59571dc12af1c5b924cddbba09f3d7b9 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 17:00:48 +0200 Subject: [PATCH 037/116] updated gatk4 --- CHANGELOG.md | 2 +- documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 366c9a039..0048ff05e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). HmtNote: 0.7.2 htslib: 1.10.2 -> 1.13 cyrius v1.1 -> v1.1.1 - +gatk 4.2.0.0 -> 4.2.2.0 ### References diff --git a/documentation/Setup.md b/documentation/Setup.md index d7748e34f..65aedbb82 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -50,7 +50,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [FastQC] (version: 0.11.9) - [Deepvariant] (version: 1.1.0) - [Delly] (version 0.8.7) -- [GATK] (version: 3.8.1 and 4.2.0.0) +- [GATK] (version: 3.8.1 and 4.2.2.0) - [GENMOD] (version: 3.7.3) - [Gffcompare] (version: 0.11.2) - [Glnexus] (version: 1.3.1) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index cc844283d..5843d854d 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -85,7 +85,7 @@ container: gatk4: executable: gatk: - uri: docker.io/broadinstitute/gatk:4.2.0.0 + uri: docker.io/broadinstitute/gatk:4.2.2.0 genmod: executable: genmod: From 
d4b6a9d07f1096c2f828ae3ddbea5204e63ded25 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 17:05:31 +0200 Subject: [PATCH 038/116] updated glnexus --- CHANGELOG.md | 1 + documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0048ff05e..0280604de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ HmtNote: 0.7.2 htslib: 1.10.2 -> 1.13 cyrius v1.1 -> v1.1.1 gatk 4.2.0.0 -> 4.2.2.0 +glnexus v1.3.1 -> v1.4.1 ### References diff --git a/documentation/Setup.md b/documentation/Setup.md index 65aedbb82..69e68821a 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -53,7 +53,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [GATK] (version: 3.8.1 and 4.2.2.0) - [GENMOD] (version: 3.7.3) - [Gffcompare] (version: 0.11.2) -- [Glnexus] (version: 1.3.1) +- [Glnexus] (version: v1.4.1) - [Htslib] (version: 1.13) - [Manta] (version: 1.6.0) - [Megafusion] (version: 66a3a80) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 5843d854d..2f307d486 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -97,7 +97,7 @@ container: glnexus: executable: glnexus_cli: - uri: quay.io/mlin/glnexus:v1.3.1 + uri: ghcr.io/dnanexus-rnd/glnexus:v1.4.1 hmtnote: executable: hmtnote: From 39e23f8ffe960664a67788297cd1ebc379396733 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 17:07:38 +0200 Subject: [PATCH 039/116] updated multiqc --- CHANGELOG.md | 1 + documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0280604de..9a05f2dfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ htslib: 1.10.2 -> 1.13 cyrius v1.1 -> v1.1.1 gatk 4.2.0.0 -> 4.2.2.0 glnexus v1.3.1 -> v1.4.1 +multiqc 1.10.1 -> v1.11 ### References 
diff --git a/documentation/Setup.md b/documentation/Setup.md index 69e68821a..2fd2f6a8f 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -57,7 +57,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Htslib] (version: 1.13) - [Manta] (version: 1.6.0) - [Megafusion] (version: 66a3a80) -- [MultiQC] (version: 1.10.1) +- [MultiQC] (version: v1.11) - [Pdfmerger] (version: 1.0) - [Peddy] (version: 0.4.3) - [PicardTools] (version: 2.25.0) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 2f307d486..83516d039 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -125,7 +125,7 @@ container: multiqc: executable: multiqc: - uri: docker.io/ewels/multiqc:1.10.1 + uri: docker.io/ewels/multiqc:v1.11 perl: executable: perl: From d3ae3eb8b1ddbf651c9d746781f17d463fdc1d5d Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Wed, 25 Aug 2021 17:13:39 +0200 Subject: [PATCH 040/116] updated vep --- CHANGELOG.md | 1 + documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a05f2dfb..e95e4fd0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ cyrius v1.1 -> v1.1.1 gatk 4.2.0.0 -> 4.2.2.0 glnexus v1.3.1 -> v1.4.1 multiqc 1.10.1 -> v1.11 +vep release_103.1 -> release_104.3 ### References diff --git a/documentation/Setup.md b/documentation/Setup.md index 2fd2f6a8f..6ad7f04fd 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -81,7 +81,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Varg] (version: 1.2.0) - [Vcf2cytosure] (version: 0.5.1) - [Vcfanno] (version: 0.3.2) -- [VEP] (version: 103.1) with plugin "ExACpLI", "MaxEntScan, LoFtool", "SpliceAI" +- [VEP] (version: 104.3) with plugin "dbNSFP", "DisGeNET", "ExACpLI", "MaxEntScan, LoFtool", "SpliceAI" The version number after the software name 
are tested for compatibility with MIP. diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 83516d039..b7e8dffa4 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -248,7 +248,7 @@ container: vep: executable: vep: - uri: docker.io/ensemblorg/ensembl-vep:release_103.1 + uri: docker.io/ensemblorg/ensembl-vep:release_104.3 vcf2cytosure: executable: vcf2cytosure: From ac2dde148a0bb114c46468344b84fdebc11c3595 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 25 Aug 2021 17:50:55 +0200 Subject: [PATCH 041/116] changes after code review --- lib/MIP/Recipes/Analysis/Glnexus.pm | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Glnexus.pm b/lib/MIP/Recipes/Analysis/Glnexus.pm index 868061d01..e7f9f6f63 100644 --- a/lib/MIP/Recipes/Analysis/Glnexus.pm +++ b/lib/MIP/Recipes/Analysis/Glnexus.pm @@ -229,12 +229,11 @@ sub analysis_glnexus { ); print {$filehandle} $PIPE . $SPACE; - ## Add to info filed so that scout can identify the caller - my $sed_script = _build_sed_script( {} ); + ## Add to info field so that scout can identify the caller gnu_sed( { filehandle => $filehandle, - script => $sed_script, + script => _build_sed_script( {} ), } ); print {$filehandle} $PIPE . $SPACE; From 6df8c166aea9142ed47f79840ca12a057dcfa01b Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 26 Aug 2021 09:49:07 +0200 Subject: [PATCH 042/116] updated deepvariant, deeptrio, and star-fusion --- CHANGELOG.md | 7 +++++-- documentation/Setup.md | 5 +++-- templates/mip_install_config.yaml | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e95e4fd0f..6c9ad1027 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,15 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
### Tools -HmtNote: 0.7.2 -htslib: 1.10.2 -> 1.13 cyrius v1.1 -> v1.1.1 +deepvariant 1.1.0-gpu -> 1.2.0-gpu +deeptrio 1.1.0-gpu -> 1.2.0-gpu gatk 4.2.0.0 -> 4.2.2.0 glnexus v1.3.1 -> v1.4.1 +HmtNote: 0.7.2 +htslib: 1.10.2 -> 1.13 multiqc 1.10.1 -> v1.11 +star-fusion 1.10.0 -> 1.10.1 vep release_103.1 -> release_104.3 ### References diff --git a/documentation/Setup.md b/documentation/Setup.md index 6ad7f04fd..9883add06 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -48,7 +48,8 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Cyrius] (version: v1.1.1) - [Expansionhunter] (version 4.0.2) - [FastQC] (version: 0.11.9) -- [Deepvariant] (version: 1.1.0) +- [Deeptrio] (version: 1.2.0) +- [Deepvariant] (version: 1.2.0) - [Delly] (version 0.8.7) - [GATK] (version: 3.8.1 and 4.2.2.0) - [GENMOD] (version: 3.7.3) @@ -70,7 +71,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Sambamba] (version: 0.6.8) - [Samtools] (version: 1.11) - [SMNCopyNumberCaller] (version: v1.1.1) -- [STAR-Fusion] (version: 1.10.0) +- [STAR-Fusion] (version: 1.10.1) - [STAR] (version: 2.7.8a) - [Stranger] (version: 0.8.0) - [StringTie] (version: 2.1.3b) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index b7e8dffa4..882910a74 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -60,12 +60,12 @@ container: call_variants: /opt/deepvariant/bin/call_variants postprocess_variants: /opt/deepvariant/bin/postprocess_variants gpu_support: 1 - uri: docker.io/google/deepvariant:1.1.0-gpu + uri: docker.io/google/deepvariant:1.2.0-gpu deeptrio: executable: run_deeptrio: /opt/deepvariant/bin/deeptrio/run_deeptrio gpu_support: 1 - uri: docker.io/google/deepvariant:deeptrio-1.1.0-gpu + uri: docker.io/google/deepvariant:deeptrio-1.2.0-gpu delly: executable: delly: @@ -197,7 +197,7 @@ container: remove_long_intron_readthru_transcripts.pl: 
/usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/remove_long_intron_readthru_transcripts.pl restrict_genome_to_chr_entries.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/restrict_genome_to_chr_entries.pl STAR-Fusion: /usr/local/src/STAR-Fusion/STAR-Fusion - uri: docker.io/trinityctat/starfusion:1.10.0 + uri: docker.io/trinityctat/starfusion:1.10.1 stranger: executable: stranger: From 916a9cc8400185e06174716e7c58fa9e794c81a8 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 26 Aug 2021 11:21:16 +0200 Subject: [PATCH 043/116] removing the plugins --- documentation/Setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/Setup.md b/documentation/Setup.md index 9883add06..878b622bc 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -82,7 +82,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Varg] (version: 1.2.0) - [Vcf2cytosure] (version: 0.5.1) - [Vcfanno] (version: 0.3.2) -- [VEP] (version: 104.3) with plugin "dbNSFP", "DisGeNET", "ExACpLI", "MaxEntScan, LoFtool", "SpliceAI" +- [VEP] (version: 104.3) with plugin "ExACpLI", "MaxEntScan, LoFtool", "SpliceAI" The version number after the software name are tested for compatibility with MIP. 
From fce14ed22115ed76f144cb0f4800932ab411c249 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 26 Aug 2021 17:57:25 +0200 Subject: [PATCH 044/116] feat(qccollect): limit output --- definitions/rd_dna_panel_parameters.yaml | 6 ++ definitions/rd_dna_parameters.yaml | 6 ++ definitions/rd_rna_parameters.yaml | 6 ++ lib/MIP/Cli/Mip/Qccollect.pm | 10 ++ lib/MIP/Main/Qccollect.pm | 119 ++++++++++++++++++++++ lib/MIP/Program/Mip.pm | 12 +++ lib/MIP/Recipes/Analysis/Mip_qccollect.pm | 12 +++ t/mip_qccollect.t | 4 + t/mip_qccollect.test | 1 + 9 files changed, 176 insertions(+) diff --git a/definitions/rd_dna_panel_parameters.yaml b/definitions/rd_dna_panel_parameters.yaml index 6b36a0b55..37cd9d20e 100755 --- a/definitions/rd_dna_panel_parameters.yaml +++ b/definitions/rd_dna_panel_parameters.yaml @@ -1188,6 +1188,12 @@ qccollect_eval_metric_file: is_reference: 1 reference: reference_dir type: path +qccollect_limit_qc_output: + associated_recipe: + - qccollect_ar + data_type: SCALAR + default: 1 + type: recipe_argument qccollect_regexp_file: associated_recipe: - qccollect_ar diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 9ebf11f6e..26889a192 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -2060,6 +2060,12 @@ qccollect_eval_metric_file: is_reference: 1 reference: reference_dir type: path +qccollect_limit_qc_output: + associated_recipe: + - qccollect_ar + data_type: SCALAR + default: 1 + type: recipe_argument qccollect_store_metrics_outfile: associated_recipe: - qccollect_ar diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index ee7be7a46..3a16adab8 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -1076,6 +1076,12 @@ qccollect_eval_metric_file: is_reference: 1 reference: reference_dir type: path +qccollect_limit_qc_output: + associated_recipe: + - qccollect_ar + data_type: SCALAR + default: 1 + type: 
recipe_argument qccollect_regexp_file: associated_recipe: - qccollect_ar diff --git a/lib/MIP/Cli/Mip/Qccollect.pm b/lib/MIP/Cli/Mip/Qccollect.pm index 00ef5b488..c2e206810 100644 --- a/lib/MIP/Cli/Mip/Qccollect.pm +++ b/lib/MIP/Cli/Mip/Qccollect.pm @@ -40,6 +40,7 @@ sub run { # Flatten argument(s) my $eval_metric_file = $arg_href->{eval_metric_file}; my $evaluate_plink_gender = $arg_href->{evaluate_plink_gender}; + my $limit_qc_output = $arg_href->{limit_qc_output}; my $log_file = $arg_href->{log_file}; my $outfile = $arg_href->{outfile}; my $print_regexp_outfile = $arg_href->{print_regexp_outfile}; @@ -72,6 +73,7 @@ sub run { { eval_metric_file => $eval_metric_file, evaluate_plink_gender => $evaluate_plink_gender, + limit_qc_output => $limit_qc_output, outfile => $outfile, regexp_file => $regexp_file, sample_info_file => $sample_info_file, @@ -107,6 +109,14 @@ sub _build_usage { ) ); + option( + q{limit_qc_output} => ( + documentation => q{Only print a limited number of qc mettrics}, + is => q{rw}, + isa => Bool, + ) + ); + option( q{log_file} => ( cmd_aliases => [qw{ l log }], diff --git a/lib/MIP/Main/Qccollect.pm b/lib/MIP/Main/Qccollect.pm index 690b660ac..663f7a903 100644 --- a/lib/MIP/Main/Qccollect.pm +++ b/lib/MIP/Main/Qccollect.pm @@ -19,6 +19,7 @@ use warnings qw{ FATAL utf8 }; ## CPANM use autodie qw{ open close :all }; use Modern::Perl qw{ 2018 }; +use Readonly; ## MIPs lib/ use MIP::Constants qw{ $LOG_NAME }; @@ -46,6 +47,7 @@ sub mip_qccollect { ## Returns : ## Arguments: $eval_metric_file => File with evaluation metrics ## : $evaluate_plink_gender => Evaluate plink gender +## : $limit_qc_output => Only print a limited number of qc values ## : $outfile => Data metric output file ## : $regexp_file => Regular expression file ## : $sample_info_file => Sample info file @@ -57,6 +59,7 @@ sub mip_qccollect { ## Flatten argument(s) my $eval_metric_file; my $evaluate_plink_gender; + my $limit_qc_output; my $outfile; my $regexp_file; my $sample_info_file; @@ 
-78,6 +81,10 @@ sub mip_qccollect { store => \$outfile, strict_type => 1, }, + limit_qc_output => { + store => \$limit_qc_output, + strict_type => 1, + }, regexp_file => { defined => 1, required => 1, @@ -195,6 +202,13 @@ sub mip_qccollect { } ); + parse_limit_qc_output( + { + limit_qc_output => $limit_qc_output, + qc_href => \%qc_data, + } + ); + ## Writes a qc data hash to file write_to_file( { @@ -507,4 +521,109 @@ sub sample_qc { return; } +sub parse_limit_qc_output { + +## Function : Restrict output +## Returns : +## Arguments: $limit_qc_output => Remove keys from regexp hash +## : $qc_href => qccollect regexp hash {REF} + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $limit_qc_output; + my $qc_href; + + my $tmpl = { + limit_qc_output => { + store => \$limit_qc_output, + strict_type => 1, + }, + qc_href => { + default => {}, + defined => 1, + required => 1, + store => \$qc_href, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + ## Delete surplus metrics + delete $qc_href->{metrics}; + + return if not $limit_qc_output; + + Readonly my @QC_TO_SKIP => qw{ collectmultiplemetrics variantevalexome }; + + # comp_overlap_data_header + # count_variants_data_header + # indel_summary_data_header + # multiallelic_summary_data_header + # titv_variant_evaluator_data_header + # variant_summary_header + # validation_report_header + + foreach my $key (@QC_TO_SKIP) { + + _delete_key( + { + data_href => $qc_href, + key => $key, + } + ); + } + return; +} + +sub _delete_key { + +## Function : Delete key from hash and, recursively, from all nested hashes (modifies the hash in place) +## Returns : +## Arguments: $data_href => Data {REF} +## : $key => Key + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $data_href; + my $key; + + my $tmpl = { + data_href => { + default => {}, + defined => 1, + required => 1, + store => \$data_href, + strict_type => 1, + }, + key => { + store => \$key, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse 
arguments!}; + + ## Remove the key from the referenced hash itself, not from a copy + delete $data_href->{$key}; + + ## Descend into every remaining nested hash + VALUE: + foreach my $value ( values %{$data_href} ) { + + next VALUE if ( ref $value ne q{HASH} ); + + _delete_key( + { + data_href => $value, + key => $key, + } + ); + } + + return; +} + 1; diff --git a/lib/MIP/Program/Mip.pm b/lib/MIP/Program/Mip.pm index 51a5a28dc..27ea62a38 100644 --- a/lib/MIP/Program/Mip.pm +++ b/lib/MIP/Program/Mip.pm @@ -147,6 +147,7 @@ sub mip_qccollect { ## Arguments: $eval_metric_file => Mip qc evaluation metrics file ## : $filehandle => Filehandle to write to ## : $infile_path => Infile path +## : $limit_qc_output => Limit the qc metrics that is printed to the qc outfile ## : $log_file_path => Log file path ## : $outfile_path => Outfile path ## : $regexp_file_path => Regular expression file @@ -172,6 +173,7 @@ sub mip_qccollect { ## Default(s) my $skip_evaluation; + my $limit_qc_output; my $tmpl = { eval_metric_file => { @@ -187,6 +189,12 @@ sub mip_qccollect { store => \$infile_path, strict_type => 1, }, + limit_qc_output => { + allow => [ undef, 0, 1 ], + default => 1, + store => \$limit_qc_output, + strict_type => 1, + }, log_file_path => { store => \$log_file_path, strict_type => 1, }, outfile_path => { defined => 1, @@ -239,6 +247,10 @@ sub mip_qccollect { push @commands, q{--log_file} . $SPACE . $log_file_path; } + if ($limit_qc_output) { + + push @commands, q{--limit_qc_output}; + } push @commands, q{--outfile} . $SPACE . $outfile_path; push @commands, q{--regexp_file} . $SPACE . 
$regexp_file_path; diff --git a/lib/MIP/Recipes/Analysis/Mip_qccollect.pm b/lib/MIP/Recipes/Analysis/Mip_qccollect.pm index 79067e2d1..bbdb29c7d 100644 --- a/lib/MIP/Recipes/Analysis/Mip_qccollect.pm +++ b/lib/MIP/Recipes/Analysis/Mip_qccollect.pm @@ -208,6 +208,18 @@ sub analysis_mip_qccollect { path => $active_parameter_href->{qccollect_store_metrics_outfile}, recipe_name => $recipe_name, sample_info_href => $sample_info_href, + tag => q{deliverable}, + } + ); + + set_file_path_to_store( + { + format => q{meta}, + id => $case_id, + path => $active_parameter_href->{qccollect_eval_metric_file}, + recipe_name => $recipe_name, + sample_info_href => $sample_info_href, + tag => q{audit}, } ); diff --git a/t/mip_qccollect.t b/t/mip_qccollect.t index 97d91574a..0a10659bb 100644 --- a/t/mip_qccollect.t +++ b/t/mip_qccollect.t @@ -99,6 +99,10 @@ my %specific_argument = ( . $SPACE . catfile(qw{ outdata_dir case_id case_id_qc_sample_info.yaml }), }, + limit_qc_output => { + input => 1, + expected_output => q{--limit_qc_output}, + }, log_file_path => { input => catfile(qw{ outcase_directory case_id _qccollect.log }), expected_output => q{--log_file} diff --git a/t/mip_qccollect.test b/t/mip_qccollect.test index ca0d83948..272199367 100644 --- a/t/mip_qccollect.test +++ b/t/mip_qccollect.test @@ -83,6 +83,7 @@ my $cmds_ref = [ $sample_info_file, q{--evaluate_plink_gender}, q{--store_metrics_outfile}, $store_metrics_outfile, q{--outfile}, $outfile, + q{--limit_qc_output} ]; my %process_return = child_process( From 5529e6a3b2618be5de925bd1a8a5f7253d95b366 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 26 Aug 2021 18:16:01 +0200 Subject: [PATCH 045/116] fixing store path --- lib/MIP/Recipes/Analysis/Mip_qccollect.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Mip_qccollect.pm b/lib/MIP/Recipes/Analysis/Mip_qccollect.pm index bbdb29c7d..0cb606702 100644 --- a/lib/MIP/Recipes/Analysis/Mip_qccollect.pm +++ 
b/lib/MIP/Recipes/Analysis/Mip_qccollect.pm @@ -216,7 +216,7 @@ sub analysis_mip_qccollect { { format => q{meta}, id => $case_id, - path => $active_parameter_href->{qccollect_eval_metric_file}, + path => $outfile_path, recipe_name => $recipe_name, sample_info_href => $sample_info_href, tag => q{audit}, From 8910ed5b96cea5df329396dcaec4c6e1e48a26c3 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 26 Aug 2021 18:55:44 +0200 Subject: [PATCH 046/116] fixing delete key sub --- lib/MIP/Main/Qccollect.pm | 52 +++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/lib/MIP/Main/Qccollect.pm b/lib/MIP/Main/Qccollect.pm index 663f7a903..741caed88 100644 --- a/lib/MIP/Main/Qccollect.pm +++ b/lib/MIP/Main/Qccollect.pm @@ -555,22 +555,14 @@ sub parse_limit_qc_output { return if not $limit_qc_output; - Readonly my @QC_TO_SKIP => qw{ collectmultiplemetrics variantevalexome }; + Readonly my @QC_TO_SKIP => qw{ collectmultiplemetrics variantevalall variantevalexome }; - # comp_overlap_data_header - # count_variants_data_header - # indel_summary_data_header - # multiallelic_summary_data_header - # titv_variant_evaluator_data_header - # variant_summary_header - # validation_report_header - - foreach my $key (@QC_TO_SKIP) { + foreach my $delete_key (@QC_TO_SKIP) { _delete_key( { data_href => $qc_href, - key => $key, + delete_key => $delete_key, } ); } @@ -581,14 +573,14 @@ sub _delete_key { ## Function : Delete key from nested hash ## Returns : -## Arguments: $data_href => Data {REF} -## : $key => Key +## Arguments: $data_href => Data {REF} +## : $delete_key => Key to remove my ($arg_href) = @_; ## Flatten argument(s) my $data_href; - my $key; + my $delete_key; my $tmpl = { data_href => { @@ -598,31 +590,33 @@ sub _delete_key { store => \$data_href, strict_type => 1, }, - key => { - store => \$key, + delete_key => { + store => \$delete_key, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - ## 
Copy hash to enable recursive removal of keys - my %info = %{$data_href}; + KEY_VALUE_PAIR: + while ( my ( $key, $value ) = each %{ $data_href } ) { - if ( $info{$key} ) { + if ($key eq $delete_key) { - delete $info{$key}; - } - elsif ( ref $info{$key} eq q{HASH} ) { + delete $data_href->{$delete_key}; + return; + } - _delete_key( - { - data_href => $info{$key}, - key => $key, - } - ); - } + elsif ( ref $data_href->{$key} eq q{HASH} ) { + _delete_key( + { + data_href => $value, + delete_key => $delete_key, + } + ); + } + } return; } From e44c6c56f4a9c4ddc97b1a8aa3ceb27748149a4b Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 31 Aug 2021 14:15:19 +0200 Subject: [PATCH 047/116] review changes --- lib/MIP/Main/Qccollect.pm | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/MIP/Main/Qccollect.pm b/lib/MIP/Main/Qccollect.pm index 741caed88..e5533d5cf 100644 --- a/lib/MIP/Main/Qccollect.pm +++ b/lib/MIP/Main/Qccollect.pm @@ -47,7 +47,7 @@ sub mip_qccollect { ## Returns : ## Arguments: $eval_metric_file => File with evaluation metrics ## : $evaluate_plink_gender => Evaluate plink gender -## : $limit_qc_output => Only print a limited number of qc values +## : $limit_qc_output => Only print a limited number of qc metrics ## : $outfile => Data metric output file ## : $regexp_file => Regular expression file ## : $sample_info_file => Sample info file @@ -526,7 +526,7 @@ sub parse_limit_qc_output { ## Function : Restrict output ## Returns : ## Arguments: $limit_qc_output => Remove keys from regexp hash -## : $qc_href => qccollect regexp hash {REF} +## : $qc_href => Qccollect regexp hash {REF} my ($arg_href) = @_; @@ -555,14 +555,14 @@ sub parse_limit_qc_output { return if not $limit_qc_output; - Readonly my @QC_TO_SKIP => qw{ collectmultiplemetrics variantevalall variantevalexome }; + Readonly my @SKIP_QC_METRICS => qw{ collectmultiplemetrics variantevalall variantevalexome }; - foreach my $delete_key (@QC_TO_SKIP) { + foreach my 
$delete_metric_key (@SKIP_QC_METRICS) { _delete_key( { - data_href => $qc_href, - delete_key => $delete_key, + data_href => $qc_href, + delete_key => $delete_metric_key, } ); } @@ -599,9 +599,9 @@ sub _delete_key { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; KEY_VALUE_PAIR: - while ( my ( $key, $value ) = each %{ $data_href } ) { + while ( my ( $key, $value ) = each %{$data_href} ) { - if ($key eq $delete_key) { + if ( $key eq $delete_key ) { delete $data_href->{$delete_key}; return; @@ -611,8 +611,8 @@ sub _delete_key { _delete_key( { - data_href => $value, - delete_key => $delete_key, + data_href => $value, + delete_key => $delete_key, } ); } From c1cdf637c0b32d7c5f22f3d1628be30a9737357a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 3 Sep 2021 13:54:45 +0200 Subject: [PATCH 048/116] add ghcr to parse_container_uri sub (#1941) * add ghcr to parse_container_uri sub * fix repeat declarations --- lib/MIP/Environment/Container.pm | 2 +- t/parse_container_uri.t | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/MIP/Environment/Container.pm b/lib/MIP/Environment/Container.pm index 984bfae1e..1bd8285b2 100644 --- a/lib/MIP/Environment/Container.pm +++ b/lib/MIP/Environment/Container.pm @@ -337,7 +337,7 @@ sub parse_container_uri { return if ( ${$uri_ref} =~ m{ \A docker:[/]{2} }xms ); - if ( ${$uri_ref} =~ /\A quay|docker[.]io /xms ) { + if ( ${$uri_ref} =~ /\A ghcr|quay|docker[.]io /xms ) { ${$uri_ref} = q{docker://} . 
${$uri_ref}; } diff --git a/t/parse_container_uri.t b/t/parse_container_uri.t index c97e4667e..ebd4192be 100644 --- a/t/parse_container_uri.t +++ b/t/parse_container_uri.t @@ -21,16 +21,13 @@ use Modern::Perl qw{ 2018 }; use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; ### Check all internal dependency modules and imports ## Modules with import - my %perl_module = ( - q{MIP::Environment::Container} => [qw{ parse_container_uri }], -); + my %perl_module = ( q{MIP::Environment::Container} => [qw{ parse_container_uri }], ); test_import( { perl_module_href => \%perl_module, } ); } @@ -87,4 +84,19 @@ parse_container_uri( ## Then leave uri unchanged $expected_uri = q{docker://quay.io/clinicalgenomics/chanjo:4.2.0}; is( $uri, $expected_uri, q{Parse quay uri for singularity} ); + +## Given a ghcr uri +$uri = q{ghcr.io/dnanexus-rnd/glnexus:v1.4.1}; +## When container manager is singularity +parse_container_uri( + { + container_manager => q{singularity}, + uri_ref => \$uri, + } +); + +## Then prepend docker:// +$expected_uri = q{docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1}; +is( $uri, $expected_uri, q{Parse uri for singularity} ); + done_testing(); From 860e5bbbb780d1cf42db78a0ca0c468476cbb520 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Sep 2021 15:27:46 +0200 Subject: [PATCH 049/116] Update glnexus uri and dockerfile (#1942) * Update glnexus uri and dockerfile * Update dockerfile * Fix codefactor errors * Remove apt cache --- containers/glnexus/Dockerfile | 26 ++++++++++++++++++++++++++ templates/mip_install_config.yaml | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 containers/glnexus/Dockerfile diff --git a/containers/glnexus/Dockerfile b/containers/glnexus/Dockerfile new file mode 100644 index 000000000..e0f6852fb --- /dev/null +++ b/containers/glnexus/Dockerfile @@ -0,0 +1,26 @@ 
+################## BASE IMAGE ###################### + +FROM clinicalgenomics/mip_base:2.1 + +################## METADATA ###################### + +LABEL base-image="clinicalgenomics/mip_base:2.1" +LABEL version="1" +LABEL software="glnexus" +LABEL software.version="1.4.1" +LABEL extra.binaries="glnexus" +LABEL maintainer="Clinical-Genomics/MIP" + +WORKDIR /app + +ENV PATH="/app:${PATH}" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libc6 \ + ca-certificates && \ + apt-get clean && \ + apt-get purge && \ + rm -rf /var/lib/apt/lists/* && \ + wget --no-verbose https://github.com/dnanexus-rnd/GLnexus/releases/download/v1.4.1/glnexus_cli && \ + chmod 700 /app/glnexus_cli diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 882910a74..09553a1ee 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -97,7 +97,7 @@ container: glnexus: executable: glnexus_cli: - uri: ghcr.io/dnanexus-rnd/glnexus:v1.4.1 + uri: docker.io/clinicalgenomics/glnexus:v1.4.1 hmtnote: executable: hmtnote: From cadbd2a1e2310be840d0273a4c6e7113286f5825 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 7 Sep 2021 17:06:28 +0200 Subject: [PATCH 050/116] feat(references): rna update --- ...gtf => grch38_gencode_annotation_-v38-.gtf} | 0 ....fa => grch38_gencode_transcripts_-v38-.fa} | 0 templates/grch38_mip_rd_rna_config.yaml | 4 ++-- .../mip_download_rd_rna_config_-1.0-.yaml | 18 ++++++++++++------ 4 files changed, 14 insertions(+), 8 deletions(-) rename t/data/references/{grch38_gencode_annotation_-v37-.gtf => grch38_gencode_annotation_-v38-.gtf} (100%) rename t/data/references/{grch38_gencode_transcripts_-v37-.fa => grch38_gencode_transcripts_-v38-.fa} (100%) diff --git a/t/data/references/grch38_gencode_annotation_-v37-.gtf b/t/data/references/grch38_gencode_annotation_-v38-.gtf similarity index 100% rename from t/data/references/grch38_gencode_annotation_-v37-.gtf rename to
t/data/references/grch38_gencode_annotation_-v38-.gtf diff --git a/t/data/references/grch38_gencode_transcripts_-v37-.fa b/t/data/references/grch38_gencode_transcripts_-v38-.fa similarity index 100% rename from t/data/references/grch38_gencode_transcripts_-v37-.fa rename to t/data/references/grch38_gencode_transcripts_-v38-.fa diff --git a/templates/grch38_mip_rd_rna_config.yaml b/templates/grch38_mip_rd_rna_config.yaml index 87d349271..eac765bae 100644 --- a/templates/grch38_mip_rd_rna_config.yaml +++ b/templates/grch38_mip_rd_rna_config.yaml @@ -25,8 +25,8 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i ## References human_genome_reference: grch38_homo_sapiens_-gencode_pri-.fasta star_fusion_genome_lib_dir: cluster_constant_path!/references/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play/ctat_genome_lib_build_dir -transcript_annotation: grch38_gencode_annotation_-v37-.gtf -transcript_sequence: grch38_gencode_transcripts_-v37-.fa +transcript_annotation: grch38_gencode_annotation_-v38-.gtf +transcript_sequence: grch38_gencode_transcripts_-v38-.fa gatk_haplotypecaller_snp_known_set: grch38_dbsnp_-146-.vcf.gz gatk_baserecalibration_known_sites: - grch38_1000g_indels_-phase1-.vcf.gz diff --git a/templates/mip_download_rd_rna_config_-1.0-.yaml b/templates/mip_download_rd_rna_config_-1.0-.yaml index 91e51aa21..243f880b7 100644 --- a/templates/mip_download_rd_rna_config_-1.0-.yaml +++ b/templates/mip_download_rd_rna_config_-1.0-.yaml @@ -16,10 +16,11 @@ reference: - gold_standard_dbsnp - 146 gencode_annotation: - - v34 - v37 + - v38 gencode_transcript: - v37 + - v38 human_reference: - decoy_5 - assembly38 @@ -42,16 +43,16 @@ reference_feature: outfile_decompress: gzip url_prefix: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_37/GRCh37_mapping/ grch38: - v34: - file: gencode.v34.annotation.gtf.gz - outfile: grch38_gencode_annotation_-v34-.gtf.gz - outfile_decompress: gzip - url_prefix: 
ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_34/ v37: file: gencode.v37.primary_assembly.annotation.gtf.gz outfile: grch38_gencode_annotation_-v37-.gtf.gz outfile_decompress: gzip url_prefix: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_37/ + v38: + file: gencode.v38.primary_assembly.annotation.gtf.gz + outfile: grch38_gencode_annotation_-v38-.gtf.gz + outfile_decompress: gzip + url_prefix: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/ gencode_transcript: grch37: v37: @@ -65,6 +66,11 @@ reference_feature: outfile: grch38_gencode_transcripts_-v37-.fa.gz outfile_decompress: gzip url_prefix: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_37/ + v38: + file: gencode.v38.transcripts.fa.gz + outfile: grch38_gencode_transcripts_-v38-.fa.gz + outfile_decompress: gzip + url_prefix: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/ ctat_resource_lib: grch37: gencode_v19_ctat_lib_mar012021: From 90a2ae73bb1f6f488de11d7526e379cc12436030 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:44:27 +0200 Subject: [PATCH 051/116] Update glnexus exec permission (#1943) --- containers/glnexus/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/glnexus/Dockerfile b/containers/glnexus/Dockerfile index e0f6852fb..26dd59b3a 100644 --- a/containers/glnexus/Dockerfile +++ b/containers/glnexus/Dockerfile @@ -23,4 +23,4 @@ RUN apt-get update && \ apt-get purge && \ rm -rf /var/lib/apt/lists/* && \ wget --no-verbose https://github.com/dnanexus-rnd/GLnexus/releases/download/v1.4.1/glnexus_cli && \ - chmod 700 /app/glnexus_cli + chmod 755 /app/glnexus_cli From 7a3ee8404e758a59d5d26289a8118377b45a8d82 Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 9 Sep 2021 11:45:19 +0200 Subject: [PATCH 052/116] updated reference to the offline flag, rm'd undef for the offline flag --- lib/MIP/Program/HmtNote.pm 
| 2 +- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/MIP/Program/HmtNote.pm b/lib/MIP/Program/HmtNote.pm index be6ab52b1..c43850aa1 100644 --- a/lib/MIP/Program/HmtNote.pm +++ b/lib/MIP/Program/HmtNote.pm @@ -67,7 +67,7 @@ sub hmtnote_annotate { strict_type => 1, }, offline => { - allow => [ undef, 0, 1 ], + allow => [ 0, 1 ], store => \$offline, strict_type => 1, }, diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index ae4979a1c..b3543478d 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -211,7 +211,7 @@ sub analysis_mt_annotation { { filehandle => $filehandle, infile_path => $infile_path{$contig}, - offline => $active_parameter_href->{hmtnote_offline}, + offline => $active_parameter_href->{mt_offline}, outfile_path => $outfile_no_suffix, } ); @@ -274,13 +274,13 @@ sub analysis_mt_annotation { submit_recipe( { - base_command => $profile_base_command, - case_id => $case_id, - dependency_method => q{sample_to_case}, - job_id_chain => $recipe{job_id_chain}, - job_id_href => $job_id_href, - job_reservation_name => $active_parameter_href->{job_reservation_name}, - log => $log, + base_command => $profile_base_command, + case_id => $case_id, + dependency_method => q{sample_to_case}, + job_id_chain => $recipe{job_id_chain}, + job_id_href => $job_id_href, + job_reservation_name => $active_parameter_href->{job_reservation_name}, + log => $log, max_parallel_processes_count_href => $file_info_href->{max_parallel_processes_count}, recipe_file_path => $recipe_file_path, From f9bf4bf92d4739bc326da8c93af261c105675daf Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 9 Sep 2021 12:10:30 +0200 Subject: [PATCH 053/116] fixing git checks --- lib/MIP/Program/HmtNote.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MIP/Program/HmtNote.pm b/lib/MIP/Program/HmtNote.pm 
index c43850aa1..8629f95d9 100644 --- a/lib/MIP/Program/HmtNote.pm +++ b/lib/MIP/Program/HmtNote.pm @@ -49,13 +49,15 @@ sub hmtnote_annotate { ## Flatten argument(s) my $filehandle; my $infile_path; - my $offline; my $outfile_path; my $stderrfile_path; my $stderrfile_path_append; my $stdinfile_path; my $stdoutfile_path; + ## Default(s) + my $offline; + my $tmpl = { filehandle => { store => \$filehandle, @@ -68,6 +70,7 @@ sub hmtnote_annotate { }, offline => { allow => [ 0, 1 ], + default => 1, store => \$offline, strict_type => 1, }, From 536de3ea864e176fd9bfe25cc25dc627e969ee0a Mon Sep 17 00:00:00 2001 From: Mei Wu Date: Thu, 9 Sep 2021 13:17:06 +0200 Subject: [PATCH 054/116] I hope this is the final fix --- t/analysis_mt_annotation.t | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/analysis_mt_annotation.t b/t/analysis_mt_annotation.t index 53584c7d3..d7861e1f3 100644 --- a/t/analysis_mt_annotation.t +++ b/t/analysis_mt_annotation.t @@ -51,6 +51,7 @@ test_log( { log_name => q{MIP}, no_screen => 1, } ); ## Given analysis parameters my $recipe_name = q{mt_annotation}; +my $offline = q{mt_offline}; my $slurm_mock_cmd = catfile( $Bin, qw{ data modules slurm-mock.pl } ); my %active_parameter = test_mip_hashes( @@ -60,6 +61,7 @@ my %active_parameter = test_mip_hashes( } ); $active_parameter{$recipe_name} = 1; +$active_parameter{$offline} = 1; $active_parameter{recipe_core_number}{$recipe_name} = 1; $active_parameter{recipe_time}{$recipe_name} = 1; my $case_id = $active_parameter{case_id}; From 923a17458a7fbe254ef8ed086df85bae1ad83a82 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 9 Sep 2021 15:24:09 +0200 Subject: [PATCH 055/116] updates clinvar and loqusdb references --- CHANGELOG.md | 1 + templates/grch38_mip_rd_rna_config.yaml | 2 +- .../mip_download_rd_dna_config_-1.0-.yaml | 36 +++++++++---------- templates/mip_rd_dna_config.yaml | 18 +++++----- templates/mip_rd_dna_vcf_rerun_config.yaml | 4 +-- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index dc521a914..05873bd5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ vep release_103.1 -> release_104.3 - gnomad: r3.0 -> r3.1.1 - [NEW] gnomad mt: r3.1 +- clinvvar: 20210415 -> 20210828 ## [10.2.0] diff --git a/templates/grch38_mip_rd_rna_config.yaml b/templates/grch38_mip_rd_rna_config.yaml index eac765bae..6a4d77c15 100644 --- a/templates/grch38_mip_rd_rna_config.yaml +++ b/templates/grch38_mip_rd_rna_config.yaml @@ -41,4 +41,4 @@ fusion_cytoband_path: /arriba_v2.1.0/database/cytobands_hg38_GRCh38_v2.1.0.tsv fusion_protein_domain_path: /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3 picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml -vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/ +vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/ diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 45b794dd0..5b3ffbcbd 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -26,8 +26,8 @@ reference: chromograph_cytoband: - v1.0 clinvar: - - 20200905 - 20210415 + - 20210828 dbnsfp: - 3.5a - 4.0b2a @@ -122,8 +122,8 @@ reference: - 1.0 vcfanno_config: - v0.2 - - v1.15 - v1.16 + - v1.17 vcfanno_functions: - v1.0 reference_feature: @@ -314,15 +314,6 @@ reference_feature: url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/region/ clinvar: grch37: - 20200905: - file: clinvar_20200905.vcf.gz - file_check: clinvar_20200905.vcf.gz.md5 - file_index: clinvar_20200905.vcf.gz.tbi - outfile: grch37_clinvar_-20200905-.vcf.gz - outfile_check: grch37_clinvar_-20200905-.vcf.gz.md5 - outfile_index: grch37_clinvar_-20200905-.vcf.gz.tbi - outfile_check_method: md5sum 
- url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/archive_2.0/2020/ 20210415: file: clinvar_20210415.vcf.gz file_check: clinvar_20210415.vcf.gz.md5 @@ -332,6 +323,15 @@ reference_feature: outfile_index: grch37_clinvar_-20210415-.vcf.gz.tbi outfile_check_method: md5sum url_prefix: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly/ + 20210828: + file: clinvar_20210828.vcf.gz + file_check: clinvar_20210828.vcf.gz.md5 + file_index: clinvar_20210828.vcf.gz.tbi + outfile: grch37_clinvar_-20210828-.vcf.gz + outfile_check: grch37_clinvar_-20210828-.vcf.gz.md5 + outfile_index: grch37_clinvar_-20210828-.vcf.gz.tbi + outfile_check_method: md5sum + url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly grch38: 20200905: file: clinvar_20200905.vcf.gz @@ -902,13 +902,6 @@ reference_feature: url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/region/ vcfanno_config: grch37: - v1.15: - file: grch37_vcfanno_config_-v1.15-.toml - file_check: grch37_vcfanno_config_-v1.15-.toml.md5 - outfile: grch37_vcfanno_config_-v1.15-.toml - outfile_check: grch37_vcfanno_config_-v1.15-.toml.md5 - outfile_check_method: md5sum - url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ v1.16: file: grch37_vcfanno_config_-v1.16-.toml file_check: grch37_vcfanno_config_-v1.16-.toml.md5 @@ -916,6 +909,13 @@ reference_feature: outfile_check: grch37_vcfanno_config_-v1.16-.toml.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ + v1.17: + file: grch37_vcfanno_config_-v1.17-.toml + file_check: grch37_vcfanno_config_-v1.17-.toml.md5 + outfile: grch37_vcfanno_config_-v1.17-.toml + outfile_check: grch37_vcfanno_config_-v1.17-.toml.md5 + outfile_check_method: md5sum + url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/annotation/ 
grch38: v0.2: file: grch38_vcfanno_config_-v0.2-.toml diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 96a0b9f79..efd5d6704 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -31,7 +31,7 @@ sv_rank_model_file: svrank_model_-v1.8-.ini sv_svdb_query_db_files: # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER grch37_gnomad_reformated_-r2.1.1_sv-.vcf.gz: gnomad_sv|AF|AC|AF|AC|1 - grch37_loqusdb_sv_variants_export-20210416-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs + grch37_loqusdb_sv_variants_export-20210907-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs grch37_mip_sv_svdb_export_-2018-10-09-.vcf: clinical_genomics_mip|AF|OCC|FRQ|OCC|1 grch37_svdb_query_decipher_-v1.0.0-.vcf: decipher|AF|OCC|FRQ|OCC grch37_svdb_query_clingen_cgh_benign_-v1.0.0-.vcf: clingen_cgh_benign @@ -39,7 +39,7 @@ sv_svdb_query_db_files: grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1 grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1 vcf2cytosure_blacklist: grch37_cytosure_blacklist_-1.0-.bed -vcfanno_config: grch37_vcfanno_config_-v1.16-.toml +vcfanno_config: grch37_vcfanno_config_-v1.17-.toml ### Analysis ### Programs ## Parameters @@ -70,10 +70,10 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210415-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT -vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-103/cache/ -vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins +vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ +vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins vep_plugin: 
dbNSFP: exist_check: @@ -95,15 +95,15 @@ vep_plugin: LoFtool: exist_check: - type: file - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt MaxEntScan: exist_check: - type: directory - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload - SWA - NCSS SpliceAI: diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 791a86aae..53f17ec55 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -28,7 +28,7 @@ sv_rank_model_file: svrank_model_-v1.8-.ini # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER sv_svdb_query_db_files: grch37_gnomad_reformated_-r2.1.1_sv-.vcf.gz: gnomad_sv|AF|AC|AF|AC|1 - grch37_loqusdb_sv_variants_export-20210416-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs + grch37_loqusdb_sv_variants_export-20210907-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs grch37_mip_sv_svdb_export_-2018-10-09-.vcf: clinical_genomics_mip|AF|OCC|FRQ|OCC|1 grch37_svdb_query_decipher_-v1.0.0-.vcf: decipher|AF|OCC|FRQ|OCC grch37_svdb_query_clingen_cgh_benign_-v1.0.0-.vcf: clingen_cgh_benign @@ -36,7 +36,7 @@ sv_svdb_query_db_files: grch37_svdb_query_clingen_ngi_-v1.0.0-.vcf: clingen_ngi|AF|OCC|FRQ|OCC|1 
grch37_swegen_concat_sort_-20170830-.vcf: swegen|AF|OCC|FRQ|OCC|1 sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml -vcfanno_config: grch37_vcfanno_config_-v1.16-.toml +vcfanno_config: grch37_vcfanno_config_-v1.17-.toml ### Analysis ## Programs ## Parameters From 4c9294d04f1de811892a0127f4c67e335543cccb Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Thu, 9 Sep 2021 15:30:40 +0200 Subject: [PATCH 056/116] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05873bd5d..d543a3e7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ vep release_103.1 -> release_104.3 - gnomad: r3.0 -> r3.1.1 - [NEW] gnomad mt: r3.1 -- clinvvar: 20210415 -> 20210828 +- clinvar: 20210415 -> 20210828 ## [10.2.0] From 8a361133eb81a06a9b16e890baae54947ebfd221 Mon Sep 17 00:00:00 2001 From: jemten Date: Sun, 12 Sep 2021 11:35:40 +0200 Subject: [PATCH 057/116] feat(singularity): store and run .sif images --- CHANGELOG.md | 1 + definitions/analyse_parameters.yaml | 2 +- definitions/download_parameters.yaml | 4 +- definitions/dragen_rd_dna_parameters.yaml | 2 +- definitions/install_parameters.yaml | 13 + definitions/rd_dna_panel_parameters.yaml | 2 +- definitions/rd_dna_parameters.yaml | 2 +- definitions/rd_dna_vcf_rerun_parameters.yaml | 2 +- definitions/rd_rna_parameters.yaml | 2 +- lib/MIP/Active_parameter.pm | 6 +- lib/MIP/Cli/Mip/Analyse.pm | 2 +- lib/MIP/Cli/Mip/Download.pm | 2 +- lib/MIP/Cli/Mip/Install.pm | 21 ++ lib/MIP/Config.pm | 16 +- lib/MIP/Environment/Container.pm | 246 ++++++++++++++++- lib/MIP/Main/Install.pm | 1 - lib/MIP/Parameter.pm | 18 +- lib/MIP/Program/Docker.pm | 72 ++++- lib/MIP/Recipes/Install/Container.pm | 37 ++- .../test_data/install_active_parameters.yaml | 6 +- .../test_data/miptest_container_config.yaml | 257 ++++++++++++++++++ t/docker_pull.t | 105 +++++++ t/get_install_containers.t | 17 +- t/mip_analyse_dragen_rd_dna.test | 8 +- t/mip_analyse_rd_dna.test | 8 
+- t/mip_analyse_rd_dna_panel.test | 8 +- t/mip_analyse_rd_dna_vcf_rerun.test | 8 +- t/mip_analyse_rd_rna.test | 8 +- t/mip_install.test | 24 ++ t/parse_containers.t | 7 +- t/set_executable_container_cmd.t | 8 +- 31 files changed, 830 insertions(+), 85 deletions(-) create mode 100644 t/data/test_data/miptest_container_config.yaml create mode 100644 t/docker_pull.t diff --git a/CHANGELOG.md b/CHANGELOG.md index dc521a914..6c1dc4dae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Updating to latest and greatest versions - Mitochondrial deletion analysis - GATK Haplotypecaller has been turned off in favour of Deepvariant +- Introduces possibility to store singularity images locally as a .sif file ### Tools diff --git a/definitions/analyse_parameters.yaml b/definitions/analyse_parameters.yaml index 8c383b542..b1e5ae1b5 100644 --- a/definitions/analyse_parameters.yaml +++ b/definitions/analyse_parameters.yaml @@ -84,7 +84,7 @@ case_id: - mip data_type: SCALAR type: mip -install_config_file: +container_config_file: associated_recipe: - mip data_type: SCALAR diff --git a/definitions/download_parameters.yaml b/definitions/download_parameters.yaml index 9564958d9..cfcb2bbcf 100644 --- a/definitions/download_parameters.yaml +++ b/definitions/download_parameters.yaml @@ -124,7 +124,7 @@ custom_default_parameters: - mip data_type: ARRAY default: - - install_config_file + - container_config_file - reference_dir - temp_directory type: mip @@ -281,7 +281,7 @@ human_reference: program_executables: - samtools type: recipe -install_config_file: +container_config_file: associated_recipe: - mip data_type: SCALAR diff --git a/definitions/dragen_rd_dna_parameters.yaml b/definitions/dragen_rd_dna_parameters.yaml index ae8a1258b..b67228fba 100755 --- a/definitions/dragen_rd_dna_parameters.yaml +++ b/definitions/dragen_rd_dna_parameters.yaml @@ -8,7 +8,7 @@ custom_default_parameters: - conda_path - 
exome_target_bed - infile_dirs - - install_config_file + - container_config_file - pedigree_fam_file - picardtools_path - reference_dir diff --git a/definitions/install_parameters.yaml b/definitions/install_parameters.yaml index 59b8493aa..7aaa11c18 100644 --- a/definitions/install_parameters.yaml +++ b/definitions/install_parameters.yaml @@ -147,12 +147,25 @@ container: data_type: HASH mandatory: no type: mip +container_directory_path: + associated_recipe: + - mip + data_type: SCALAR + mandatory: no + type: path + update_path: absolute_path select_programs: associated_recipe: - mip data_type: ARRAY mandatory: no type: mip +singularity_local_install: + associated_recipe: + - mip + data_type: SCALAR + mandatory: no + type: mip skip_programs: associated_recipe: - mip diff --git a/definitions/rd_dna_panel_parameters.yaml b/definitions/rd_dna_panel_parameters.yaml index 37cd9d20e..b662e9c92 100755 --- a/definitions/rd_dna_panel_parameters.yaml +++ b/definitions/rd_dna_panel_parameters.yaml @@ -10,7 +10,7 @@ custom_default_parameters: - bwa_mem2_build_reference - exome_target_bed - infile_dirs - - install_config_file + - container_config_file - pedigree_fam_file - picardtools_path - qccollect_store_metrics_outfile diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 26889a192..086729033 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -10,7 +10,7 @@ custom_default_parameters: - bwa_mem2_build_reference - exome_target_bed - infile_dirs - - install_config_file + - container_config_file - pedigree_fam_file - picardtools_path - qccollect_store_metrics_outfile diff --git a/definitions/rd_dna_vcf_rerun_parameters.yaml b/definitions/rd_dna_vcf_rerun_parameters.yaml index a06b2fbae..a449fa8f1 100755 --- a/definitions/rd_dna_vcf_rerun_parameters.yaml +++ b/definitions/rd_dna_vcf_rerun_parameters.yaml @@ -8,7 +8,7 @@ custom_default_parameters: - conda_path - exome_target_bed - infile_dirs - - 
install_config_file + - container_config_file - pedigree_fam_file - picardtools_path - reference_dir diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 3a16adab8..72cb2dc53 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -8,7 +8,7 @@ custom_default_parameters: - conda_path - fusion_select_file - infile_dirs - - install_config_file + - container_config_file - pedigree_fam_file - picardtools_path - qccollect_store_metrics_outfile diff --git a/lib/MIP/Active_parameter.pm b/lib/MIP/Active_parameter.pm index 6c3cb8ea7..342eb93eb 100644 --- a/lib/MIP/Active_parameter.pm +++ b/lib/MIP/Active_parameter.pm @@ -50,9 +50,9 @@ BEGIN { remove_sample_id_from_gender set_conda_paths set_default_analysis_type + set_default_container_config_file set_default_human_genome set_default_infile_dirs - set_default_install_config_file set_default_parameter set_default_pedigree_fam_file set_default_program_test_file @@ -2215,7 +2215,7 @@ sub set_include_y { return; } -sub set_default_install_config_file { +sub set_default_container_config_file { ## Function : Set default install config file to active parameters ## Returns : @@ -2242,7 +2242,7 @@ sub set_default_install_config_file { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; ## Build default for mip install config - my $path = catfile( $Bin, qw{ templates mip_install_config.yaml } ); + my $path = catfile( $Bin, qw{ templates mip_container_config.yaml } ); $active_parameter_href->{$parameter_name} = $path; return; diff --git a/lib/MIP/Cli/Mip/Analyse.pm b/lib/MIP/Cli/Mip/Analyse.pm index 4a24a85bf..2beea32d9 100644 --- a/lib/MIP/Cli/Mip/Analyse.pm +++ b/lib/MIP/Cli/Mip/Analyse.pm @@ -155,7 +155,7 @@ q{Check analysis output and sets the analysis run status flag to finished in sam ); option( - q{install_config_file} => ( + q{container_config_file} => ( documentation => q{File with install configuration parameters in YAML format}, is => 
q{rw}, isa => Str, diff --git a/lib/MIP/Cli/Mip/Download.pm b/lib/MIP/Cli/Mip/Download.pm index 466201ccc..38126fb91 100644 --- a/lib/MIP/Cli/Mip/Download.pm +++ b/lib/MIP/Cli/Mip/Download.pm @@ -148,7 +148,7 @@ sub _build_usage { ); option( - q{install_config_file} => ( + q{container_config_file} => ( documentation => q{File with install configuration parameters in YAML format}, is => q{rw}, isa => Str, diff --git a/lib/MIP/Cli/Mip/Install.pm b/lib/MIP/Cli/Mip/Install.pm index c605712b7..2d52c80ba 100644 --- a/lib/MIP/Cli/Mip/Install.pm +++ b/lib/MIP/Cli/Mip/Install.pm @@ -78,6 +78,16 @@ sub _build_usage { ) ); + option( + q{container_directory_path} => ( + cmd_tags => [q{Default: "//bin"}], + documentation => + q{Save singularity images to directory. Requires singularity_local_install option}, + is => q{rw}, + isa => Str, + ) + ); + option( q{environment_name} => ( cmd_aliases => [qw{ envn }], @@ -143,6 +153,17 @@ sub _build_usage { ), ); + option( + q{singularity_local_install} => ( + cmd_tags => [q{Default: off}], + documentation => +q{Save singularity images to sif and update run instructions for offline mip execution}, + is => q{rw}, + isa => Bool, + required => 0, + ), + ); + option( q{skip_programs} => ( cmd_aliases => [qw{ skip }], diff --git a/lib/MIP/Config.pm b/lib/MIP/Config.pm index 1f7ea8fb6..fb134d308 100644 --- a/lib/MIP/Config.pm +++ b/lib/MIP/Config.pm @@ -94,32 +94,32 @@ sub check_cmd_config_vs_definition_file { sub get_install_containers { ## Function : Get install containers from install config file -## Returns : $install_config{container} -## Arguments: $install_config_file => File with containers from install config +## Returns : $container_config +## Arguments: $container_config_file => File with containers from install config my ($arg_href) = @_; ## Flatten argument(s) - my $install_config_file; + my $container_config_file; my $tmpl = { - install_config_file => { + container_config_file => { defined => 1, required => 1, - store => 
\$install_config_file, + store => \$container_config_file, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - my %install_config = read_from_file( + my %container_config = read_from_file( { format => q{yaml}, - path => $install_config_file, + path => $container_config_file, } ); - return %{ $install_config{container} }; + return %container_config; } sub parse_config { diff --git a/lib/MIP/Environment/Container.pm b/lib/MIP/Environment/Container.pm index 1bd8285b2..39c203625 100644 --- a/lib/MIP/Environment/Container.pm +++ b/lib/MIP/Environment/Container.pm @@ -4,7 +4,9 @@ use 5.026; use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; -use File::Spec::Functions qw{ catfile }; +use File::Basename qw{ fileparse }; +use File::Path qw{ make_path }; +use File::Spec::Functions qw{ catfile catdir }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ allow check last_error }; use utf8; @@ -26,8 +28,11 @@ BEGIN { our @EXPORT_OK = qw{ build_container_cmd get_recipe_executable_bind_path + parse_container_config + parse_container_path parse_container_uri parse_containers + pull_container run_container set_executable_container_cmd }; @@ -280,7 +285,7 @@ sub parse_containers { %{ $active_parameter_href->{container} } = get_install_containers( - { install_config_file => $active_parameter_href->{install_config_file}, } ); + { container_config_file => $active_parameter_href->{container_config_file}, } ); my %dynamic_parameter = ( reference_dir => $active_parameter_href->{reference_dir}, ); update_with_dynamic_config_parameters( @@ -304,6 +309,142 @@ sub parse_containers { return 1; } +sub parse_container_config { + +## Function : Parse container config and write to conda env +## Returns : +## Arguments: $conda_environment_path => Conda environment path +## : $container_href => Container hash {REF} + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $conda_environment_path; + my $container_href; + + 
my $tmpl = { + conda_environment_path => { + store => \$conda_environment_path, + strict_type => 1, + }, + container_href => { + default => {}, + store => \$container_href, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::Config qw{ get_install_containers }; + use MIP::Io::Write qw{ write_to_file }; + + ## Replace the uri with path + IMAGE: + foreach my $image ( keys %{$container_href} ) { + + if ( $container_href->{$image}{path} ) { + + $container_href->{$image}{uri} = delete $container_href->{$image}{path}; + } + } + + ## Make directory if it doesn't exist + my $container_config_dir_path = catdir( $conda_environment_path, qw{ bin templates } ); + + make_path($container_config_dir_path); + + ## Fetch possible old config + my $container_config_path = catfile( $container_config_dir_path, q{mip_container_config.yaml} ); + my %container_config = ( not -e $container_config_path ) ? () : get_install_containers( + { + container_config_file => $container_config_path, + } + ); + + ## Merge hashes + %container_config = ( %container_config, %{$container_href} ); + + write_to_file( + { + data_href => \%container_config, + format => q{yaml}, + path => $container_config_path, + } + ); + return; +} + +sub parse_container_path { + +## Function : Parse container download path +## Returns : +## Arguments: $conda_environment_path => Conda environment path +## : $container_directory_path => Container directory path +## : $container_href => Container hash {REF} +## : $container_manager => Container manager +## : $local_install => Local install switch + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $conda_environment_path; + my $container_directory_path; + my $container_href; + my $container_manager; + my $local_install; + + my $tmpl = { + conda_environment_path => { + store => \$conda_environment_path, + strict_type => 1, + }, + container_directory_path => { + store => \$container_directory_path, + strict_type 
=> 1, + }, + container_href => { + default => {}, + store => \$container_href, + strict_type => 1, + }, + container_manager => { + allow => [qw{ docker singularity }], + required => 1, + store => \$container_manager, + strict_type => 1, + }, + local_install => { + allow => [ undef, 0, 1 ], + store => \$local_install, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger($LOG_NAME); + + ## Don't set variable for docker and unless local singularity installation + return if $container_manager eq q{docker}; + return if not $local_install; + + ## Check for user input and default + $container_directory_path = $container_directory_path + // catdir( $conda_environment_path, q{bin} ); + + make_path($container_directory_path); + + ## Set container outpath + $container_href->{path} = + catfile( $container_directory_path, fileparse( $container_href->{uri} ) . q{.sif} ); + + $log->info( q{Saving image to} . $COLON . $SPACE . 
$container_href->{path} ); + + return; +} + sub parse_container_uri { ## Function : Parse container uri for selected container manager @@ -345,6 +486,107 @@ sub parse_container_uri { return; } +sub pull_container { + +## Function : Pull a docker or singularity container +## Returns : @commands +## Arguments: $container_manager => Container manager +## : $container_path => Path to container +## : $filehandle => Filehandle to write to +## : $stderrfile_path_append => Append stderr info to file path +## : $stdinfile_path => Stdinfile path +## : $stdoutfile_path => Stdoutfile path + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $container_manager; + my $container_uri; + my $container_outpath; + my $filehandle; + my $stderrfile_path; + my $stderrfile_path_append; + my $stdoutfile_path; + + ## Default(s) + my $force; + + my $tmpl = { + container_manager => { + allow => [qw{ docker singularity }], + required => 1, + store => \$container_manager, + strict_type => 1, + }, + container_outpath => { + store => \$container_outpath, + strict_type => 1, + }, + container_uri => { + defined => 1, + required => 1, + store => \$container_uri, + strict_type => 1, + }, + filehandle => { + store => \$filehandle, + }, + force => { + allow => [ undef, 0, 1 ], + default => 1, + store => \$force, + strict_type => 1, + }, + stderrfile_path => { + store => \$stderrfile_path, + strict_type => 1, + }, + stderrfile_path_append => { + store => \$stderrfile_path_append, + strict_type => 1, + }, + stdoutfile_path => { + store => \$stdoutfile_path, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::Program::Singularity qw{ singularity_pull }; + use MIP::Program::Docker qw{ docker_pull }; + + my %container_api = ( + docker => { + arg_href => { + filehandle => $filehandle, + image => $container_uri, + stderrfile_path => $stderrfile_path, + stderrfile_path_append => $stderrfile_path_append, + stdoutfile_path => $stdoutfile_path, 
+ }, + method => \&docker_pull, + }, + singularity => { + arg_href => { + container_uri => $container_uri, + filehandle => $filehandle, + force => $force, + outfile_path => $container_outpath, + stderrfile_path => $stderrfile_path, + stderrfile_path_append => $stderrfile_path_append, + stdoutfile_path => $stdoutfile_path, + }, + method => \&singularity_pull, + }, + ); + + my @commands = $container_api{$container_manager}{method} + ->( { %{ $container_api{$container_manager}{arg_href} } } ); + + return @commands; +} + sub run_container { ## Function : Run a docker container or exec a singularity image diff --git a/lib/MIP/Main/Install.pm b/lib/MIP/Main/Install.pm index 06ec5fed0..5df8b70bc 100644 --- a/lib/MIP/Main/Install.pm +++ b/lib/MIP/Main/Install.pm @@ -15,7 +15,6 @@ use warnings; use MIP::Active_parameter qw{ set_conda_paths update_to_absolute_path }; use MIP::Config qw{ check_cmd_config_vs_definition_file set_config_to_active_parameters }; use MIP::Constants qw{ $COLON $MIP_VERSION $SPACE }; -use MIP::Environment::Container qw{ parse_containers }; use MIP::Io::Read qw{ read_from_file }; use MIP::Log::MIP_log4perl qw{ get_log }; use MIP::Parameter qw{ set_default }; diff --git a/lib/MIP/Parameter.pm b/lib/MIP/Parameter.pm index cc30d0ac9..6f90763ff 100644 --- a/lib/MIP/Parameter.pm +++ b/lib/MIP/Parameter.pm @@ -901,15 +901,15 @@ sub set_custom_default_to_active_parameter { use MIP::Active_parameter qw{ set_default_analysis_type + set_default_container_config_file set_default_human_genome set_default_infile_dirs - set_default_install_config_file set_default_pedigree_fam_file set_default_program_test_file + set_default_qccollect_store_metrics_outfile set_default_reference_dir set_default_reference_info_file set_default_store_file - set_default_qccollect_store_metrics_outfile set_default_temp_directory set_default_transcript_annotation set_default_uninitialized_parameter @@ -938,6 +938,13 @@ sub set_custom_default_to_active_parameter { parameter_name => 
$parameter_name, }, }, + container_config_file => { + method => \&set_default_container_config_file, + arg_href => { + active_parameter_href => $active_parameter_href, + parameter_name => $parameter_name, + }, + }, exome_target_bed => { method => \&_set_default_capture_kit, arg_href => { @@ -958,13 +965,6 @@ sub set_custom_default_to_active_parameter { active_parameter_href => $active_parameter_href, }, }, - install_config_file => { - method => \&set_default_install_config_file, - arg_href => { - active_parameter_href => $active_parameter_href, - parameter_name => $parameter_name, - }, - }, pedigree_fam_file => { method => \&set_default_pedigree_fam_file, arg_href => { diff --git a/lib/MIP/Program/Docker.pm b/lib/MIP/Program/Docker.pm index 61f764c1d..7273e11ad 100644 --- a/lib/MIP/Program/Docker.pm +++ b/lib/MIP/Program/Docker.pm @@ -23,7 +23,77 @@ BEGIN { use base qw{ Exporter }; # Functions and variables which can be optionally exported - our @EXPORT_OK = qw{ docker_run }; + our @EXPORT_OK = qw{ docker_pull docker_run }; +} + +sub docker_pull { + +## Function : Perl wrapper for pulling docker images Based on Docker 19.03.8 +## Returns : @commands +## Arguments: $filehandle => Filehandle to write to +## : $image => Image to pull +## : $stderrfile_path => Stderrfile path +## : $stderrfile_path_append => Append stderr info to file path +## : $stdoutfile_path => Stdoutfile path + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $filehandle; + my $image; + my $stderrfile_path; + my $stderrfile_path_append; + my $stdoutfile_path; + + my $tmpl = { + filehandle => { + store => \$filehandle, + }, + image => { + defined => 1, + required => 1, + store => \$image, + strict_type => 1, + }, + stderrfile_path => { + store => \$stderrfile_path, + strict_type => 1, + }, + stderrfile_path_append => { + store => \$stderrfile_path_append, + strict_type => 1, + }, + stdoutfile_path => { + store => \$stdoutfile_path, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 
) or croak q{Could not parse arguments!}; + + ## Stores commands depending on input parameters + my @commands = qw{ docker pull }; + + push @commands, $image; + + push @commands, + unix_standard_streams( + { + stderrfile_path => $stderrfile_path, + stderrfile_path_append => $stderrfile_path_append, + stdoutfile_path => $stdoutfile_path, + } + ); + + unix_write_to_file( + { + commands_ref => \@commands, + filehandle => $filehandle, + separator => $SPACE, + + } + ); + return @commands; } sub docker_run { diff --git a/lib/MIP/Recipes/Install/Container.pm b/lib/MIP/Recipes/Install/Container.pm index f1cdcf142..5f08660c1 100644 --- a/lib/MIP/Recipes/Install/Container.pm +++ b/lib/MIP/Recipes/Install/Container.pm @@ -55,7 +55,8 @@ sub install_containers { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; use MIP::Environment::Child_process qw{ child_process }; - use MIP::Environment::Container qw{ parse_container_uri run_container }; + use MIP::Environment::Container + qw{ parse_container_config parse_container_path parse_container_uri pull_container }; use MIP::Recipes::Install::Vep qw{ install_vep }; ## Retrieve logger object @@ -70,7 +71,7 @@ sub install_containers { CONTAINER: foreach my $container ( keys %{$container_href} ) { - $log->info( q{Caching image} . $COLON . $SPACE . $container ); + $log->info( q{Pulling image} . $COLON . $SPACE . 
$container ); parse_container_uri( { @@ -79,14 +80,23 @@ sub install_containers { } ); + parse_container_path( + { + conda_environment_path => $active_parameter_href->{conda_environment_path}, + container_directory_path => $active_parameter_href->{container_directory_path}, + container_href => $container_href->{$container}, + container_manager => $CONTAINER_MANAGER, + local_install => $active_parameter_href->{singularity_local_install}, + } + ); + ## Get command for caching image - my @container_commands = run_container( + my @container_commands = pull_container( { - container_cmds_ref => [qw{ ls }], - container_path => $container_href->{$container}{uri}, - container_manager => $CONTAINER_MANAGER, - stderrfile_path => $stderr_file_path, - stdoutfile_path => devnull(), + container_uri => $container_href->{$container}{uri}, + container_manager => $CONTAINER_MANAGER, + container_outpath => $container_href->{$container}{path}, + stderrfile_path => $stderr_file_path, } ); @@ -99,7 +109,7 @@ sub install_containers { if ( not $process_return{success} ) { - $log->fatal(qq{$CONTAINER_MANAGER failed to cache $container}); + $log->fatal(qq{$CONTAINER_MANAGER failed to pull $container}); $log->logdie( $process_return{error_message} ); } @@ -114,6 +124,15 @@ sub install_containers { ); } } + + ## Parse and write container config to MIP env + parse_container_config( + { + container_href => $container_href, + conda_environment_path => $active_parameter_href->{conda_environment_path}, + } + ); + return 1; } diff --git a/t/data/test_data/install_active_parameters.yaml b/t/data/test_data/install_active_parameters.yaml index 9d9d79184..4847c7b0e 100644 --- a/t/data/test_data/install_active_parameters.yaml +++ b/t/data/test_data/install_active_parameters.yaml @@ -240,8 +240,8 @@ custom_default_parameters: - program_test_file - select_programs - skip_programs -environment_name: mip_install_test -log_file: /MIP/mip_install_test.log +environment_name: mip_ci +log_file: /MIP/mip_ci.log 
mip: 1 pipelines: - rd_dna @@ -310,7 +310,7 @@ rd_rna: - ucsc - utilities - vep -reference_dir: /Users/andersjemt/aj/MIP/t/data/mip_install_test +reference_dir: /MIP/t/data/mip_ci sbatch_mode: '0' select_programs: - bedtools diff --git a/t/data/test_data/miptest_container_config.yaml b/t/data/test_data/miptest_container_config.yaml new file mode 100644 index 000000000..e4af0e00c --- /dev/null +++ b/t/data/test_data/miptest_container_config.yaml @@ -0,0 +1,257 @@ +--- +arriba: + bind_path: + arriba: "reference_dir!/a_dir:opt/conda/share/a_dir" + executable: + arriba: /arriba_v2.1.0/arriba + draw_fusions.R: /arriba_v2.1.0/draw_fusions.R + uri: docker.io/uhrigs/arriba:2.1.0 +bedtools: + executable: + bedtools: ~ + uri: docker.io/clinicalgenomics/bedtools:2.30.0 +blobfish: + executable: + BlobFish.py: ~ + uri: docker.io/clinicalgenomics/blobfish:0.0.2 +bootstrapann: + executable: + BootstrapAnn.py: ~ + uri: docker.io/clinicalgenomics/bootstrapann:df02f35 +bwa: + executable: + bwa: ~ + uri: docker.io/clinicalgenomics/bwa:0.7.17 +bwa-mem2: + executable: + bwa-mem2: ~ + uri: docker.io/clinicalgenomics/bwa-mem2:2.2.1 +bwakit: + executable: + bwakit: no_executable_in_image + run-bwamem: ~ + uri: docker.io/jemten/bwakit:0.7.17 +cadd: + bind_path: + CADD.sh: reference_dir!/CADD-scripts/data/annotations:/opt/conda/share/CADD-scripts/data/annotations + executable: + CADD.sh: ~ + uri: docker.io/clinicalgenomics/cadd:1.6 +chanjo: + executable: + chanjo: ~ + uri: docker.io/clinicalgenomics/chanjo:4.6 +chromograph: + executable: + chromograph: ~ + uri: docker.io/clinicalgenomics/chromograph:1.1 +cnvnator: + executable: + cnvnator: ~ + cnvnator2VCF.pl: ~ + uri: docker.io/clinicalgenomics/cnvnator:0.4.1 +cyrius: + executable: + star_caller.py: ~ + uri: docker.io/clinicalgenomics/cyrius:v1.1.1 +deeptrio: + executable: + run_deeptrio: /opt/deepvariant/bin/deeptrio/run_deeptrio + gpu_support: '1' + uri: docker.io/google/deepvariant:deeptrio-1.2.0-gpu +deepvariant: + executable: + 
call_variants: /opt/deepvariant/bin/call_variants + make_examples: /opt/deepvariant/bin/make_examples + postprocess_variants: /opt/deepvariant/bin/postprocess_variants + run_deepvariant: /opt/deepvariant/bin/run_deepvariant + gpu_support: '1' + uri: docker.io/google/deepvariant:1.2.0-gpu +delly: + executable: + delly: ~ + uri: docker.io/clinicalgenomics/delly:0.8.7 +expansionhunter: + executable: + ExpansionHunter: ~ + uri: docker.io/clinicalgenomics/expansionhunter:4.0.2 +fastqc: + executable: + fastqc: ~ + uri: docker.io/clinicalgenomics/fastqc:0.11.9 +gatk: + executable: + gatk3: no_executable_in_image + uri: docker.io/broadinstitute/gatk3:3.8-1 +gatk4: + executable: + gatk: ~ + uri: docker.io/broadinstitute/gatk:4.2.2.0 +genmod: + executable: + genmod: ~ + uri: docker.io/clinicalgenomics/genmod:3.7.3 +gffcompare: + executable: + gffcompare: ~ + uri: docker.io/clinicalgenomics/gffcompare:0.11.2 +glnexus: + executable: + glnexus_cli: ~ + uri: docker.io/clinicalgenomics/glnexus:v1.4.1 +hmtnote: + executable: + hmtnote: ~ + uri: docker.io/clinicalgenomics/hmtnote:0.7.2 +htslib: + executable: + bcftools: ~ + bgzip: ~ + samtools: ~ + tabix: ~ + uri: docker.io/clinicalgenomics/htslib:1.13 +manta: + executable: + configManta.py: ~ + runWorkflow.py: no_executable_in_image + uri: docker.io/clinicalgenomics/manta:1.6.0 +megafusion: + executable: + MegaFusion.py: python /opt/conda/share/MegaFusion/MegaFusion.py + uri: docker.io/clinicalgenomics/megafusion:66a3a80 +mip: + executable: + mip: ~ + uri: docker.io/clinicalgenomics/mip:v10.2.0 +multiqc: + executable: + multiqc: ~ + uri: docker.io/ewels/multiqc:v1.11 +pdfmerger: + executable: + pdfmerger: ~ + uri: docker.io/northwestwitch/pdfmerger:v1.0 +peddy: + executable: + peddy: python -m peddy + uri: docker.io/clinicalgenomics/peddy:0.4.3 +perl: + executable: + perl: ~ + uri: docker.io/clinicalgenomics/perl:5.26 +picard: + executable: + picard: no_executable_in_image + uri: docker.io/broadinstitute/picard:2.25.0 +plink: + 
executable: + plink2: ~ + uri: docker.io/clinicalgenomics/plink:1.90b3.35 +preseq: + executable: + preseq: ~ + uri: docker.io/clinicalgenomics/preseq:3.1.2 +rhocall: + executable: + rhocall: ~ + uri: docker.io/clinicalgenomics/rhocall:0.5.1 +rseqc: + executable: + bam2wig.py: ~ + bam_stat.py: ~ + geneBody_coverage2.py: ~ + infer_experiment.py: ~ + inner_distance.py: ~ + junction_annotation.py: ~ + junction_saturation.py: ~ + read_distribution.py: ~ + read_duplication.py: ~ + uri: docker.io/clinicalgenomics/rseqc:4.0.0 +rtg-tools: + executable: + rtg: ~ + uri: docker.io/realtimegenomics/rtg-tools:3.12 +salmon: + executable: + salmon: ~ + uri: docker.io/combinelab/salmon:1.4.0 +sambamba: + executable: + sambamba: ~ + uri: docker.io/clinicalgenomics/sambamba:0.6.8 +smncopynumbercaller: + executable: + smn_caller.py: ~ + uri: docker.io/clinicalgenomics/smncopynumbercaller:v1.1.1 +star: + executable: + STAR: ~ + uri: docker.io/clinicalgenomics/star:2.7.8a +star-fusion: + executable: + STAR-Fusion: /usr/local/src/STAR-Fusion/STAR-Fusion + blastn: ~ + gtf_file_to_feature_seqs.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/gtf_file_to_feature_seqs.pl + hmmpress: ~ + make_super_locus.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/make_super_locus.pl + makeblastdb: ~ + prep_genome_lib.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl + remove_long_intron_readthru_transcripts.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/remove_long_intron_readthru_transcripts.pl + restrict_genome_to_chr_entries.pl: /usr/local/src/STAR-Fusion/ctat-genome-lib-builder/util/restrict_genome_to_chr_entries.pl + uri: docker.io/trinityctat/starfusion:1.10.1 +stranger: + executable: + stranger: ~ + uri: docker.io/clinicalgenomics/stranger:0.8.0 +stringtie: + executable: + stringtie: ~ + uri: docker.io/clinicalgenomics/stringtie:2.1.3b +svdb: + executable: + svdb: ~ + uri: quay.io/biocontainers/svdb:2.4.0--py37h77a2a36_4 +telomerecat: + 
executable: + telomerecat: ~ + uri: quay.io/wtsicgp/telomerecat:3.4.0 +tiddit: + executable: + TIDDIT.py: ~ + uri: docker.io/clinicalgenomics/tiddit:2.12.1 +trim-galore: + executable: + trim_galore: ~ + uri: docker.io/clinicalgenomics/trim_galore:0.6.4 +ucsc: + executable: + bedToBigBed: ~ + gtfToGenePred: ~ + wigToBigWig: ~ + uri: docker.io/clinicalgenomics/ucsc:377 +upd: + executable: + upd: ~ + uri: docker.io/clinicalgenomics/upd:0.1.1 +utilities: + executable: + gtf2bed: ~ + pigz: ~ + uri: docker.io/clinicalgenomics/utilities:latest +varg: + executable: + varg: ~ + uri: docker.io/clinicalgenomics/varg:1.6.11 +vcf2cytosure: + executable: + vcf2cytosure: ~ + uri: docker.io/jemten/vcf2cytosure:0.5.1 +vcfanno: + executable: + vcfanno: ~ + uri: docker.io/clinicalgenomics/vcfanno:0.3.2 +vep: + executable: + vep: ~ + uri: docker.io/ensemblorg/ensembl-vep:release_104.3 + diff --git a/t/docker_pull.t b/t/docker_pull.t new file mode 100644 index 000000000..c0bb54704 --- /dev/null +++ b/t/docker_pull.t @@ -0,0 +1,105 @@ +#!/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw{ :all }; +use Modern::Perl qw{ 2018 }; +use Readonly; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COLON $COMMA $SPACE }; +use MIP::Test::Commands qw{ test_function }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( q{MIP::Program::Docker} => [qw{ docker_pull }], ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Program::Docker qw{ docker_pull }; + +diag( q{Test docker_pull from Docker.pm} + . 
$COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . $EXECUTABLE_NAME ); + +## Base arguments +my @function_base_commands = qw{ docker pull }; + +my %base_argument = ( + filehandle => { + input => undef, + expected_output => \@function_base_commands, + }, + stderrfile_path => { + input => q{stderrfile.test}, + expected_output => q{2> stderrfile.test}, + }, + stderrfile_path_append => { + input => q{stderrfile.test}, + expected_output => q{2>> stderrfile.test}, + }, + stdoutfile_path => { + input => q{stdoutfile.test}, + expected_output => q{1> stdoutfile.test}, + }, +); + +## Can be duplicated with %base_argument and/or %specific_argument +## to enable testing of each individual argument +my %required_argument = ( + image => { + input => q{docker.io/clinicalgenomics/bwa:0.7.17}, + expected_output => q{docker.io/clinicalgenomics/bwa:0.7.17}, + }, +); + +my %specific_argument = ( + image => { + input => q{docker.io/clinicalgenomics/bwa:0.7.17}, + expected_output => q{docker.io/clinicalgenomics/bwa:0.7.17}, + }, +); + +## Coderef - enables generalized use of generate call +my $module_function_cref = \&docker_pull; + +## Test both base and function specific arguments +my @arguments = ( \%base_argument, \%specific_argument ); + +ARGUMENT_HASH_REF: +foreach my $argument_href (@arguments) { + + my @commands = test_function( + { + argument_href => $argument_href, + do_test_base_command => 1, + function_base_commands_ref => \@function_base_commands, + module_function_cref => $module_function_cref, + required_argument_href => \%required_argument, + } + ); +} + +done_testing(); diff --git a/t/get_install_containers.t b/t/get_install_containers.t index 7aaec424d..13cde345e 100644 --- a/t/get_install_containers.t +++ b/t/get_install_containers.t @@ -21,16 +21,13 @@ use Modern::Perl qw{ 2018 }; use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; ### Check all internal dependency 
modules and imports ## Modules with import - my %perl_module = ( - q{MIP::Config} => [qw{ get_install_containers }], -); + my %perl_module = ( q{MIP::Config} => [qw{ get_install_containers }], ); test_import( { perl_module_href => \%perl_module, } ); } @@ -46,12 +43,10 @@ diag( q{Test get_install_containers from Config.pm} . $EXECUTABLE_NAME ); ## Given an installation config -my $install_config_path = - catfile( $Bin, qw{ data test_data install_active_parameters.yaml } ); +my $container_config_path = catfile( $Bin, qw{ data test_data miptest_container_config.yaml } ); ## When getting containers from install config file -my %container = - get_install_containers( { install_config_file => $install_config_path, } ); +my %container = get_install_containers( { container_config_file => $container_config_path, } ); ## Then return container info from installation config my %expected_container = ( @@ -60,10 +55,10 @@ my %expected_container = ( arriba => q{reference_dir!/a_dir:opt/conda/share/a_dir} }, executable => { - arriba => q{/arriba_v1.2.0/arriba}, - q{draw_fusions.R} => q{/arriba_v1.2.0/draw_fusions.R}, + arriba => q{/arriba_v2.1.0/arriba}, + q{draw_fusions.R} => q{/arriba_v2.1.0/draw_fusions.R}, }, - uri => q{docker.io/uhrigs/arriba:1.2.0}, + uri => q{docker.io/uhrigs/arriba:2.1.0}, }, ); diff --git a/t/mip_analyse_dragen_rd_dna.test b/t/mip_analyse_dragen_rd_dna.test index 4ed00f156..e14351467 100644 --- a/t/mip_analyse_dragen_rd_dna.test +++ b/t/mip_analyse_dragen_rd_dna.test @@ -35,8 +35,8 @@ my $VERBOSE = 1; my $conda_path = catdir( dirname($Bin), qw{ t data modules miniconda} ); my $cluster_constant_path = catdir( dirname($Bin), qw{ t data} ); my $config_file = catfile( dirname($Bin), qw{ templates mip_dragen_rd_dna_config.yaml } ); -my $mip_install_config = - catfile( dirname($Bin), qw{ t data test_data miptest_install_config.yaml } ); +my $mip_container_config = + catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); my $mip_path = 
catfile( dirname($Bin), q{mip} ); my $test_reference_path = catdir( $cluster_constant_path, q{references} ); my $toml_template_path = @@ -92,8 +92,8 @@ my $cmds_ref = [ qw{ analyse dragen_rd_dna 643594-miptest }, q{--config}, $config_file, - q{--install_config}, - $mip_install_config, + q{--container_config}, + $mip_container_config, q{--conda_path}, $conda_path, q{--cluster_constant_path}, diff --git a/t/mip_analyse_rd_dna.test b/t/mip_analyse_rd_dna.test index c2592d948..d18078a6b 100644 --- a/t/mip_analyse_rd_dna.test +++ b/t/mip_analyse_rd_dna.test @@ -37,8 +37,8 @@ my $cluster_constant_path = catdir( dirname($Bin), qw{ t data} ); my $config_file = catfile( dirname($Bin), qw{ templates mip_rd_dna_config.yaml } ); my $mip_path = catfile( dirname($Bin), q{mip} ); my $test_reference_path = catdir( $cluster_constant_path, q{references} ); -my $mip_install_config = - catfile( dirname($Bin), qw{ t data test_data miptest_install_config.yaml } ); +my $mip_container_config = + catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); my %pedigree = ( wes => catfile( $cluster_constant_path, qw{ test_data pedigree_wes.yaml } ), @@ -115,8 +115,8 @@ my @cmds = ( q{perl}, $mip_path, qw{ analyse rd_dna 643594-miptest }, - q{--install_config}, - $mip_install_config, + q{--container_config}, + $mip_container_config, q{--conda_path}, $conda_path, q{--cluster_constant_path}, diff --git a/t/mip_analyse_rd_dna_panel.test b/t/mip_analyse_rd_dna_panel.test index 4221d2e77..d38adb309 100644 --- a/t/mip_analyse_rd_dna_panel.test +++ b/t/mip_analyse_rd_dna_panel.test @@ -35,8 +35,8 @@ my $VERBOSE = 1; my $conda_path = catdir( dirname($Bin), qw{ t data modules miniconda} ); my $cluster_constant_path = catdir( dirname($Bin), qw{ t data} ); my $config_file = catfile( dirname($Bin), qw{ templates mip_rd_dna_panel_config.yaml } ); -my $mip_install_config = - catfile( dirname($Bin), qw{ t data test_data miptest_install_config.yaml } ); +my $mip_container_config = + 
catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); my $mip_path = catfile( dirname($Bin), q{mip} ); my $test_reference_path = catdir( $cluster_constant_path, q{references} ); my $toml_template_path = @@ -92,8 +92,8 @@ my $cmds_ref = [ qw{ analyse rd_dna_panel 643594-miptest }, q{--config}, $config_file, - q{--install_config}, - $mip_install_config, + q{--container_config}, + $mip_container_config, q{--conda_path}, $conda_path, q{--cluster_constant_path}, diff --git a/t/mip_analyse_rd_dna_vcf_rerun.test b/t/mip_analyse_rd_dna_vcf_rerun.test index 4cfe3e32d..cb6f21701 100644 --- a/t/mip_analyse_rd_dna_vcf_rerun.test +++ b/t/mip_analyse_rd_dna_vcf_rerun.test @@ -35,8 +35,8 @@ my $VERBOSE = 1; my $conda_path = catdir( dirname($Bin), qw{ t data modules miniconda} ); my $cluster_constant_path = catdir( dirname($Bin), qw{ t data} ); my $config_file = catfile( dirname($Bin), qw{ templates mip_rd_dna_vcf_rerun_config.yaml } ); -my $mip_install_config = - catfile( dirname($Bin), qw{ t data test_data miptest_install_config.yaml } ); +my $mip_container_config = + catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); my $mip_path = catfile( dirname($Bin), q{mip} ); my $test_reference_path = catdir( $cluster_constant_path, q{references} ); my $toml_template_path = @@ -92,8 +92,8 @@ my $cmds_ref = [ qw{ analyse rd_dna_vcf_rerun 643594-miptest }, q{--config}, $config_file, - q{--install_config}, - $mip_install_config, + q{--container_config}, + $mip_container_config, q{--conda_path}, $conda_path, q{--cluster_constant_path}, diff --git a/t/mip_analyse_rd_rna.test b/t/mip_analyse_rd_rna.test index 3a7dd9d6e..c6d0feb3c 100644 --- a/t/mip_analyse_rd_rna.test +++ b/t/mip_analyse_rd_rna.test @@ -33,8 +33,8 @@ my $VERBOSE = 1; ## Set paths my $conda_path = catdir( dirname($Bin), qw{ t data modules miniconda} ); my $cluster_constant_path = catdir( dirname($Bin), qw{ t data } ); -my $mip_install_config = - catfile( dirname($Bin), qw{ t 
data test_data miptest_install_config.yaml } ); +my $mip_container_config = + catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); my $mip_path = catfile( dirname($Bin), q{mip} ); my $dna_vcf_file = catfile( dirname($Bin), qw{ t data test_data 643594-miptest_sorted_md_brecal_comb_BOTH.bcf } ); @@ -79,8 +79,8 @@ my @base_cmds = ( q{perl}, $mip_path, qw{ analyse rd_rna 118-rnatest }, - q{--install_config}, - $mip_install_config, + q{--container_config}, + $mip_container_config, q{--conda_path}, $conda_path, q{--cluster_constant_path}, diff --git a/t/mip_install.test b/t/mip_install.test index 22a0ea598..8c107e38e 100644 --- a/t/mip_install.test +++ b/t/mip_install.test @@ -1,5 +1,6 @@ #!/usr/bin/env perl +use 5.026; use Carp; use open qw{ :encoding(UTF-8) :std }; use charnames qw{ :full :short }; @@ -154,6 +155,29 @@ my %process_return = child_process( ok( $process_return{success}, q{Executed install} ); +## When given option to pull singularity images for local installation +push @{$cmds_ref}, q{--singularity_local_install}; +%process_return = child_process( + { + commands_ref => $cmds_ref, + process_type => q{ipc_cmd_run}, + verbose => $verbose, + } +); +## Then return OK +ok( $process_return{success}, q{Executed local singularity install} ); + +## When given option to pull images using docker +push @{$cmds_ref}, qw{ --container_manager docker }; +%process_return = child_process( + { + commands_ref => $cmds_ref, + process_type => q{ipc_cmd_run}, + verbose => $verbose, + } +); +## Then return OK +ok( $process_return{success}, q{Executed docker install} ); ## Remove paths path($log_file_path)->remove; path( catdir( $conda_path, q{envs}, $environment_name ) )->remove_tree; diff --git a/t/parse_containers.t b/t/parse_containers.t index 3182efd62..a83b77aee 100644 --- a/t/parse_containers.t +++ b/t/parse_containers.t @@ -59,12 +59,11 @@ my %parameter = test_mip_hashes( ); ## Given an installation config -my $install_config_path = - catfile( 
$Bin, qw{ data test_data install_active_parameters.yaml } ); +my $container_config_path = catfile( $Bin, qw{ data test_data miptest_container_config.yaml } ); ## Given a container manager and an install config -$active_parameter{install_config_file} = $install_config_path; -$active_parameter{container_manager} = q{singularity}; +$active_parameter{container_config_file} = $container_config_path; +$active_parameter{container_manager} = q{singularity}; ## When parsing containers my $is_ok = parse_containers( diff --git a/t/set_executable_container_cmd.t b/t/set_executable_container_cmd.t index a97fa52ec..0b3b9ec07 100644 --- a/t/set_executable_container_cmd.t +++ b/t/set_executable_container_cmd.t @@ -61,9 +61,9 @@ my %parameter = test_mip_hashes( ); ## Given an installation config -my $install_config_path = catfile( $Bin, qw{ data test_data install_active_parameters.yaml } ); +my $container_config_path = catfile( $Bin, qw{ data test_data miptest_container_config.yaml } ); $active_parameter{container} = - { get_install_containers( { install_config_file => $install_config_path, } ) }; + { get_install_containers( { container_config_file => $container_config_path, } ) }; ## Given a container manager and some bind paths my $container_manager = q{singularity}; @@ -79,7 +79,7 @@ my %container_cmd = set_executable_container_cmd( ); my $expected_arriba_cmd = -q{singularity exec --bind reference_dir!/a_dir:opt/conda/share/a_dir docker://docker.io/uhrigs/arriba:1.2.0 /arriba_v1.2.0/arriba}; +q{singularity exec --bind reference_dir!/a_dir:opt/conda/share/a_dir docker://docker.io/uhrigs/arriba:2.1.0 /arriba_v2.1.0/arriba}; ## Then return command for how to execute arriba using singularity is( $container_cmd{arriba}, $expected_arriba_cmd, q{Set singularity cmd for executable} ); @@ -98,7 +98,7 @@ $container_manager = q{docker}; ); my $expected_arriba_docker_cmd = -q{docker run --volume reference_dir!/a_dir:opt/conda/share/a_dir --rm --entrypoint "" 
docker://docker.io/uhrigs/arriba:1.2.0 /arriba_v1.2.0/arriba}; +q{docker run --volume reference_dir!/a_dir:opt/conda/share/a_dir --rm --entrypoint "" docker://docker.io/uhrigs/arriba:2.1.0 /arriba_v2.1.0/arriba}; ## Then return command for how to execute arriba using docker is( $container_cmd{arriba}, $expected_arriba_docker_cmd, q{Set docker cmd for executable} ); From b591dd5b4021f6721293bfce830c0c646a4b2141 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 13 Sep 2021 08:52:10 +0200 Subject: [PATCH 058/116] fixing download test --- lib/MIP/Cli/Mip/Download.pm | 8 ++++++++ t/mip_download.test | 9 +++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/MIP/Cli/Mip/Download.pm b/lib/MIP/Cli/Mip/Download.pm index 38126fb91..6284073de 100644 --- a/lib/MIP/Cli/Mip/Download.pm +++ b/lib/MIP/Cli/Mip/Download.pm @@ -101,6 +101,14 @@ sub _build_usage { ) ); + option( + q{container_config_file} => ( + documentation => q{File with install configuration parameters in YAML format}, + is => q{rw}, + isa => Str, + ) + ); + option( q{core_ram_memory} => ( cmd_tags => [q{Default: 5}], diff --git a/t/mip_download.test b/t/mip_download.test index 7886aabc0..d21f2d000 100644 --- a/t/mip_download.test +++ b/t/mip_download.test @@ -38,6 +38,8 @@ my $config_files_ref = [ catfile( dirname($Bin), qw{ templates mip_download_rd_rna_config_-1.0-.yaml } ) ]; my $mip_path = catfile( dirname($Bin), q{mip} ); +my $mip_container_config = + catfile( dirname($Bin), qw{ t data test_data miptest_container_config.yaml } ); ### User Options GetOptions( @@ -76,8 +78,11 @@ ok( can_run(q{mip}), q{Checking can run mip} ); foreach my $config_file ( @{$config_files_ref} ) { my $cmds_ref = [ - q{perl}, $mip_path, q{download}, q{--config}, - $config_file, q{--conda_path}, $conda_path, qw{ --dra }, + q{perl}, $mip_path, + q{download}, q{--config}, + $config_file, q{--conda_path}, + $conda_path, qw{ --dra }, + q{--container_config}, $mip_container_config, ]; my %process_return = 
child_process( From 164250147483d3b4299cfb8964068f49d50da7a9 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 13 Sep 2021 10:36:20 +0200 Subject: [PATCH 059/116] fix analysis test --- t/data/test_data/miptest_container_config.yaml | 1 - t/mip_analyse_rd_dna.test | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/t/data/test_data/miptest_container_config.yaml b/t/data/test_data/miptest_container_config.yaml index e4af0e00c..4a0f0c8e4 100644 --- a/t/data/test_data/miptest_container_config.yaml +++ b/t/data/test_data/miptest_container_config.yaml @@ -105,7 +105,6 @@ hmtnote: uri: docker.io/clinicalgenomics/hmtnote:0.7.2 htslib: executable: - bcftools: ~ bgzip: ~ samtools: ~ tabix: ~ diff --git a/t/mip_analyse_rd_dna.test b/t/mip_analyse_rd_dna.test index d18078a6b..c5520780b 100644 --- a/t/mip_analyse_rd_dna.test +++ b/t/mip_analyse_rd_dna.test @@ -162,7 +162,7 @@ while ( my ( $pedigree, $pedigree_path ) = each %pedigree ) { } ## Clean-up -remove_tree( catfile( $cluster_constant_path, qw{ 643594-miptest analysis } ) ); +#remove_tree( catfile( $cluster_constant_path, qw{ 643594-miptest analysis } ) ); GENOME_BUILD: foreach my $genome_build (@genome_builds) { From debe4058a1400a7bd8d0376384d1e6243edc5aca Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 13 Sep 2021 16:49:15 +0200 Subject: [PATCH 060/116] adds check for installed sif images --- lib/MIP/Environment/Container.pm | 69 +++++++++ lib/MIP/File/Path.pm | 7 +- lib/MIP/Validate/Data.pm | 1 + t/check_installed_containers.t | 134 ++++++++++++++++++ .../envs/mip_ci/bin/fastqc:0.11.9.sif | 0 .../envs/mip_ci/bin/multiqc:v1.11.sif | 0 t/install_container.t | 12 +- t/mip_analyse_rd_dna.test | 2 +- 8 files changed, 215 insertions(+), 10 deletions(-) create mode 100755 t/check_installed_containers.t create mode 100755 t/data/modules/miniconda/envs/mip_ci/bin/fastqc:0.11.9.sif create mode 100644 t/data/modules/miniconda/envs/mip_ci/bin/multiqc:v1.11.sif diff --git a/lib/MIP/Environment/Container.pm 
b/lib/MIP/Environment/Container.pm index 39c203625..58fac364b 100644 --- a/lib/MIP/Environment/Container.pm +++ b/lib/MIP/Environment/Container.pm @@ -27,6 +27,7 @@ BEGIN { # Functions and variables which can be optionally exported our @EXPORT_OK = qw{ build_container_cmd + check_installed_containers get_recipe_executable_bind_path parse_container_config parse_container_path @@ -247,6 +248,68 @@ sub get_recipe_executable_bind_path { return %recipe_executable_bind_path; } +sub check_installed_containers { + +## Function : Check that every container whose uri is a .sif image exists as an executable file. Log fatal and exit if any is missing +## Returns : 1 if no missing sif images were found +## Arguments: $container_href => Map of containers {REF} + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $container_href; + + my $tmpl = { + container_href => { + default => {}, + defined => 1, + required => 1, + store => \$container_href, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::File::Path qw{ check_filesystem_objects_existance }; + use MIP::Validate::Data qw{ %constraint }; + + ## Retrieve logger object + my $log = Log::Log4perl->get_logger($LOG_NAME); + + my @error_messages; + + CONTAINER: + while ( my ( $container, $container_param_href ) = each %{$container_href} ) { + + ## Only run check for .sif file + next CONTAINER if ( not $constraint{is_sif}->( $container_param_href->{uri} ) ); + + my ( $ok, $error_message ) = check_filesystem_objects_existance( + { + object_name => $container_param_href->{uri}, + parameter_name => $container, + object_type => q{executable_file}, + } + ); + + next CONTAINER if $ok; + + push @error_messages, $error_message; + } + + return 1 if ( @error_messages == 0 ); + + ERROR_MESSAGE: + foreach my $error_message (@error_messages) { + + $log->fatal($error_message); + } + + $log->fatal(q{Please install missing image files}); + exit 1; +} + sub parse_containers { ## Function : Parse containers to set executable command based on current container
manager @@ -287,6 +350,12 @@ sub parse_containers { get_install_containers( { container_config_file => $active_parameter_href->{container_config_file}, } ); + check_installed_containers( + { + container_href => $active_parameter_href->{container}, + } + ); + my %dynamic_parameter = ( reference_dir => $active_parameter_href->{reference_dir}, ); update_with_dynamic_config_parameters( { diff --git a/lib/MIP/File/Path.pm b/lib/MIP/File/Path.pm index d1727c43d..15999ebab 100644 --- a/lib/MIP/File/Path.pm +++ b/lib/MIP/File/Path.pm @@ -115,7 +115,7 @@ sub check_filesystem_objects_existance { strict_type => 1, }, object_type => { - allow => [qw{ directory file }], + allow => [qw{ directory executable_file file }], defined => 1, required => 1, store => \$object_type, @@ -128,8 +128,9 @@ sub check_filesystem_objects_existance { use MIP::Validate::Data qw{ %constraint }; my %exists_constraint_map = ( - directory => q{dir_exists}, - file => q{plain_file_exists}, + directory => q{dir_exists}, + file => q{plain_file_exists}, + executable_file => q{file_is_executable}, ); my $constraint = $exists_constraint_map{$object_type}; diff --git a/lib/MIP/Validate/Data.pm b/lib/MIP/Validate/Data.pm index f5e5410a5..b9f2f6a95 100644 --- a/lib/MIP/Validate/Data.pm +++ b/lib/MIP/Validate/Data.pm @@ -28,6 +28,7 @@ our %constraint = ( dir_exists => sub { return 1 if ( -d $_[0] ); return; }, is_digit => sub { return 1 if ( $_[0] =~ / \A \d+ \z /sxm ); return; }, is_gzipped => sub { return 1 if ( $_[0] =~ / [.]gz$ /xms ); return 0; }, + is_sif => sub { return 1 if ( $_[0] =~ / [.]sif \z /xms ); return 0; }, plain_file_exists => sub { return 1 if ( -f $_[0] ); return; }, ); diff --git a/t/check_installed_containers.t b/t/check_installed_containers.t new file mode 100755 index 000000000..75cbc0bb6 --- /dev/null +++ b/t/check_installed_containers.t @@ -0,0 +1,134 @@ +#! 
/usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir catfile }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; +use Test::Trap; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COMMA $SPACE }; +use MIP::Test::Fixtures qw{ test_log }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( + q{MIP::Environment::Container} => [qw{ check_installed_containers }], + q{MIP::Test::Fixtures} => [qw{ test_log }], + ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Environment::Container qw{ check_installed_containers }; + +diag( q{Test check_installed_containers from Container.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . 
$EXECUTABLE_NAME ); + +test_log( { no_screen => 0, } ); + +## Given docker hub address and executable container +my %container = ( + fastqc => { + executable => { + fastqc => undef, + }, + uri => catfile( + dirname($Bin), qw{ t data modules miniconda envs mip_ci bin fastqc:0.11.9.sif } + ), + }, + htslib => { + executable => { + bgzip => undef, + samtools => undef, + tabix => undef, + }, + uri => q{docker.io/clinicalgenomics/htslib:1.13}, + }, +); + +## Then everything is OK +my $ok = check_installed_containers( + { + container_href => \%container, + } +); +ok( $ok, q{Sif's exists and are executable} ); + +## Given missing executable +%container = ( + manta => { + executable => { + q{configManta.py} => undef, + q{runWorkflow.py} => q{no_executable_in_image}, + }, + uri => catfile(qw{ missing file manta:1.6.0.sif }), + }, +); + +## Then Croak with error message +trap { + check_installed_containers( + { + container_href => \%container, + } + ) +}; + +ok( $trap->exit, q{Exit if sif cannot be found} ); +like( + $trap->stderr, + qr/Could\snot\sfind\sintended\smanta\sexecutable_file/xms, + q{Print log message for missing sif} +); + +## Given non executable sif +%container = ( + multiqc => { + executable => { + multiqc => undef, + }, + uri => + catfile( dirname($Bin), qw{t data modules miniconda envs mip_ci bin multiqc:v1.11.sif } ), + }, +); + +## Then Croak with error message +trap { + check_installed_containers( + { + container_href => \%container, + } + ) +}; + +ok( $trap->exit, q{Exit if sif isn't executable} ); +like( + $trap->stderr, + qr/Could\snot\sfind\sintended\smultiqc\sexecutable_file/xms, + q{Print log message for non executable sif} +); + +done_testing(); diff --git a/t/data/modules/miniconda/envs/mip_ci/bin/fastqc:0.11.9.sif b/t/data/modules/miniconda/envs/mip_ci/bin/fastqc:0.11.9.sif new file mode 100755 index 000000000..e69de29bb diff --git a/t/data/modules/miniconda/envs/mip_ci/bin/multiqc:v1.11.sif 
b/t/data/modules/miniconda/envs/mip_ci/bin/multiqc:v1.11.sif new file mode 100644 index 000000000..e69de29bb diff --git a/t/install_container.t b/t/install_container.t index 40e715553..8566ed8f9 100644 --- a/t/install_container.t +++ b/t/install_container.t @@ -6,6 +6,7 @@ use charnames qw{ :full :short }; use English qw{ -no_match_vars }; use File::Basename qw{ dirname }; use File::Spec::Functions qw{ catdir catfile }; +use File::Temp qw{ tempdir }; use FindBin qw{ $Bin }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ allow check last_error }; @@ -32,8 +33,7 @@ BEGIN { ## Modules with import my %perl_module = ( q{MIP::Recipes::Install::Container} => [qw{ install_containers }], - q{MIP::Test::Fixtures} => - [qw{ test_constants test_log test_mip_hashes }], + q{MIP::Test::Fixtures} => [qw{ test_constants test_log test_mip_hashes }], ); test_import( { perl_module_href => \%perl_module, } ); @@ -55,10 +55,10 @@ test_log( { no_screen => 1, } ); test_constants( {} ); ## Given install parameters -my %active_parameter = - test_mip_hashes( { mip_hash_name => q{install_active_parameter}, } ); -$active_parameter{reference_dir} = catdir( $test_dir, qw{ a dir } ); -$active_parameter{container_manager} = q{docker}; +my %active_parameter = test_mip_hashes( { mip_hash_name => q{install_active_parameter}, } ); +$active_parameter{reference_dir} = catdir( $test_dir, qw{ a dir } ); +$active_parameter{container_manager} = q{docker}; +$active_parameter{conda_environment_path} = tempdir( CLEANUP => 1 ); my $is_ok = install_containers( { diff --git a/t/mip_analyse_rd_dna.test b/t/mip_analyse_rd_dna.test index c5520780b..d18078a6b 100644 --- a/t/mip_analyse_rd_dna.test +++ b/t/mip_analyse_rd_dna.test @@ -162,7 +162,7 @@ while ( my ( $pedigree, $pedigree_path ) = each %pedigree ) { } ## Clean-up -#remove_tree( catfile( $cluster_constant_path, qw{ 643594-miptest analysis } ) ); +remove_tree( catfile( $cluster_constant_path, qw{ 643594-miptest analysis } ) ); GENOME_BUILD: 
foreach my $genome_build (@genome_builds) { From ea3661311413b0ab213f32ccefb9aad104d34c66 Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Tue, 14 Sep 2021 10:33:01 +0200 Subject: [PATCH 061/116] Apply suggestions from code review Co-authored-by: Henrik Stranneheim --- t/check_installed_containers.t | 2 +- t/mip_install.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/t/check_installed_containers.t b/t/check_installed_containers.t index 75cbc0bb6..babca3970 100755 --- a/t/check_installed_containers.t +++ b/t/check_installed_containers.t @@ -69,7 +69,7 @@ my %container = ( }, ); -## Then everything is OK +## Then sif's exists and are executable my $ok = check_installed_containers( { container_href => \%container, diff --git a/t/mip_install.test b/t/mip_install.test index 8c107e38e..0744e2760 100644 --- a/t/mip_install.test +++ b/t/mip_install.test @@ -164,7 +164,7 @@ push @{$cmds_ref}, q{--singularity_local_install}; verbose => $verbose, } ); -## Then return OK +## Then return true ok( $process_return{success}, q{Executed local singularity install} ); ## When given option to pull images using docker From 4414e6720d40b8e71c08b23cc48d1ee8e39011ec Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 14 Sep 2021 10:34:42 +0200 Subject: [PATCH 062/116] Apply suggestions from code review --- lib/MIP/Environment/Container.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/MIP/Environment/Container.pm b/lib/MIP/Environment/Container.pm index 58fac364b..1d01ca87a 100644 --- a/lib/MIP/Environment/Container.pm +++ b/lib/MIP/Environment/Container.pm @@ -285,15 +285,15 @@ sub check_installed_containers { ## Only run check for .sif file next CONTAINER if ( not $constraint{is_sif}->( $container_param_href->{uri} ) ); - my ( $ok, $error_message ) = check_filesystem_objects_existance( + my ( $is_ok, $error_message ) = check_filesystem_objects_existance( { object_name => $container_param_href->{uri}, - parameter_name => $container, 
object_type => q{executable_file}, + parameter_name => $container, } ); - next CONTAINER if $ok; + next CONTAINER if $is_ok; push @error_messages, $error_message; } From f62b2489eb018296ea01494d4311872fd2279ead Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 14 Sep 2021 18:57:11 +0200 Subject: [PATCH 063/116] changing to non gpu image off deepvariant --- lib/MIP/Environment/Container.pm | 4 ++++ templates/mip_install_config.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/MIP/Environment/Container.pm b/lib/MIP/Environment/Container.pm index 1d01ca87a..9b289093f 100644 --- a/lib/MIP/Environment/Container.pm +++ b/lib/MIP/Environment/Container.pm @@ -408,6 +408,9 @@ sub parse_container_config { use MIP::Config qw{ get_install_containers }; use MIP::Io::Write qw{ write_to_file }; + ## Retrieve logger object + my $log = Log::Log4perl->get_logger($LOG_NAME); + ## Replace the uri with path IMAGE: foreach my $image ( keys %{$container_href} ) { @@ -434,6 +437,7 @@ sub parse_container_config { ## Merge hashes %container_config = ( %container_config, %{$container_href} ); + $log->info( q{Writing container config to} . $COLON . $SPACE . 
$container_config_path ); write_to_file( { data_href => \%container_config, diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 09553a1ee..ab2b11a41 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -59,13 +59,13 @@ container: make_examples: /opt/deepvariant/bin/make_examples call_variants: /opt/deepvariant/bin/call_variants postprocess_variants: /opt/deepvariant/bin/postprocess_variants - gpu_support: 1 - uri: docker.io/google/deepvariant:1.2.0-gpu + gpu_support: 0 + uri: docker.io/google/deepvariant:1.2.0 deeptrio: executable: run_deeptrio: /opt/deepvariant/bin/deeptrio/run_deeptrio - gpu_support: 1 - uri: docker.io/google/deepvariant:deeptrio-1.2.0-gpu + gpu_support: 0 + uri: docker.io/google/deepvariant:deeptrio-1.2.0 delly: executable: delly: From 2bc397cf655aa675b3a2d53f89362e76fa642d5f Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 17 Sep 2021 16:36:03 +0200 Subject: [PATCH 064/116] turning of mt filtering for sv --- definitions/dragen_rd_dna_parameters.yaml | 10 + definitions/rd_dna_parameters.yaml | 10 + definitions/rd_dna_vcf_rerun_parameters.yaml | 10 + lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm | 10 +- lib/MIP/Cli/Mip/Analyse/Rd_dna.pm | 8 + lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm | 10 +- lib/MIP/Program/Bcftools.pm | 56 +++++- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 196 ++++++++++--------- t/analysis_sv_annotate.t | 18 +- t/bcftools_base.t | 4 + t/bcftools_view.t | 9 +- templates/grch38_mip_rd_dna_config.yaml | 5 + templates/mip_rd_dna_config.yaml | 5 + templates/mip_rd_dna_vcf_rerun_config.yaml | 5 + 14 files changed, 247 insertions(+), 109 deletions(-) diff --git a/definitions/dragen_rd_dna_parameters.yaml b/definitions/dragen_rd_dna_parameters.yaml index ae8a1258b..42456a0ef 100755 --- a/definitions/dragen_rd_dna_parameters.yaml +++ b/definitions/dragen_rd_dna_parameters.yaml @@ -202,6 +202,16 @@ sv_fqa_annotations: - GNOMADAF - GNOMADAF_POPMAX type: recipe_argument 
+sv_fqa_filters: + associated_recipe: + - sv_annotate + data_type: ARRAY + default: + - GNOMADAF + - GNOMADAF_POPMAX + - gnomad_sv + - clinical_genomics_loqus + type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 26889a192..02c36f242 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -913,6 +913,16 @@ sv_fqa_annotations: - GNOMADAF - GNOMADAF_POPMAX type: recipe_argument +sv_fqa_filters: + associated_recipe: + - sv_annotate + data_type: ARRAY + default: + - GNOMADAF + - GNOMADAF_POPMAX + - gnomad_sv + - clinical_genomics_loqus + type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/definitions/rd_dna_vcf_rerun_parameters.yaml b/definitions/rd_dna_vcf_rerun_parameters.yaml index a06b2fbae..d5b21958c 100755 --- a/definitions/rd_dna_vcf_rerun_parameters.yaml +++ b/definitions/rd_dna_vcf_rerun_parameters.yaml @@ -170,6 +170,16 @@ sv_fqa_annotations: - GNOMADAF - GNOMADAF_POPMAX type: recipe_argument +sv_fqa_filters: + associated_recipe: + - sv_annotate + data_type: ARRAY + default: + - GNOMADAF + - GNOMADAF_POPMAX + - gnomad_sv + - clinical_genomics_loqus + type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm index 143fcccae..4feaa58da 100644 --- a/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm @@ -258,7 +258,15 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v option( q{sv_fqa_annotations} => ( - documentation => q{Frequency annotations to use when filtering }, + documentation => q{Frequency annotations to use}, + is => q{rw}, + isa => ArrayRef, + ) + ); + + option( + q{sv_fqa_filters} => ( + documentation => q{Frequency annotations to use when filtering}, is => q{rw}, isa => 
ArrayRef, ) diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm index 97b31e119..2d25b7104 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm @@ -799,6 +799,14 @@ q{Default: grch37_dbsnp_-138-.vcf, grch37_1000g_indels_-phase1-.vcf, grch37_mill option( q{sv_fqa_annotations} => ( + documentation => q{Frequency annotations to use }, + is => q{rw}, + isa => ArrayRef, + ) + ); + + option( + q{sv_fqa_filters} => ( documentation => q{Frequency annotations to use when filtering }, is => q{rw}, isa => ArrayRef, diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm index c23453210..3a01b1fea 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm @@ -213,7 +213,15 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v option( q{sv_fqa_annotations} => ( - documentation => q{Frequency annotations to use when filtering }, + documentation => q{Frequency annotations to use}, + is => q{rw}, + isa => ArrayRef, + ) + ); + + option( + q{sv_fqa_filters} => ( + documentation => q{Frequency annotations to use when filtering}, is => q{rw}, isa => ArrayRef, ) diff --git a/lib/MIP/Program/Bcftools.pm b/lib/MIP/Program/Bcftools.pm index c27efb22f..78616eb80 100644 --- a/lib/MIP/Program/Bcftools.pm +++ b/lib/MIP/Program/Bcftools.pm @@ -210,6 +210,7 @@ sub bcftools_base { ## : $regions_ref => Regions to process {REF} ## : $samples_file_path => File of samples to annotate ## : $samples_ref => Samples to include or exclude if prefixed with "^" +## : $targets => Select target.
Logical complement can be requested with "^" prefix ## : $threads => Extra compression threds in addition to main thread my ($arg_href) = @_; @@ -223,6 +224,7 @@ sub bcftools_base { my $regions_ref; my $samples_file_path; my $samples_ref; + my $targets; my $threads; my $tmpl = { @@ -261,6 +263,10 @@ sub bcftools_base { store => \$samples_ref, strict_type => 1, }, + targets => { + store => \$targets, + strict_type => 1, + }, threads => { allow => [ undef, qr{\A \d+ \z}xms ], store => \$threads, @@ -298,6 +304,11 @@ sub bcftools_base { push @commands, q{--output-type} . $SPACE . $output_type; } + if ($targets) { + + push @commands, q{--targets} . $SPACE . $targets; + } + if ($threads) { push @commands, q{--threads} . $SPACE . $threads; @@ -579,7 +590,7 @@ sub bcftools_concat { sub bcftools_filter { -## Function : Perl wrapper for writing bcftools filter recipe to $filehandle or return commands array. Based on bcftools 1.6. +## Function : Perl wrapper for writing bcftools filter recipe to $filehandle or return commands array. Based on bcftools 1.13. ## Returns : @commands ## Arguments: $filehandle => Filehandle to write to ## : $exclude => Exclude sites for which the expression is true @@ -587,7 +598,7 @@ sub bcftools_filter { ## : $include => Include only sites for which the expression is true ## : $indel_gap => Filter clusters of indels separated by or fewer base pairs allowing only one to pass ## : $infile_path => Infile paths -## : $outfile_path => Outfile path to write to +## : $outfile_path => Outfile path to write to/view ## : $output_type => 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v] ## : $regions_ref => Regions to process {REF} ## : $samples_file_path => File of samples to annotate @@ -597,6 +608,8 @@ sub bcftools_filter { ## : $stdoutfile_path => Stdoutfile path ## : $stderrfile_path => Stderrfile path ## : $stderrfile_path_append => Append stderr info to file path +## : $targets => Select target. 
Logical complement can be requested with "^" prefix +## : $threads => Extra compression threds in addition to main thread my ($arg_href) = @_; @@ -616,6 +629,8 @@ sub bcftools_filter { my $stdoutfile_path; my $stderrfile_path; my $stderrfile_path_append; + my $targets; + my $threads; ## Default(s) my $output_type; @@ -690,6 +705,15 @@ sub bcftools_filter { store => \$stdoutfile_path, strict_type => 1, }, + targets => { + store => \$targets, + strict_type => 1, + }, + threads => { + allow => [ undef, qr{\A \d+ \z}xms ], + store => \$threads, + strict_type => 1, + }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; @@ -706,6 +730,8 @@ sub bcftools_filter { regions_ref => $regions_ref, samples_file_path => $samples_file_path, samples_ref => $samples_ref, + targets => $targets, + threads => $threads, } ); @@ -975,7 +1001,7 @@ sub bcftools_merge { sub bcftools_mpileup { -## Function : Perl wrapper for writing bcftools mpileup recipe to $filehandle. Based on bcftools 1.6 (using htslib 1.6). +## Function : Perl wrapper for writing bcftools mpileup recipe to $filehandle. Based on bcftools 1.13 (using htslib 1.13). ## Returns : @commands ## : $adjust_mq => Adjust mapping quality ## : $filehandle => Sbatch filehandle to write to @@ -991,6 +1017,8 @@ sub bcftools_mpileup { ## : $stderrfile_path => Stderrfile path ## : $stderrfile_path_append => Stderrfile path append ## : $stdoutfile_path => Stdoutfile file path to write to +## : $targets => Select target. 
Logical complement can be requested with "^" prefix +## : $threads => Extra compression threds in addition to main thread my ($arg_href) = @_; @@ -1006,6 +1034,8 @@ sub bcftools_mpileup { my $stderrfile_path; my $stderrfile_path_append; my $stdoutfile_path; + my $targets; + my $threads; ## Default(s) my $adjust_mq; @@ -1066,6 +1096,15 @@ sub bcftools_mpileup { stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, stdoutfile_path => { store => \$stdoutfile_path, strict_type => 1, }, + targets => { + store => $targets, + strict_type => 1, + }, + threads => { + allow => [ undef, qr{\A \d+ \z}xms ], + store => \$threads, + strict_type => 1, + }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; @@ -1082,6 +1121,8 @@ sub bcftools_mpileup { output_type => $output_type, samples_file_path => $samples_file_path, samples_ref => $samples_ref, + targets => $targets, + threads => $threads, } ); @@ -2058,6 +2099,7 @@ sub bcftools_view { ## : $stderrfile_path => Stderr file path to write to ## : $stderrfile_path_append => Append stderr info to file path ## : $stdoutfile_path => Stdoutfile file path to write to +## : $targets => Select target. 
Logical complement can be requested with "^" prefix ## : $threads => Number of threads to use ## : $types => Comma separated variant types to include (snps|indels|mnps|other), based on based on REF,ALT @@ -2084,6 +2126,7 @@ sub bcftools_view { my $stderrfile_path; my $stderrfile_path_append; my $stdoutfile_path; + my $targets; my $threads; my $types; @@ -2143,7 +2186,11 @@ sub bcftools_view { stderrfile_path => { store => \$stderrfile_path, strict_type => 1, }, stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, stdoutfile_path => { store => \$stdoutfile_path, strict_type => 1, }, - threads => { + targets => { + store => \$targets, + strict_type => 1, + }, + threads => { allow => qr/ \A \d+ \z /xms, store => \$threads, strict_type => 1, @@ -2168,6 +2215,7 @@ sub bcftools_view { regions_ref => $regions_ref, samples_file_path => $samples_file_path, samples_ref => $samples_ref, + targets => $targets, } ); diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 8f32b7514..49a86f7cc 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -5,7 +5,8 @@ use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; use File::Basename qw{ dirname }; -use File::Spec::Functions qw{ catfile splitpath }; +use File::Spec::Functions qw{ catfile devnull splitpath }; +use List::Util qw{ first }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ allow check last_error }; use utf8; @@ -128,7 +129,7 @@ sub analysis_sv_annotate { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; use MIP::File_info qw{ get_io_files parse_io_outfiles }; - use MIP::Program::Gnu::Coreutils qw(gnu_mv); + use MIP::Program::Gnu::Coreutils qw( gnu_cat gnu_mv gnu_tee ); use MIP::Io::Read qw{ read_from_file }; use MIP::Processmanagement::Processes qw{ submit_recipe }; use MIP::Program::Bcftools @@ -162,8 +163,7 @@ sub analysis_sv_annotate { my 
$infile_suffix = $io{in}{file_suffix}; my $infile_path = $infile_path_prefix . $infile_suffix; - my $consensus_analysis_type = $parameter_href->{cache}{consensus_analysis_type}; - my $sequence_dict_file = catfile( $reference_dir, + my $sequence_dict_file = catfile( $reference_dir, $file_info_href->{human_genome_reference_name_prefix} . $DOT . q{dict} ); my %recipe = parse_recipe_prerequisites( { @@ -212,11 +212,6 @@ sub analysis_sv_annotate { } ); - ## Split to enable submission to &sample_info_qc later - my ( $volume, $directory, $stderr_file ) = - splitpath( $recipe_info_path . $DOT . q{stderr.txt} ); - my $stderrfile_path = $recipe_info_path . $DOT . q{stderr.txt}; - ### SHELL: ## Alternative file tag @@ -304,124 +299,111 @@ sub analysis_sv_annotate { say {$filehandle} $NEWLINE; } - ## Alternative file tag - my $outfile_alt_file_tag = $alt_file_tag . $UNDERSCORE . q{sorted}; - - ## Writes sbatch code to supplied filehandle to sort variants in vcf format + ## Always sort the vcf, garantees an output file that's in sync with the IO hash + my $sort_infile_path = + $alt_file_tag eq $EMPTY_STR + ? $infile_path + : $outfile_path_prefix . $alt_file_tag . $outfile_suffix; + my $sort_outfile_path = $outfile_path; sort_vcf( { active_parameter_href => $active_parameter_href, filehandle => $filehandle, - infile_paths_ref => [ $outfile_path_prefix . $alt_file_tag . $outfile_suffix ], - outfile => $outfile_path_prefix . $outfile_alt_file_tag . 
$outfile_suffix, + infile_paths_ref => [$sort_infile_path], + outfile => $sort_outfile_path, sequence_dict_file => $sequence_dict_file, } ); say {$filehandle} $NEWLINE; - $alt_file_tag = $outfile_alt_file_tag; - - ## Remove FILTER ne PASS and on frequency + ## Remove FILTER ne PASS and filter on frequency if ( $active_parameter_href->{sv_frequency_filter} ) { - ## Build the exclude filter command - my $exclude_filter = _build_bcftools_filter( - { - annotations_ref => \@svdb_query_annotations, - fqf_bcftools_filter_threshold => - $active_parameter_href->{fqf_bcftools_filter_threshold}, - } - ); - - say {$filehandle} q{## Remove FILTER ne PASS and frequency over threshold}; + say {$filehandle} q{## Remove FILTER ne PASS, annotate and remove common variants}; bcftools_view( { apply_filters_ref => [qw{ PASS }], - exclude => $exclude_filter, filehandle => $filehandle, - infile_path => $outfile_path_prefix . $alt_file_tag . $outfile_suffix, - outfile_path => $outfile_path_prefix - . $alt_file_tag - . $UNDERSCORE . q{filt} - . $outfile_suffix, - output_type => q{v}, + infile_path => $sort_outfile_path, + output_type => q{v}, } ); - say {$filehandle} $NEWLINE; - - ## Update file tag - $alt_file_tag .= $UNDERSCORE . q{filt}; - } - - ## Remove common variants - if ( $active_parameter_href->{sv_frequency_filter} ) { + print {$filehandle} $PIPE . $SPACE; - say {$filehandle} q{## Remove common variants}; vcfanno( { - filehandle => $filehandle, - infile_path => $outfile_path_prefix . $alt_file_tag . $outfile_suffix, - luafile_path => $active_parameter_href->{vcfanno_functions}, - stderrfile_path_append => $stderrfile_path, - toml_configfile_path => $active_parameter_href->{sv_vcfanno_config}, + filehandle => $filehandle, + infile_path => catfile( dirname( devnull() ), q{stdout} ), + toml_configfile_path => $active_parameter_href->{sv_vcfanno_config}, } ); print {$filehandle} $PIPE . $SPACE; - ## Update file tag - $alt_file_tag .= $UNDERSCORE . 
q{bcftools_filter}; - - my %vcfanno_config = read_from_file( + ## Gnu tee to split into one unfiltered vcf and one filtered + $alt_file_tag .= $UNDERSCORE . q{anno}; + my $anno_outfile_path = $outfile_path_prefix . $alt_file_tag . $outfile_suffix; + gnu_tee( { - format => q{toml}, - path => $active_parameter_href->{sv_vcfanno_config}, + filehandle => $filehandle, + outfile_paths_ref => [$anno_outfile_path], } ); + print {$filehandle} $PIPE . $SPACE; - ## Store vcfanno annotations - my @vcfanno_annotations; - - ANNOTATION: - foreach my $annotation_href ( @{ $vcfanno_config{annotation} } ) { - - push @vcfanno_annotations, @{ $annotation_href->{names} }; - } ## Build the exclude filter command my $exclude_filter = _build_bcftools_filter( { - annotations_ref => \@vcfanno_annotations, + annotations_ref => \@svdb_query_annotations, + fqf_annotations_ref => $active_parameter_href->{sv_fqa_filters}, fqf_bcftools_filter_threshold => $active_parameter_href->{fqf_bcftools_filter_threshold}, + vcfanno_file_toml => $active_parameter_href->{sv_vcfanno_config}, } ); + ## Don't filter MT varaints + my $mt_contig = first { $_ =~ / MT | chrM /xms } @{ $file_info_href->{contigs} }; + my $target_exp = $mt_contig ? q{^} . $mt_contig : undef; + + ## Update file tag + $alt_file_tag .= $UNDERSCORE . q{filter}; + + ## Outfile path depends on wheter the MT contig is part of the analysis + my $filtered_anno_outfile_path = + $mt_contig ? $outfile_path_prefix . $alt_file_tag . $outfile_suffix : $outfile_path; bcftools_filter( { - exclude => $exclude_filter, - filehandle => $filehandle, - infile_path => $DASH, - outfile_path => $outfile_path_prefix . $alt_file_tag . 
$outfile_suffix, - output_type => q{v}, - stderrfile_path_append => $stderrfile_path, + exclude => $exclude_filter, + filehandle => $filehandle, + infile_path => $DASH, + outfile_path => $filtered_anno_outfile_path, + output_type => q{v}, + targets => $target_exp, } ); say {$filehandle} $NEWLINE; - } - ## Then we have something to rename - if ( $alt_file_tag ne $EMPTY_STR ) { + if ($mt_contig) { - ## Writes sbatch code to supplied filehandle to sort variants in vcf format - sort_vcf( - { - active_parameter_href => $active_parameter_href, - filehandle => $filehandle, - infile_paths_ref => [ $outfile_path_prefix . $alt_file_tag . $outfile_suffix ], - outfile => $outfile_path, - sequence_dict_file => $sequence_dict_file, - } - ); - say {$filehandle} $NEWLINE; + ## Concatenate filtered varaint file with unfiltered MT variants + my @mt_variants_cmds = bcftools_view( + { + infile_path => $anno_outfile_path, + no_header => 1, + targets => $mt_contig, + } + ); + + say {$filehandle} q{## Concatenate filtered varaints with unfiltered MT variants}; + my $stream_mt_variant_cmd = q{<(} . join( $SPACE, @mt_variants_cmds ) . 
q{)}; + gnu_cat( + { + filehandle => $filehandle, + infile_paths_ref => [ $filtered_anno_outfile_path, $stream_mt_variant_cmd ], + stdoutfile_path => $outfile_path, + } + ); + } } close $filehandle or $log->logcroak(q{Could not close filehandle}); @@ -460,33 +442,67 @@ sub _build_bcftools_filter { ## Function : Build the exclude filter command ## Returns : -## Arguments: $fqf_bcftools_filter_threshold => Exclude variants with frequency above filter threshold -## : $annotations_ref => Annotations to use in filtering +## Arguments: $annotations_ref => Annotations to use in filtering +## : $fqf_annotaions_ref => Frequency annotation to use in filtering +## : $fqf_bcftools_filter_threshold => Exclude variants with frequency above filter threshold +## : $vcfanno_file_toml => Toml config file my ($arg_href) = @_; ## Flatten argument(s) - my $fqf_bcftools_filter_threshold; my $annotations_ref; + my $fqf_annotations_ref; + my $fqf_bcftools_filter_threshold; + my $vcfanno_file_toml; my $tmpl = { + annotations_ref => { + default => [], + defined => 1, + required => 1, + store => \$annotations_ref, + strict_type => 1, + }, + fqf_annotations_ref => { + default => [], + defined => 1, + required => 1, + store => \$fqf_annotations_ref, + strict_type => 1, + }, fqf_bcftools_filter_threshold => { defined => 1, required => 1, store => \$fqf_bcftools_filter_threshold, strict_type => 1, }, - annotations_ref => { - default => [], + vcfanno_file_toml => { defined => 1, required => 1, - store => \$annotations_ref, + store => \$vcfanno_file_toml, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + use Array::Utils qw{ intersect }; + + my %vcfanno_config = read_from_file( + { + format => q{toml}, + path => $vcfanno_file_toml, + } + ); + + ANNOTATION: + foreach my $annotation_href ( @{ $vcfanno_config{annotation} } ) { + + push @{$annotations_ref}, @{ $annotation_href->{names} }; + } + + @{$fqf_annotations_ref} = intersect( @{$fqf_annotations_ref}, 
@{$annotations_ref} ); + my $exclude_filter; my $threshold = $SPACE . q{>} . $SPACE . $fqf_bcftools_filter_threshold . $SPACE; diff --git a/t/analysis_sv_annotate.t b/t/analysis_sv_annotate.t index 1660fd975..feaaf7896 100644 --- a/t/analysis_sv_annotate.t +++ b/t/analysis_sv_annotate.t @@ -69,13 +69,13 @@ $active_parameter{recipe_time}{$recipe_name} = 1; my $case_id = $active_parameter{case_id}; $active_parameter{sv_frequency_filter} = 1; $active_parameter{fqf_bcftools_filter_threshold} = $FREQ_CUTOFF; -$active_parameter{sv_vcfanno_config} = catfile( $Bin, - qw{ data references grch37_frequency_vcfanno_filter_config_-v1.0-.toml } ); +$active_parameter{sv_vcfanno_config} = + catfile( $Bin, qw{ data references grch37_frequency_vcfanno_filter_config_-v1.0-.toml } ); $active_parameter{sv_svdb_query} = 1; $active_parameter{sv_svdb_query_db_files} = { a_file => -q{a_file|out_frequency_tag|out_allele_coiunt_tag|in_frequency_tag|in_allele_coiunt_tag|1}, - }; + q{a_file|out_frequency_tag|out_allele_coiunt_tag|in_frequency_tag|in_allele_coiunt_tag|1}, }; +@{ $active_parameter{sv_fqa_filters} } = (qw{ out_frequency_tag GNOMADAF }); my %file_info = test_mip_hashes( { @@ -94,11 +94,11 @@ my %parameter = test_mip_hashes( test_add_io_for_recipe( { - file_info_href => \%file_info, - id => $case_id, - parameter_href => \%parameter, - recipe_name => $recipe_name, - step => q{vcf}, + file_info_href => \%file_info, + id => $case_id, + parameter_href => \%parameter, + recipe_name => $recipe_name, + step => q{vcf}, } ); diff --git a/t/bcftools_base.t b/t/bcftools_base.t index 0fa5bf8fd..7dffda333 100644 --- a/t/bcftools_base.t +++ b/t/bcftools_base.t @@ -99,6 +99,10 @@ my %specific_argument = ( inputs_ref => [qw{ ^sample_1 sample_2 }], expected_output => q{--samples ^sample_1,sample_2}, }, + targets => { + input => q{^MT}, + expected_output => q{--targets} . $SPACE . q{^MT}, + }, threads => { input => $NR_THREADS_TO_USE, expected_output => q{--threads} . $SPACE . 
$NR_THREADS_TO_USE, diff --git a/t/bcftools_view.t b/t/bcftools_view.t index 94568d2ce..cd33e8ae0 100644 --- a/t/bcftools_view.t +++ b/t/bcftools_view.t @@ -23,16 +23,13 @@ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $SPACE $COMMA }; use MIP::Test::Commands qw{ test_function }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; ### Check all internal dependency modules and imports ## Modules with import - my %perl_module = ( - q{MIP::Program::Bcftools} => [qw{ bcftools_view }], -); + my %perl_module = ( q{MIP::Program::Bcftools} => [qw{ bcftools_view }], ); test_import( { perl_module_href => \%perl_module, } ); } @@ -132,6 +129,10 @@ my %specific_argument = ( input => q{snps,indel}, expected_output => q{--types snps,indel}, }, + targets => { + input => q{MT}, + expected_output => q{--targets MT}, + }, ); ## Coderef - enables generalized use of generate call diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index f5134c3ed..bff0d12d6 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -66,6 +66,11 @@ vcfanno_config: grch38_vcfanno_config_-v0.2-.toml ### Analysis ## Programs ## Parameters +sv_fqa_annotations: + - GNOMADAF + - GNOMADAF_POPMAX + - SWEGENAF + - swegen gatk_path: /opt/conda/opt/gatk-3.8 qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml picardtools_path: /usr/picard/ diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 96a0b9f79..698ec64b9 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -47,6 +47,11 @@ fqf_annotations: - GNOMADAF - GNOMADAF_popmax - SWEGENAF +sv_fqa_annotations: + - GNOMADAF + - GNOMADAF_POPMAX + - gnomad_sv + - clinical_genomics_loqus gatk_path: /usr picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml 
diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 791a86aae..7b112e790 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -44,6 +44,11 @@ fqf_annotations: - GNOMADAF - GNOMADAF_popmax - SWEGENAF +sv_fqa_annotations: + - GNOMADAF + - GNOMADAF_POPMAX + - SWEGENAF + - swegen picardtools_path: /usr/picard sv_genmod_models_case_type: cmms sv_vep_plugin: From 83214a24ad68c67e35e2b366e2f01f2939a68b68 Mon Sep 17 00:00:00 2001 From: jemten Date: Sat, 18 Sep 2021 17:35:55 +0200 Subject: [PATCH 065/116] adds processes to vcfanno --- definitions/rd_dna_parameters.yaml | 2 +- lib/MIP/Program/Vcfanno.pm | 13 ++++++++++++- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 3 ++- t/vcfanno.t | 5 ++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 02c36f242..b71482125 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -192,7 +192,7 @@ recipe_core_number: samtools_subsample_mt: 1 smncopynumbercaller: 4 star_caller: 4 - sv_annotate: 1 + sv_annotate: 2 sv_combinevariantcallsets: 1 sv_rankvariant: 16 sv_reformat: 1 diff --git a/lib/MIP/Program/Vcfanno.pm b/lib/MIP/Program/Vcfanno.pm index 70e81a816..2aca421bc 100644 --- a/lib/MIP/Program/Vcfanno.pm +++ b/lib/MIP/Program/Vcfanno.pm @@ -36,6 +36,7 @@ sub vcfanno { ## : $filehandle => Filehandle to write to ## : $infile_path => Infile path to read from ## : $luafile_path => Optional path to a file containing custom javascript functions to be used as ops +## : $processes => Processes to use ## : $stderrfile_path => Stderrfile path ## : $stderrfile_path_append => Append stderr info to file path ## : $stdoutfile_path => Stdoutfile path @@ -48,6 +49,7 @@ sub vcfanno { my $filehandle; my $infile_path; my $luafile_path; + my $processes; my $stderrfile_path; my $stderrfile_path_append; my $stdoutfile_path; @@ 
-61,8 +63,13 @@ sub vcfanno { strict_type => 1, }, filehandle => { store => \$filehandle, }, - infile_path => { store => \$infile_path, strict_type => 1, }, + infile_path => { store => \$infile_path, strict_type => 1, }, luafile_path => { store => \$luafile_path, strict_type => 1, }, + processes => { + allow => qr/\A \d+ \z/xms, + store => \$processes, + strict_type => 1, + }, stderrfile_path => { store => \$stderrfile_path, strict_type => 1, @@ -95,6 +102,10 @@ sub vcfanno { push @commands, q{-ends}; } + if ($processes) { + + push @commands, q{-p} . $SPACE . $processes; + } push @commands, $toml_configfile_path; diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 49a86f7cc..364401f21 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -333,7 +333,8 @@ sub analysis_sv_annotate { vcfanno( { filehandle => $filehandle, - infile_path => catfile( dirname( devnull() ), q{stdout} ), + infile_path => catfile( dirname( devnull() ), q{stdin} ), + processes => $recipe{core_number}, toml_configfile_path => $active_parameter_href->{sv_vcfanno_config}, } ); diff --git a/t/vcfanno.t b/t/vcfanno.t index ec2f7e8e3..280b4f977 100644 --- a/t/vcfanno.t +++ b/t/vcfanno.t @@ -23,7 +23,6 @@ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; use MIP::Test::Commands qw{ test_function }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; @@ -92,6 +91,10 @@ my %specific_argument = ( input => catfile(qw{ a test luafile }), expected_output => q{-lua} . $SPACE . 
catfile(qw{ a test luafile }), }, + processes => { + input => 2, + expected_output => q{-p 2}, + }, toml_configfile_path => { input => catfile(qw{ a test tomlfile }), expected_output => catfile(qw{ a test tomlfile }), From 47a4de9695f9dc443615a75457d946f692ef2c97 Mon Sep 17 00:00:00 2001 From: jemten Date: Sun, 19 Sep 2021 10:19:50 +0200 Subject: [PATCH 066/116] use correct sv annotation list --- definitions/rd_dna_parameters.yaml | 4 ++-- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index b71482125..5316a9ae9 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -911,7 +911,7 @@ sv_fqa_annotations: data_type: ARRAY default: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax type: recipe_argument sv_fqa_filters: associated_recipe: @@ -919,7 +919,7 @@ sv_fqa_filters: data_type: ARRAY default: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax - gnomad_sv - clinical_genomics_loqus type: recipe_argument diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 364401f21..615ef36a1 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -510,7 +510,7 @@ sub _build_bcftools_filter { $exclude_filter = $DOUBLE_QUOTE . q{INFO/} - . join( $threshold . $PIPE . $SPACE . q{INFO/}, @{$annotations_ref} ) + . join( $threshold . $PIPE . $SPACE . q{INFO/}, @{$fqf_annotations_ref} ) . $threshold . 
$DOUBLE_QUOTE; return $exclude_filter; From 52e7eaf82ed943a2e01286727f01dee4c6d3b679 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 20 Sep 2021 14:55:05 +0200 Subject: [PATCH 067/116] changed varaible name to sv_fqa_vcfanno_filters --- definitions/dragen_rd_dna_parameters.yaml | 12 +----------- definitions/rd_dna_parameters.yaml | 12 +----------- definitions/rd_dna_vcf_rerun_parameters.yaml | 12 +----------- lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm | 12 ++---------- lib/MIP/Cli/Mip/Analyse/Rd_dna.pm | 10 +--------- lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm | 10 +--------- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 13 ++++++++----- t/analysis_sv_annotate.t | 2 +- t/mip_analysis.test | 2 +- templates/grch38_mip_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_config.yaml | 6 +++--- templates/mip_rd_dna_vcf_rerun_config.yaml | 2 +- 12 files changed, 22 insertions(+), 73 deletions(-) diff --git a/definitions/dragen_rd_dna_parameters.yaml b/definitions/dragen_rd_dna_parameters.yaml index 42456a0ef..bbbdb425d 100755 --- a/definitions/dragen_rd_dna_parameters.yaml +++ b/definitions/dragen_rd_dna_parameters.yaml @@ -194,7 +194,7 @@ sv_annotate: - bcftools - svdb type: recipe -sv_fqa_annotations: +sv_fqa_vcfanno_filters: associated_recipe: - sv_annotate data_type: ARRAY @@ -202,16 +202,6 @@ sv_fqa_annotations: - GNOMADAF - GNOMADAF_POPMAX type: recipe_argument -sv_fqa_filters: - associated_recipe: - - sv_annotate - data_type: ARRAY - default: - - GNOMADAF - - GNOMADAF_POPMAX - - gnomad_sv - - clinical_genomics_loqus - type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 5316a9ae9..070dca48b 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -905,7 +905,7 @@ sv_annotate: - bcftools - svdb type: recipe -sv_fqa_annotations: +sv_fqa_vcfanno_filters: associated_recipe: - sv_annotate data_type: ARRAY @@ -913,16 +913,6 @@ 
sv_fqa_annotations: - GNOMADAF - GNOMADAF_popmax type: recipe_argument -sv_fqa_filters: - associated_recipe: - - sv_annotate - data_type: ARRAY - default: - - GNOMADAF - - GNOMADAF_popmax - - gnomad_sv - - clinical_genomics_loqus - type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/definitions/rd_dna_vcf_rerun_parameters.yaml b/definitions/rd_dna_vcf_rerun_parameters.yaml index d5b21958c..f2c64fb1b 100755 --- a/definitions/rd_dna_vcf_rerun_parameters.yaml +++ b/definitions/rd_dna_vcf_rerun_parameters.yaml @@ -162,7 +162,7 @@ sv_annotate: - bcftools - svdb type: recipe -sv_fqa_annotations: +sv_fqa_vcfanno_filters: associated_recipe: - sv_annotate data_type: ARRAY @@ -170,16 +170,6 @@ sv_fqa_annotations: - GNOMADAF - GNOMADAF_POPMAX type: recipe_argument -sv_fqa_filters: - associated_recipe: - - sv_annotate - data_type: ARRAY - default: - - GNOMADAF - - GNOMADAF_POPMAX - - gnomad_sv - - clinical_genomics_loqus - type: recipe_argument sv_bcftools_view_filter: associated_recipe: - sv_annotate diff --git a/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm index 4feaa58da..2f8b20303 100644 --- a/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Dragen_rd_dna.pm @@ -257,16 +257,8 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ); option( - q{sv_fqa_annotations} => ( - documentation => q{Frequency annotations to use}, - is => q{rw}, - isa => ArrayRef, - ) - ); - - option( - q{sv_fqa_filters} => ( - documentation => q{Frequency annotations to use when filtering}, + q{sv_fqa_vcfanno_filters} => ( + documentation => q{Frequency annotations to use when filtering }, is => q{rw}, isa => ArrayRef, ) diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm index 2d25b7104..876ca5778 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna.pm @@ -798,15 +798,7 @@ q{Default: grch37_dbsnp_-138-.vcf, 
grch37_1000g_indels_-phase1-.vcf, grch37_mill ); option( - q{sv_fqa_annotations} => ( - documentation => q{Frequency annotations to use }, - is => q{rw}, - isa => ArrayRef, - ) - ); - - option( - q{sv_fqa_filter} => ( + q{sv_fqa_vcfanno_filters} => ( documentation => q{Frequency annotations to use when filtering }, is => q{rw}, isa => ArrayRef, diff --git a/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm b/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm index 3a01b1fea..4d1e8b6b7 100644 --- a/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm +++ b/lib/MIP/Cli/Mip/Analyse/Rd_dna_vcf_rerun.pm @@ -212,15 +212,7 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v ); option( - q{sv_fqa_annotations} => ( - documentation => q{Frequency annotations to use}, - is => q{rw}, - isa => ArrayRef, - ) - ); - - option( - q{sv_fqa_filter} => ( + q{sv_fqa_vcfanno_filters} => ( documentation => q{Frequency annotations to use when filtering}, is => q{rw}, isa => ArrayRef, diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 615ef36a1..29ae5d595 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -132,8 +132,7 @@ sub analysis_sv_annotate { use MIP::Program::Gnu::Coreutils qw( gnu_cat gnu_mv gnu_tee ); use MIP::Io::Read qw{ read_from_file }; use MIP::Processmanagement::Processes qw{ submit_recipe }; - use MIP::Program::Bcftools - qw{ bcftools_annotate bcftools_filter bcftools_view bcftools_view_and_index_vcf }; + use MIP::Program::Bcftools qw{ bcftools_filter bcftools_view }; use MIP::Program::Genmod qw{ genmod_annotate }; use MIP::Program::Picardtools qw{ sort_vcf }; use MIP::Program::Svdb qw{ svdb_query }; @@ -355,7 +354,7 @@ sub analysis_sv_annotate { my $exclude_filter = _build_bcftools_filter( { annotations_ref => \@svdb_query_annotations, - fqf_annotations_ref => $active_parameter_href->{sv_fqa_filters}, + fqf_annotations_ref => 
$active_parameter_href->{sv_fqa_vcfanno_filters}, fqf_bcftools_filter_threshold => $active_parameter_href->{fqf_bcftools_filter_threshold}, vcfanno_file_toml => $active_parameter_href->{sv_vcfanno_config}, @@ -488,6 +487,7 @@ sub _build_bcftools_filter { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; use Array::Utils qw{ intersect }; + use List::MoreUtils qw{ uniq }; my %vcfanno_config = read_from_file( { @@ -496,13 +496,16 @@ sub _build_bcftools_filter { } ); + my @vcfanno_annotations; + ANNOTATION: foreach my $annotation_href ( @{ $vcfanno_config{annotation} } ) { - push @{$annotations_ref}, @{ $annotation_href->{names} }; + push @vcfanno_annotations, @{ $annotation_href->{names} }; } - @{$fqf_annotations_ref} = intersect( @{$fqf_annotations_ref}, @{$annotations_ref} ); + @{$fqf_annotations_ref} = intersect( @{$fqf_annotations_ref}, @vcfanno_annotations ); + @{$fqf_annotations_ref} = uniq( @{$fqf_annotations_ref}, @{$annotations_ref} ); my $exclude_filter; my $threshold = $SPACE . q{>} . $SPACE . $fqf_bcftools_filter_threshold . 
$SPACE; diff --git a/t/analysis_sv_annotate.t b/t/analysis_sv_annotate.t index feaaf7896..14294bd47 100644 --- a/t/analysis_sv_annotate.t +++ b/t/analysis_sv_annotate.t @@ -75,7 +75,7 @@ $active_parameter{sv_svdb_query} = 1; $active_parameter{sv_svdb_query_db_files} = { a_file => q{a_file|out_frequency_tag|out_allele_coiunt_tag|in_frequency_tag|in_allele_coiunt_tag|1}, }; -@{ $active_parameter{sv_fqa_filters} } = (qw{ out_frequency_tag GNOMADAF }); +@{ $active_parameter{sv_fqa_vcfanno_filters} } = (qw{ out_frequency_tag GNOMADAF }); my %file_info = test_mip_hashes( { diff --git a/t/mip_analysis.test b/t/mip_analysis.test index ad0ae7f42..bc6f93e3c 100755 --- a/t/mip_analysis.test +++ b/t/mip_analysis.test @@ -711,7 +711,7 @@ sub read_sv_infile_vcf { ## Test Frequency filter _test_frequency_filter_in_vcf_header( { - fqf_annotations_ref => $active_parameter_href->{sv_fqa_annotations}, + fqf_annotations_ref => $active_parameter_href->{sv_fqa_vcfanno_filters}, recipe_mode => $active_parameter_href->{sv_annotate}, toml_file_path => $active_parameter_href->{sv_vcfanno_config}, vcf_header_href => \%vcf_header, diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index bff0d12d6..b63d92f9f 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -66,7 +66,7 @@ vcfanno_config: grch38_vcfanno_config_-v0.2-.toml ### Analysis ## Programs ## Parameters -sv_fqa_annotations: +sv_fqa_vcfanno_filters: - GNOMADAF - GNOMADAF_POPMAX - SWEGENAF diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 698ec64b9..7faec99c6 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -47,11 +47,11 @@ fqf_annotations: - GNOMADAF - GNOMADAF_popmax - SWEGENAF -sv_fqa_annotations: +sv_fqa_vcfanno_filters: - GNOMADAF - GNOMADAF_POPMAX - - gnomad_sv - - clinical_genomics_loqus + - gnomad_svAF + - clinical_genomics_loqusFrq gatk_path: /usr 
picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 7b112e790..24c81f202 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -44,7 +44,7 @@ fqf_annotations: - GNOMADAF - GNOMADAF_popmax - SWEGENAF -sv_fqa_annotations: +sv_fqa_vcfanno_filters: - GNOMADAF - GNOMADAF_POPMAX - SWEGENAF From b04290795808bfbadb2677cbe649e13faf8eb020 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 21 Sep 2021 10:53:23 +0200 Subject: [PATCH 068/116] fixing tests --- t/check_vep_api_cache_versions.t | 8 ++++---- .../cache/Plugins/ExACpLI.pm | 0 .../cache/Plugins/Hello.txt | 0 .../cache/Plugins/LoFtool.pm | 0 .../cache/Plugins/LoFtool_scores.txt | 0 .../cache/Plugins/MaxEntScan.pm | 0 .../cache/Plugins/SpliceAI.pm | 0 .../cache/Plugins/dbNSFP.pm | 0 .../cache/Plugins/fordownload/Hello_world.txt | 0 .../cache/Plugins/human_ancestor.fa | 0 .../cache/homo_sapiens/104_GRCh37}/Hello.txt | 0 .../cache/homo_sapiens/95_GRCh37/Hello.txt | 0 .../cache/homo_sapiens/97_GRCh37/Hello.txt | 0 .../cache/homo_sapiens/99_GRCh37/Hello.txt | 0 .../homo_sapiens_merged/104_GRCh37}/Hello.txt | 0 .../cache/homo_sapiens_merged/95_GRCh37/Hello.txt | 0 .../cache/homo_sapiens_merged/97_GRCh37/Hello.txt | 0 .../cache2/homo_sapiens/95_GRCh37/Hello.txt | 0 .../cache2/homo_sapiens_merged/95_GRCh37/Hello.txt | 0 ...=> grch37_clinvar_reformated_-20210828-.vcf.gz} | 0 ...rch37_clinvar_reformated_-20210828-.vcf.gz.tbi} | 0 ...rch37_loqusdb_sv_variants_export-20210907-.vcf} | 0 ...=> grch38_clinvar_reformated_-20210828-.vcf.gz} | 0 ...rch38_clinvar_reformated_-20210828-.vcf.gz.tbi} | 0 templates/grch38_mip_rd_dna_config.yaml | 14 +++++++------- templates/mip_dragen_rd_dna_config.yaml | 14 +++++++------- templates/mip_rd_dna_config.yaml | 2 +- 
templates/mip_rd_dna_panel_config.yaml | 14 +++++++------- templates/mip_rd_dna_vcf_rerun_config.yaml | 14 +++++++------- 29 files changed, 33 insertions(+), 33 deletions(-) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/ExACpLI.pm (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/LoFtool.pm (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/LoFtool_scores.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/MaxEntScan.pm (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/SpliceAI.pm (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/dbNSFP.pm (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/fordownload/Hello_world.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/Plugins/human_ancestor.fa (100%) rename t/data/references/{ensembl-tools-data-103/cache/homo_sapiens/103_GRCh37 => ensembl-tools-data-104/cache/homo_sapiens/104_GRCh37}/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/homo_sapiens/95_GRCh37/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/homo_sapiens/97_GRCh37/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/homo_sapiens/99_GRCh37/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103/cache/homo_sapiens_merged/103_GRCh37 => ensembl-tools-data-104/cache/homo_sapiens_merged/104_GRCh37}/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/homo_sapiens_merged/95_GRCh37/Hello.txt (100%) 
rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache/homo_sapiens_merged/97_GRCh37/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache2/homo_sapiens/95_GRCh37/Hello.txt (100%) rename t/data/references/{ensembl-tools-data-103 => ensembl-tools-data-104}/cache2/homo_sapiens_merged/95_GRCh37/Hello.txt (100%) rename t/data/references/{grch37_clinvar_reformated_-20210415-.vcf.gz => grch37_clinvar_reformated_-20210828-.vcf.gz} (100%) rename t/data/references/{grch37_clinvar_reformated_-20210415-.vcf.gz.tbi => grch37_clinvar_reformated_-20210828-.vcf.gz.tbi} (100%) rename t/data/references/{grch37_loqusdb_sv_variants_export-20210416-.vcf => grch37_loqusdb_sv_variants_export-20210907-.vcf} (100%) rename t/data/references/{grch38_clinvar_reformated_-20210415-.vcf.gz => grch38_clinvar_reformated_-20210828-.vcf.gz} (100%) rename t/data/references/{grch38_clinvar_reformated_-20210415-.vcf.gz.tbi => grch38_clinvar_reformated_-20210828-.vcf.gz.tbi} (100%) diff --git a/t/check_vep_api_cache_versions.t b/t/check_vep_api_cache_versions.t index 0226a929d..a25bfc2e3 100644 --- a/t/check_vep_api_cache_versions.t +++ b/t/check_vep_api_cache_versions.t @@ -54,13 +54,13 @@ my $test_dir = File::Temp->newdir(); test_log( {} ); ## Given matching vep API and cache version -my $vep_directory_cache = catdir( $Bin, qw{ data references ensembl-tools-data-103 cache } ); +my $vep_directory_cache = catdir( $Bin, qw{ data references ensembl-tools-data-104 cache } ); my %process_return = ( buffers_ref => [], error_message => undef, stderrs_ref => [], - stdouts_ref => [qw{ 103 }], + stdouts_ref => [qw{ 104 }], success => 1, ); test_constants( @@ -70,7 +70,7 @@ test_constants( ); my $base_command = q{vep}; my $container_base_command = - q{singularity exec docker//docker.io/ensemblorg/ensembl-vep:release_103.1 vep}; + q{singularity exec docker//docker.io/ensemblorg/ensembl-vep:release_104.3 vep}; my %container_cmd = ( 
$base_command => $container_base_command, ); set_container_cmd( { container_cmd_href => \%container_cmd, } ); @@ -85,7 +85,7 @@ my $match = check_vep_api_cache_versions( ok( $match, q{Return on matching versions} ); ## Given non matching API and cache -$vep_directory_cache = catdir( $Bin, qw{ data references ensembl-tools-data-103 cache2 } ); +$vep_directory_cache = catdir( $Bin, qw{ data references ensembl-tools-data-104 cache2 } ); ## When comparing API and cache version trap { diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/ExACpLI.pm b/t/data/references/ensembl-tools-data-104/cache/Plugins/ExACpLI.pm similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/ExACpLI.pm rename to t/data/references/ensembl-tools-data-104/cache/Plugins/ExACpLI.pm diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/Plugins/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/Plugins/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/LoFtool.pm b/t/data/references/ensembl-tools-data-104/cache/Plugins/LoFtool.pm similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/LoFtool.pm rename to t/data/references/ensembl-tools-data-104/cache/Plugins/LoFtool.pm diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt b/t/data/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt rename to t/data/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/MaxEntScan.pm b/t/data/references/ensembl-tools-data-104/cache/Plugins/MaxEntScan.pm similarity index 100% rename from 
t/data/references/ensembl-tools-data-103/cache/Plugins/MaxEntScan.pm rename to t/data/references/ensembl-tools-data-104/cache/Plugins/MaxEntScan.pm diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/SpliceAI.pm b/t/data/references/ensembl-tools-data-104/cache/Plugins/SpliceAI.pm similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/SpliceAI.pm rename to t/data/references/ensembl-tools-data-104/cache/Plugins/SpliceAI.pm diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/dbNSFP.pm b/t/data/references/ensembl-tools-data-104/cache/Plugins/dbNSFP.pm similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/dbNSFP.pm rename to t/data/references/ensembl-tools-data-104/cache/Plugins/dbNSFP.pm diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/fordownload/Hello_world.txt b/t/data/references/ensembl-tools-data-104/cache/Plugins/fordownload/Hello_world.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/fordownload/Hello_world.txt rename to t/data/references/ensembl-tools-data-104/cache/Plugins/fordownload/Hello_world.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/Plugins/human_ancestor.fa b/t/data/references/ensembl-tools-data-104/cache/Plugins/human_ancestor.fa similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/Plugins/human_ancestor.fa rename to t/data/references/ensembl-tools-data-104/cache/Plugins/human_ancestor.fa diff --git a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens/103_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens/104_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens/103_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens/104_GRCh37/Hello.txt diff --git 
a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens/95_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens/95_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens/95_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens/95_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens/97_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens/97_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens/97_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens/97_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens/99_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens/99_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens/99_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens/99_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/103_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/104_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/103_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/104_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/95_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/95_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/95_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/95_GRCh37/Hello.txt diff --git 
a/t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/97_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/97_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache/homo_sapiens_merged/97_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache/homo_sapiens_merged/97_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache2/homo_sapiens/95_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache2/homo_sapiens/95_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache2/homo_sapiens/95_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache2/homo_sapiens/95_GRCh37/Hello.txt diff --git a/t/data/references/ensembl-tools-data-103/cache2/homo_sapiens_merged/95_GRCh37/Hello.txt b/t/data/references/ensembl-tools-data-104/cache2/homo_sapiens_merged/95_GRCh37/Hello.txt similarity index 100% rename from t/data/references/ensembl-tools-data-103/cache2/homo_sapiens_merged/95_GRCh37/Hello.txt rename to t/data/references/ensembl-tools-data-104/cache2/homo_sapiens_merged/95_GRCh37/Hello.txt diff --git a/t/data/references/grch37_clinvar_reformated_-20210415-.vcf.gz b/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz similarity index 100% rename from t/data/references/grch37_clinvar_reformated_-20210415-.vcf.gz rename to t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz diff --git a/t/data/references/grch37_clinvar_reformated_-20210415-.vcf.gz.tbi b/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz.tbi similarity index 100% rename from t/data/references/grch37_clinvar_reformated_-20210415-.vcf.gz.tbi rename to t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz.tbi diff --git a/t/data/references/grch37_loqusdb_sv_variants_export-20210416-.vcf b/t/data/references/grch37_loqusdb_sv_variants_export-20210907-.vcf similarity index 100% rename 
from t/data/references/grch37_loqusdb_sv_variants_export-20210416-.vcf rename to t/data/references/grch37_loqusdb_sv_variants_export-20210907-.vcf diff --git a/t/data/references/grch38_clinvar_reformated_-20210415-.vcf.gz b/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz similarity index 100% rename from t/data/references/grch38_clinvar_reformated_-20210415-.vcf.gz rename to t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz diff --git a/t/data/references/grch38_clinvar_reformated_-20210415-.vcf.gz.tbi b/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz.tbi similarity index 100% rename from t/data/references/grch38_clinvar_reformated_-20210415-.vcf.gz.tbi rename to t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz.tbi diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index f5134c3ed..67b995c79 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -89,10 +89,10 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch38_clinvar_reformated_-20210415-.vcf.gz + path: cluster_constant_path!/references/grch38_clinvar_reformated_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT -vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-103/cache/ -vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins +vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ +vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins vep_plugin: dbNSFP: exist_check: @@ -114,15 +114,15 @@ vep_plugin: LoFtool: exists_check: - type: file - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt parameters: - - 
cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt MaxEntScan: exist_check: - type: directory - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload - SWA - NCSS SpliceAI: diff --git a/templates/mip_dragen_rd_dna_config.yaml b/templates/mip_dragen_rd_dna_config.yaml index 3fcba1f0f..4d9e2b28c 100644 --- a/templates/mip_dragen_rd_dna_config.yaml +++ b/templates/mip_dragen_rd_dna_config.yaml @@ -68,10 +68,10 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210415-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT -vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-103/cache/ -vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins +vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ +vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins vep_plugin: dbNSFP: exist_check: @@ -93,15 +93,15 @@ vep_plugin: LoFtool: exist_check: - type: file - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + - 
cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt MaxEntScan: exist_check: - type: directory - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload - SWA - NCSS SpliceAI: diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index efd5d6704..d8974be5f 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -70,7 +70,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_panel_config.yaml b/templates/mip_rd_dna_panel_config.yaml index 7a2802008..674d0ac94 100755 --- a/templates/mip_rd_dna_panel_config.yaml +++ b/templates/mip_rd_dna_panel_config.yaml @@ -49,10 +49,10 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210415-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT -vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-103/cache/ -vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins +vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ 
+vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins vep_plugin: dbNSFP: exist_check: @@ -74,15 +74,15 @@ vep_plugin: LoFtool: exist_check: - type: file - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt MaxEntScan: exist_check: - type: directory - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload - SWA - NCSS SpliceAI: diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 53f17ec55..6c1e315c9 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -65,10 +65,10 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210415-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT -vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-103/cache/ -vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins +vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ +vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins vep_plugin: dbNSFP: exist_check: @@ -90,15 +90,15 @@ vep_plugin: LoFtool: 
exist_check: - type: file - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/LoFtool_scores.txt + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/LoFtool_scores.txt MaxEntScan: exist_check: - type: directory - path: cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload parameters: - - cluster_constant_path!/references/ensembl-tools-data-103/cache/Plugins/fordownload + - cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins/fordownload - SWA - NCSS SpliceAI: From 892a1986111b0362592beeeb3ae889eceed06f34 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 21 Sep 2021 11:07:14 +0200 Subject: [PATCH 069/116] fixing rna tests --- .../cache => ensembl-tools-104}/cache/Plugins/Hello.txt | 0 .../cache/Plugins/fordownload/Hello_world.txt | 0 .../cache => ensembl-tools-104}/cache/Plugins/human_ancestor.fa | 0 templates/mip_rd_rna_config.yaml | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) rename t/data/modules/miniconda/envs/MIP_rd_rna/{ensembl-tools-103/cache => ensembl-tools-104}/cache/Plugins/Hello.txt (100%) rename t/data/modules/miniconda/envs/MIP_rd_rna/{ensembl-tools-103/cache => ensembl-tools-104}/cache/Plugins/fordownload/Hello_world.txt (100%) rename t/data/modules/miniconda/envs/MIP_rd_rna/{ensembl-tools-103/cache => ensembl-tools-104}/cache/Plugins/human_ancestor.fa (100%) diff --git a/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/Hello.txt b/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/Hello.txt similarity index 100% rename from 
t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/Hello.txt rename to t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/Hello.txt diff --git a/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/fordownload/Hello_world.txt b/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/fordownload/Hello_world.txt similarity index 100% rename from t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/fordownload/Hello_world.txt rename to t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/fordownload/Hello_world.txt diff --git a/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/human_ancestor.fa b/t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/human_ancestor.fa similarity index 100% rename from t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/cache/Plugins/human_ancestor.fa rename to t/data/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/Plugins/human_ancestor.fa diff --git a/templates/mip_rd_rna_config.yaml b/templates/mip_rd_rna_config.yaml index 65636965b..52172efa5 100644 --- a/templates/mip_rd_rna_config.yaml +++ b/templates/mip_rd_rna_config.yaml @@ -33,4 +33,4 @@ fusion_cytoband_path: /arriba_v2.1.0/database/cytobands_hg19_hs37d5_GRCh37_v2.1. 
fusion_protein_domain_path: /arriba_v2.1.0/database/protein_domains_hg19_hs37d5_GRCh37_v2.1.0.gff3 picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml -vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/ +vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/ From 41d521bc0abc8e595b869d5e5f56cabffd36ec3b Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Tue, 21 Sep 2021 17:13:54 +0200 Subject: [PATCH 070/116] Update lib/MIP/Recipes/Analysis/Sv_annotate.pm Co-authored-by: Henrik Stranneheim --- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 29ae5d595..f3e61fa7d 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -361,7 +361,7 @@ sub analysis_sv_annotate { } ); - ## Don't filter MT varaints + ## Don't filter MT variants my $mt_contig = first { $_ =~ / MT | chrM /xms } @{ $file_info_href->{contigs} }; my $target_exp = $mt_contig ? q{^} . 
$mt_contig : undef; From 5160f29129c77f84d3b429a147a04b95b956ea7b Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Tue, 21 Sep 2021 17:14:01 +0200 Subject: [PATCH 071/116] Update lib/MIP/Recipes/Analysis/Sv_annotate.pm Co-authored-by: Henrik Stranneheim --- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index f3e61fa7d..476d73d03 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -385,7 +385,7 @@ sub analysis_sv_annotate { if ($mt_contig) { - ## Concatenate filtered varaint file with unfiltered MT variants + ## Concatenate filtered variant file with unfiltered MT variants my @mt_variants_cmds = bcftools_view( { infile_path => $anno_outfile_path, From 514ec3a0e048a39b5756cc5a67233e47de8e6d31 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 21 Sep 2021 17:33:35 +0200 Subject: [PATCH 072/116] fixing gnomad_popmax annotation --- definitions/dragen_rd_dna_parameters.yaml | 2 +- definitions/rd_dna_vcf_rerun_parameters.yaml | 2 +- lib/MIP/Program/Bcftools.pm | 2 +- .../grch37_frequency_vcfanno_filter_config_-v1.0-.toml | 6 +++--- ...h37_frequency_vcfanno_filter_config_bad_data_-v1.0-.toml | 4 ++-- .../grch37_vcfanno_config_bad_template-v1.0-.toml | 4 ++-- t/data/references/grch37_vcfanno_config_template-v1.0-.toml | 2 +- t/data/references/grch38_vcfanno_config_template-v1.0-.toml | 2 +- t/data/test_data/test.vcf | 2 +- templates/grch38_mip_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_vcf_rerun_config.yaml | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/definitions/dragen_rd_dna_parameters.yaml b/definitions/dragen_rd_dna_parameters.yaml index bbbdb425d..4aa75b15a 100755 --- a/definitions/dragen_rd_dna_parameters.yaml +++ b/definitions/dragen_rd_dna_parameters.yaml @@ -200,7 +200,7 @@ sv_fqa_vcfanno_filters: data_type: 
ARRAY default: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax type: recipe_argument sv_bcftools_view_filter: associated_recipe: diff --git a/definitions/rd_dna_vcf_rerun_parameters.yaml b/definitions/rd_dna_vcf_rerun_parameters.yaml index f2c64fb1b..f4b5eeb8f 100755 --- a/definitions/rd_dna_vcf_rerun_parameters.yaml +++ b/definitions/rd_dna_vcf_rerun_parameters.yaml @@ -168,7 +168,7 @@ sv_fqa_vcfanno_filters: data_type: ARRAY default: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax type: recipe_argument sv_bcftools_view_filter: associated_recipe: diff --git a/lib/MIP/Program/Bcftools.pm b/lib/MIP/Program/Bcftools.pm index 78616eb80..2276bbbed 100644 --- a/lib/MIP/Program/Bcftools.pm +++ b/lib/MIP/Program/Bcftools.pm @@ -1097,7 +1097,7 @@ sub bcftools_mpileup { stderrfile_path_append => { store => \$stderrfile_path_append, strict_type => 1, }, stdoutfile_path => { store => \$stdoutfile_path, strict_type => 1, }, targets => { - store => $targets, + store => \$targets, strict_type => 1, }, threads => { diff --git a/t/data/references/grch37_frequency_vcfanno_filter_config_-v1.0-.toml b/t/data/references/grch37_frequency_vcfanno_filter_config_-v1.0-.toml index 0a54604b3..56058f80c 100644 --- a/t/data/references/grch37_frequency_vcfanno_filter_config_-v1.0-.toml +++ b/t/data/references/grch37_frequency_vcfanno_filter_config_-v1.0-.toml @@ -1,19 +1,19 @@ # TOML -title = "Vcfanno configuration file" +title = "Vcfanno configuration file" [[annotation]] file="/mnt/hds/proj/cust003/develop/mip_references/grch37_gnomad.genomes_-r2.0.1-.vcf.gz" fields = ["AF", "AF_POPMAX"] ops=["self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX"] +names=["GNOMADAF", "GNOMADAF_popmax"] skip_split_and_normalize = true [[annotation]] file="/mnt/hds/proj/cust003/develop/mip_references/grch37_gnomad.genomes_-r2.1.1-.vcf.gz" fields = ["AF", "AF_POPMAX"] ops=["self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX"] +names=["GNOMADAF", "GNOMADAF_popmax"] [[annotation]] 
file="/mnt/hds/proj/cust003/develop/mip_references/grch37_cadd_whole_genome_snvs_-v1.4-.tsv.gz" diff --git a/t/data/references/grch37_frequency_vcfanno_filter_config_bad_data_-v1.0-.toml b/t/data/references/grch37_frequency_vcfanno_filter_config_bad_data_-v1.0-.toml index e3f9c32d6..ddfa62e67 100644 --- a/t/data/references/grch37_frequency_vcfanno_filter_config_bad_data_-v1.0-.toml +++ b/t/data/references/grch37_frequency_vcfanno_filter_config_bad_data_-v1.0-.toml @@ -1,9 +1,9 @@ # TOML -title = "Vcfanno configuration file" +title = "Vcfanno configuration file" [[annotation]] file="/mnt/hds/proj/cust003/develop/mip_references/grch37_gnomad.genomes_-r2.0.1-.vcf.gz" #fields = ["AF", "AF_POPMAX"] ops=["self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX"] +names=["GNOMADAF", "GNOMADAF_popmax"] diff --git a/t/data/references/grch37_vcfanno_config_bad_template-v1.0-.toml b/t/data/references/grch37_vcfanno_config_bad_template-v1.0-.toml index a55b01a36..9b4bbdacd 100644 --- a/t/data/references/grch37_vcfanno_config_bad_template-v1.0-.toml +++ b/t/data/references/grch37_vcfanno_config_bad_template-v1.0-.toml @@ -1,6 +1,6 @@ # TOML -title = "Vcfanno configuration file" +title = "Vcfanno configuration file" [functions] file = "TEST_REFERENCES!/vcfanno_functions_-v1.0-.lua" @@ -9,4 +9,4 @@ file = "TEST_REFERENCES!/vcfanno_functions_-v1.0-.lua" file="TEST_REFERENCES!/grch37_gnomad.genomes_-r2.0.1-.vcf.gz" fields = ["AF", "AF_POPMAX", "MISSING_TAG"] ops=["self", "self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX", "MISSING_TAG"] +names=["GNOMADAF", "GNOMADAF_popmax", "MISSING_TAG"] diff --git a/t/data/references/grch37_vcfanno_config_template-v1.0-.toml b/t/data/references/grch37_vcfanno_config_template-v1.0-.toml index b11003b15..230eba077 100644 --- a/t/data/references/grch37_vcfanno_config_template-v1.0-.toml +++ b/t/data/references/grch37_vcfanno_config_template-v1.0-.toml @@ -15,5 +15,5 @@ names=["Obs", "Hom"] 
file="TEST_REFERENCES!/grch37_gnomad.genomes_-r2.0.1-.vcf.gz" fields = ["AF", "AF_POPMAX"] ops=["self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX"] +names=["GNOMADAF", "GNOMADAF_popmax"] diff --git a/t/data/references/grch38_vcfanno_config_template-v1.0-.toml b/t/data/references/grch38_vcfanno_config_template-v1.0-.toml index c59fa51b6..71ba6968e 100644 --- a/t/data/references/grch38_vcfanno_config_template-v1.0-.toml +++ b/t/data/references/grch38_vcfanno_config_template-v1.0-.toml @@ -15,4 +15,4 @@ ops=["self", "self"] file="TEST_REFERENCES!/grch38_gnomad_reformated_-r3.1.1-.vcf.gz" fields = ["AF", "AF_popmax"] ops=["self", "self"] -names=["GNOMADAF", "GNOMADAF_POPMAX"] +names=["GNOMADAF", "GNOMADAF_popmax"] diff --git a/t/data/test_data/test.vcf b/t/data/test_data/test.vcf index 5ea5c820b..53390ed9a 100644 --- a/t/data/test_data/test.vcf +++ b/t/data/test_data/test.vcf @@ -13,4 +13,4 @@ ##INFO= ##Software=C||||||,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|transcript|ENST00000515242|transcribed_unprocessed_pseudogene|2/3|n.463G>C||||||,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|transcript|ENST00000518655|transcribed_unprocessed_pseudogene|2/4|n.479G>C||||||,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000438504|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000541675|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000423562|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147|unprocessed_pseudogene||n.*1685C>G|||||1685|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000538476|unprocessed_pseudogene||n.*1692C>G|||||1692|,C|intron_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000450305|tran
scribed_unprocessed_pseudogene|3/5|n.182+22G>C||||||;Annotation=DDX11L1,WASH7P;BaseQRankSum=0;CADD=3.695;CSQ=C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034|YES|||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|intron_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene||3/5|ENST00000450305.2:n.182+22G>C|||||||||1||HGNC|37102||||||||||Ensembl|G|G||||||||||||7.609|1.139|11.897|7.154|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript|2/3||ENST00000456328.2:n.466G>C||466|||||||1||HGNC|37102|YES|||||||||Ensembl|G|G||||||||||||3.895|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|||||||||||1685|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000515242|transcribed_unprocessed_pseudogene|2/3||ENST00000515242.2:n.463G>C||463|||||||1||HGNC|37102||||||||||Ensembl|G|G||||||||||||-0.591|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.2
19|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000518655|transcribed_unprocessed_pseudogene|2/4||ENST00000518655.2:n.479G>C||479|||||||1||HGNC|37102||||||||||Ensembl|G|G||||||||||||11.709|7.656|4.227|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|||||||||||1692|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|pseudogene|||||||||||1643|-1||EntrezGene|38034|YES|||||||||RefSeq|G|G|OK|||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|100287102|Transcript|NR_046018.2|pseudogene|2/3||NR_046018.2:n.461G>C||461|||||||1||EntrezGene|37102|YES|||||||||RefSeq|G|G||||||||||||3.895|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|MIR6859-1|102466751|Transcript|NR_106918.1|miRNA|||||||||||4650|-1||EntrezGene||YES|||||||||RefSeq|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232;DP=138;ExcessHet=4.7712;FS=0;GNOMADAF=0.0494;GNOMADAF_POPMAX=
0.1274;MLEAC=2;MLEAF=0.333;MQ=25.62;MQRankSum=-0.086;NEGATIVE_TRAIN_SITE;Obs=230;QD=3.7;RankResult=-12|0|0|-12|5|0|3|0|0|3;RankScore=livingox:-13;ReadPosRankSum=0.819;SOR=0.09;SWEGENAC_Hemi=0;SWEGENAC_Het=154;SWEGENAC_Hom=0;SWEGENAF=0.077;VQSLOD=-3.219;culprit=QD;most_severe_consequence=38034:C|downstream_gene_variant,37102:C|splice_region_variant GT:AD:DP:GQ:PL 0/1:33,7:40:89:89,0,891 0/1:28,11:39:99:214,0,726 0/0:59,0:59:53:0,53,2332 +1 12719 . G C 292.10 PASS AC=2;AF=0.333;AN=6;ANN=C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|transcript|ENST00000456328|processed_transcript|2/3|n.466G>C||||||,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|transcript|ENST00000515242|transcribed_unprocessed_pseudogene|2/3|n.463G>C||||||,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|transcript|ENST00000518655|transcribed_unprocessed_pseudogene|2/4|n.479G>C||||||,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000438504|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000541675|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000423562|unprocessed_pseudogene||n.*1644C>G|||||1644|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147|unprocessed_pseudogene||n.*1685C>G|||||1685|,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000538476|unprocessed_pseudogene||n.*1692C>G|||||1692|,C|intron_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000450305|transcribed_unprocessed_pseudogene|3/5|n.182+22G>C||||||;Annotation=DDX11L1,WASH7P;BaseQRankSum=0;CADD=3.695;CSQ=C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|
0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034|YES|||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|intron_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene||3/5|ENST00000450305.2:n.182+22G>C|||||||||1||HGNC|37102||||||||||Ensembl|G|G||||||||||||7.609|1.139|11.897|7.154|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript|2/3||ENST00000456328.2:n.466G>C||466|||||||1||HGNC|37102|YES|||||||||Ensembl|G|G||||||||||||3.895|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|||||||||||1685|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000515242|transcribed_unprocessed_pseudogene|2/3||ENST00000515242.2:n.463G>C||463|||||||1||HGNC|37102||||||||||Ensembl|G|G||||||||||||-0.591|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|ENSG00000223972|Transcript|ENST00000518655|transcribed_unprocessed_pseudogene|2/4||ENST00000518655.2:n.479G>C||479|||||||1|
|HGNC|37102||||||||||Ensembl|G|G||||||||||||11.709|7.656|4.227|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|||||||||||1692|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|||||||||||1644|-1||HGNC|38034||||||||||Ensembl|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|pseudogene|||||||||||1643|-1||EntrezGene|38034|YES|||||||||RefSeq|G|G|OK|||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|splice_region_variant&non_coding_transcript_exon_variant|LOW|DDX11L1|100287102|Transcript|NR_046018.2|pseudogene|2/3||NR_046018.2:n.461G>C||461|||||||1||EntrezGene|37102|YES|||||||||RefSeq|G|G||||||||||||3.895|7.656|11.897|9.092|1.939|-9.983|-0.263|-8.045|9.219|-1.563|7.656|7.656|9.219|-1.563|7.656|0.982898&0.981582&0.992904&0.987856&0.986907&0.994232,C|downstream_gene_variant|MODIFIER|MIR6859-1|102466751|Transcript|NR_106918.1|miRNA|||||||||||4650|-1||EntrezGene||YES|||||||||RefSeq|G|G||||||||||||||||0.904|0.625|1.528|1.528|-7.625|-3.049|-9.575|-10.674||||0.982898&0.981582&0.992904&0.987856&0.986907&0.994232;DP=138;ExcessHet=4.7712;FS=0;GNOMADAF=0.0494;GNOMADAF_popmax=0.1274;MLEAC=2;MLEAF=0.333;MQ=25.62;MQRankSum=-0.086;NEGATIVE_TRAIN_SITE;Obs=230;QD=3.7;RankResult=-12|0|0|-12|5|0|3|0|0|3;RankScore=livingox:-13;ReadPosRankSum=0.819;SOR=0.09;SWEGENAC_Hemi=0;SWEGENAC_Het=154;SWEGENAC_Hom=0;SWEGENAF=0.077;VQSLOD=-3.219;culprit=QD;most_severe_consequence=3
8034:C|downstream_gene_variant,37102:C|splice_region_variant GT:AD:DP:GQ:PL 0/1:33,7:40:89:89,0,891 0/1:28,11:39:99:214,0,726 0/0:59,0:59:53:0,53,2332 diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index b63d92f9f..3b2bb4061 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -68,7 +68,7 @@ vcfanno_config: grch38_vcfanno_config_-v0.2-.toml ## Parameters sv_fqa_vcfanno_filters: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax - SWEGENAF - swegen gatk_path: /opt/conda/opt/gatk-3.8 diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 7faec99c6..80b819353 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -49,7 +49,7 @@ fqf_annotations: - SWEGENAF sv_fqa_vcfanno_filters: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax - gnomad_svAF - clinical_genomics_loqusFrq gatk_path: /usr diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 24c81f202..a061cd30d 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -46,7 +46,7 @@ fqf_annotations: - SWEGENAF sv_fqa_vcfanno_filters: - GNOMADAF - - GNOMADAF_POPMAX + - GNOMADAF_popmax - SWEGENAF - swegen picardtools_path: /usr/picard From 7ccac09d98006a7f4960432e239baebbcc44728a Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 21 Sep 2021 19:27:46 +0200 Subject: [PATCH 073/116] Test for missing annotation --- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 60 ++++++++++++++--------- t/analysis_sv_annotate.t | 63 ++++++++++++++++--------- 2 files changed, 81 insertions(+), 42 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 476d73d03..784c14879 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -353,11 +353,11 @@ sub analysis_sv_annotate { ## Build the 
exclude filter command my $exclude_filter = _build_bcftools_filter( { - annotations_ref => \@svdb_query_annotations, - fqf_annotations_ref => $active_parameter_href->{sv_fqa_vcfanno_filters}, fqf_bcftools_filter_threshold => $active_parameter_href->{fqf_bcftools_filter_threshold}, - vcfanno_file_toml => $active_parameter_href->{sv_vcfanno_config}, + svdb_filters_ref => \@svdb_query_annotations, + vcfanno_file_toml => $active_parameter_href->{sv_vcfanno_config}, + vcfanno_filters_ref => $active_parameter_href->{sv_fqa_vcfanno_filters}, } ); @@ -442,53 +442,55 @@ sub _build_bcftools_filter { ## Function : Build the exclude filter command ## Returns : -## Arguments: $annotations_ref => Annotations to use in filtering -## : $fqf_annotaions_ref => Frequency annotation to use in filtering -## : $fqf_bcftools_filter_threshold => Exclude variants with frequency above filter threshold +## Arguments: $fqf_bcftools_filter_threshold => Exclude variants with frequency above filter threshold +## : $svdb_filters_ref => Annotations to use in filtering ## : $vcfanno_file_toml => Toml config file +## : $vcfanno_filters_ref => Frequency annotation to use when filtering annotations from vcfanno my ($arg_href) = @_; ## Flatten argument(s) - my $annotations_ref; - my $fqf_annotations_ref; my $fqf_bcftools_filter_threshold; + my $svdb_filters_ref; my $vcfanno_file_toml; + my $vcfanno_filters_ref; my $tmpl = { - annotations_ref => { - default => [], + fqf_bcftools_filter_threshold => { defined => 1, required => 1, - store => \$annotations_ref, + store => \$fqf_bcftools_filter_threshold, strict_type => 1, }, - fqf_annotations_ref => { + svdb_filters_ref => { default => [], defined => 1, required => 1, - store => \$fqf_annotations_ref, + store => \$svdb_filters_ref, strict_type => 1, }, - fqf_bcftools_filter_threshold => { + vcfanno_file_toml => { defined => 1, required => 1, - store => \$fqf_bcftools_filter_threshold, + store => \$vcfanno_file_toml, strict_type => 1, }, - vcfanno_file_toml 
=> { + vcfanno_filters_ref => { + default => [], defined => 1, required => 1, - store => \$vcfanno_file_toml, + store => \$vcfanno_filters_ref, strict_type => 1, }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - use Array::Utils qw{ intersect }; + use Array::Utils qw{ array_minus }; use List::MoreUtils qw{ uniq }; + my $log = Log::Log4perl->get_logger($LOG_NAME); + my %vcfanno_config = read_from_file( { format => q{toml}, @@ -504,8 +506,24 @@ sub _build_bcftools_filter { push @vcfanno_annotations, @{ $annotation_href->{names} }; } - @{$fqf_annotations_ref} = intersect( @{$fqf_annotations_ref}, @vcfanno_annotations ); - @{$fqf_annotations_ref} = uniq( @{$fqf_annotations_ref}, @{$annotations_ref} ); + ## Check if all annotations in vcfanno_filters are present vcfanno_annotations; + my @missing_annotations = array_minus( @{$vcfanno_filters_ref}, @vcfanno_annotations ); + + if (@missing_annotations) { + + $log->warn( + q{The following vcfanno frequency filters aren't part of the vcfanno annotations:} + . $SPACE + . join $SPACE, + @missing_annotations + ); + $log->warn( +q{This might lead to unexpected results. Update the parameter sv_fqa_vcfanno_filters or update your vcfanno file for structural variants} + ); + } + + ## Check for overlapping tags + my @frequency_filters = uniq( @{$vcfanno_filters_ref}, @{$svdb_filters_ref} ); my $exclude_filter; my $threshold = $SPACE . q{>} . $SPACE . $fqf_bcftools_filter_threshold . $SPACE; @@ -513,7 +531,7 @@ sub _build_bcftools_filter { $exclude_filter = $DOUBLE_QUOTE . q{INFO/} - . join( $threshold . $PIPE . $SPACE . q{INFO/}, @{$fqf_annotations_ref} ) + . join( $threshold . $PIPE . $SPACE . q{INFO/}, @frequency_filters ) . $threshold . 
$DOUBLE_QUOTE; return $exclude_filter; diff --git a/t/analysis_sv_annotate.t b/t/analysis_sv_annotate.t index 14294bd47..ff76686ba 100644 --- a/t/analysis_sv_annotate.t +++ b/t/analysis_sv_annotate.t @@ -17,7 +17,7 @@ use warnings qw{ FATAL utf8 }; use autodie qw { :all }; use Modern::Perl qw{ 2018 }; use Readonly; -use Test::Trap; +use Test::Trap qw{ :stderr :output(systemsafe) }; ## MIPs lib/ use lib catdir( dirname($Bin), q{lib} ); @@ -51,7 +51,7 @@ diag( q{Test analysis_sv_annotate from Sv_annotate.pm} ## Constants Readonly my $FREQ_CUTOFF => 0.40; -my $log = test_log( { log_name => q{MIP}, no_screen => 1, } ); +my $log = test_log( { log_name => q{MIP} } ); ## Given analysis parameters my $recipe_name = q{sv_annotate}; @@ -71,11 +71,9 @@ $active_parameter{sv_frequency_filter} = 1; $active_parameter{fqf_bcftools_filter_threshold} = $FREQ_CUTOFF; $active_parameter{sv_vcfanno_config} = catfile( $Bin, qw{ data references grch37_frequency_vcfanno_filter_config_-v1.0-.toml } ); -$active_parameter{sv_svdb_query} = 1; -$active_parameter{sv_svdb_query_db_files} = - { a_file => - q{a_file|out_frequency_tag|out_allele_coiunt_tag|in_frequency_tag|in_allele_coiunt_tag|1}, }; -@{ $active_parameter{sv_fqa_vcfanno_filters} } = (qw{ out_frequency_tag GNOMADAF }); +$active_parameter{sv_svdb_query} = 1; +$active_parameter{sv_svdb_query_db_files} = { a_file => q{a_file|AF|AC|in_AF|in_AC|1}, }; +@{ $active_parameter{sv_fqa_vcfanno_filters} } = (qw{ GNOMADAF_popmax GNOMADAF }); my %file_info = test_mip_hashes( { @@ -104,20 +102,43 @@ test_add_io_for_recipe( my %sample_info; -my $is_ok = analysis_sv_annotate( - { - active_parameter_href => \%active_parameter, - case_id => $case_id, - file_info_href => \%file_info, - job_id_href => \%job_id, - parameter_href => \%parameter, - profile_base_command => $slurm_mock_cmd, - recipe_name => $recipe_name, - sample_info_href => \%sample_info, - } -); - +my @return = trap { + analysis_sv_annotate( + { + active_parameter_href => 
\%active_parameter, + case_id => $case_id, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_info_href => \%sample_info, + } + ) +}; ## Then return TRUE -ok( $is_ok, q{ Executed analysis recipe } . $recipe_name ); +ok( $return[0], q{Executed analysis recipe} . $recipe_name ); + +## Given request to filter with missing vcfanno annotation +push @{ $active_parameter{sv_fqa_vcfanno_filters} }, q{missingtag}; + +trap { + analysis_sv_annotate( + { + active_parameter_href => \%active_parameter, + case_id => $case_id, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_info_href => \%sample_info, + } + ) +}; + +## Then print warning and which annotation that is missing to log +is( $trap->leaveby, q{return}, q{Don't fail missing annotation} ); +like( $trap->stderr, qr/missingtag/xms, q{Print missing annotation tag} ); done_testing(); From 306ca0c0a4df34afd11a908710024a3e7050169a Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 22 Sep 2021 17:29:03 +0200 Subject: [PATCH 074/116] updates clinvar and locus references --- CHANGELOG.md | 2 +- .../grch37_clinvar_reformated_-20210828-.vcf.gz | 0 ...ch37_clinvar_reformated_-20210828-.vcf.gz.tbi | 0 ...ch37_loqusdb_sv_variants_export-20210907-.vcf | 0 .../grch38_clinvar_reformated_-20210828-.vcf.gz | 0 ...ch38_clinvar_reformated_-20210828-.vcf.gz.tbi | 0 templates/grch38_mip_rd_dna_config.yaml | 2 +- templates/mip_download_rd_dna_config_-1.0-.yaml | 16 ++++++++-------- templates/mip_dragen_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_config.yaml | 4 ++-- templates/mip_rd_dna_panel_config.yaml | 2 +- templates/mip_rd_dna_vcf_rerun_config.yaml | 4 ++-- templates/mip_rd_rna_config.yaml | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) delete mode 100644 
t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz delete mode 100644 t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz.tbi delete mode 100644 t/data/references/grch37_loqusdb_sv_variants_export-20210907-.vcf delete mode 100644 t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz delete mode 100644 t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz.tbi diff --git a/CHANGELOG.md b/CHANGELOG.md index d543a3e7b..c1b95dfcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ vep release_103.1 -> release_104.3 - gnomad: r3.0 -> r3.1.1 - [NEW] gnomad mt: r3.1 -- clinvar: 20210415 -> 20210828 +- clinvar: 20210415 -> 20210919 ## [10.2.0] diff --git a/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz b/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz deleted file mode 100644 index e69de29bb..000000000 diff --git a/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz.tbi b/t/data/references/grch37_clinvar_reformated_-20210828-.vcf.gz.tbi deleted file mode 100644 index e69de29bb..000000000 diff --git a/t/data/references/grch37_loqusdb_sv_variants_export-20210907-.vcf b/t/data/references/grch37_loqusdb_sv_variants_export-20210907-.vcf deleted file mode 100644 index e69de29bb..000000000 diff --git a/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz b/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz deleted file mode 100644 index e69de29bb..000000000 diff --git a/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz.tbi b/t/data/references/grch38_clinvar_reformated_-20210828-.vcf.gz.tbi deleted file mode 100644 index e69de29bb..000000000 diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index 67b995c79..15101fdb3 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -89,7 +89,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: 
cluster_constant_path!/references/grch38_clinvar_reformated_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch38_clinvar_reformated_-20210919-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 5b3ffbcbd..17b308fa6 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -27,7 +27,7 @@ reference: - v1.0 clinvar: - 20210415 - - 20210828 + - 20210919 dbnsfp: - 3.5a - 4.0b2a @@ -323,13 +323,13 @@ reference_feature: outfile_index: grch37_clinvar_-20210415-.vcf.gz.tbi outfile_check_method: md5sum url_prefix: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly/ - 20210828: - file: clinvar_20210828.vcf.gz - file_check: clinvar_20210828.vcf.gz.md5 - file_index: clinvar_20210828.vcf.gz.tbi - outfile: grch37_clinvar_-20210828-.vcf.gz - outfile_check: grch37_clinvar_-20210828-.vcf.gz.md5 - outfile_index: grch37_clinvar_-20210828-.vcf.gz.tbi + 20210919: + file: clinvar_20210919.vcf.gz + file_check: clinvar_20210919.vcf.gz.md5 + file_index: clinvar_20210919.vcf.gz.tbi + outfile: grch37_clinvar_-20210919-.vcf.gz + outfile_check: grch37_clinvar_-20210919-.vcf.gz.md5 + outfile_index: grch37_clinvar_-20210919-.vcf.gz.tbi outfile_check_method: md5sum url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly grch38: diff --git a/templates/mip_dragen_rd_dna_config.yaml b/templates/mip_dragen_rd_dna_config.yaml index 4d9e2b28c..2f8555f83 100644 --- a/templates/mip_dragen_rd_dna_config.yaml +++ b/templates/mip_dragen_rd_dna_config.yaml @@ -68,7 +68,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: 
cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index d8974be5f..7e67f3792 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -31,7 +31,7 @@ sv_rank_model_file: svrank_model_-v1.8-.ini sv_svdb_query_db_files: # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER grch37_gnomad_reformated_-r2.1.1_sv-.vcf.gz: gnomad_sv|AF|AC|AF|AC|1 - grch37_loqusdb_sv_variants_export-20210907-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs + grch37_loqusdb_sv_variants_export-20210921-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs|1 grch37_mip_sv_svdb_export_-2018-10-09-.vcf: clinical_genomics_mip|AF|OCC|FRQ|OCC|1 grch37_svdb_query_decipher_-v1.0.0-.vcf: decipher|AF|OCC|FRQ|OCC grch37_svdb_query_clingen_cgh_benign_-v1.0.0-.vcf: clingen_cgh_benign @@ -70,7 +70,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_panel_config.yaml b/templates/mip_rd_dna_panel_config.yaml index 674d0ac94..0dc5cc1ab 100755 --- a/templates/mip_rd_dna_panel_config.yaml +++ b/templates/mip_rd_dna_panel_config.yaml @@ -49,7 +49,7 @@ 
vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 6c1e315c9..397ffa317 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -28,7 +28,7 @@ sv_rank_model_file: svrank_model_-v1.8-.ini # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER sv_svdb_query_db_files: grch37_gnomad_reformated_-r2.1.1_sv-.vcf.gz: gnomad_sv|AF|AC|AF|AC|1 - grch37_loqusdb_sv_variants_export-20210907-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs + grch37_loqusdb_sv_variants_export-20210921-.vcf: clinical_genomics_loqus|Frq|Obs|Frq|Obs grch37_mip_sv_svdb_export_-2018-10-09-.vcf: clinical_genomics_mip|AF|OCC|FRQ|OCC|1 grch37_svdb_query_decipher_-v1.0.0-.vcf: decipher|AF|OCC|FRQ|OCC grch37_svdb_query_clingen_cgh_benign_-v1.0.0-.vcf: clingen_cgh_benign @@ -65,7 +65,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210828-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_rna_config.yaml b/templates/mip_rd_rna_config.yaml index 52172efa5..65636965b 100644 --- 
a/templates/mip_rd_rna_config.yaml +++ b/templates/mip_rd_rna_config.yaml @@ -33,4 +33,4 @@ fusion_cytoband_path: /arriba_v2.1.0/database/cytobands_hg19_hs37d5_GRCh37_v2.1. fusion_protein_domain_path: /arriba_v2.1.0/database/protein_domains_hg19_hs37d5_GRCh37_v2.1.0.gff3 picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml -vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/ +vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/ From 4b33cc796c5b3b4e8d8cd6ae8b388c84b395f8f6 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 22 Sep 2021 17:37:56 +0200 Subject: [PATCH 075/116] iadding missing test files --- t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz | 0 t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi | 0 t/data/references/grch37_loqusdb_sv_variants_export-20210921-.vcf | 0 t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz | 0 t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi | 0 5 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz create mode 100644 t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi create mode 100644 t/data/references/grch37_loqusdb_sv_variants_export-20210921-.vcf create mode 100644 t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz create mode 100644 t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi diff --git a/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz b/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz new file mode 100644 index 000000000..e69de29bb diff --git a/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi b/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi new file mode 100644 index 000000000..e69de29bb diff 
--git a/t/data/references/grch37_loqusdb_sv_variants_export-20210921-.vcf b/t/data/references/grch37_loqusdb_sv_variants_export-20210921-.vcf new file mode 100644 index 000000000..e69de29bb diff --git a/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz b/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz new file mode 100644 index 000000000..e69de29bb diff --git a/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi b/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi new file mode 100644 index 000000000..e69de29bb From 5f96beed21084ca6fe2b2ef19fae4a5d8c0ec3d7 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 22 Sep 2021 17:53:03 +0200 Subject: [PATCH 076/116] fixing rna test --- templates/mip_rd_rna_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/mip_rd_rna_config.yaml b/templates/mip_rd_rna_config.yaml index 65636965b..52172efa5 100644 --- a/templates/mip_rd_rna_config.yaml +++ b/templates/mip_rd_rna_config.yaml @@ -33,4 +33,4 @@ fusion_cytoband_path: /arriba_v2.1.0/database/cytobands_hg19_hs37d5_GRCh37_v2.1. 
fusion_protein_domain_path: /arriba_v2.1.0/database/protein_domains_hg19_hs37d5_GRCh37_v2.1.0.gff3 picardtools_path: /usr/picard qccollect_sampleinfo_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_id!_qc_sample_info.yaml -vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-103/cache/ +vep_directory_cache: cluster_constant_path!/modules/miniconda/envs/MIP_rd_rna/ensembl-tools-104/cache/ From b2cde9b8c13e920550852ca484d42cd5660fc046 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 23 Sep 2021 10:36:41 +0200 Subject: [PATCH 077/116] adds regex for hmtnote version --- lib/MIP/Environment/Executable.pm | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/MIP/Environment/Executable.pm b/lib/MIP/Environment/Executable.pm index 18b0a3212..43efcb937 100644 --- a/lib/MIP/Environment/Executable.pm +++ b/lib/MIP/Environment/Executable.pm @@ -379,6 +379,11 @@ q?'my ($version) = /genmod\s+version:\s+(\S+)/xms; if($version) {print $version; version_regexp => q?'my ($version) = /gzip\s+(\S+)/xms; if($version) {print $version;last;}'?, }, + hmtnote => { + version_cmd => q{--version}, + version_regexp => + q?'my ($version) = /version\s(\S+)/xms; if($version) {print $version;last;}'?, + }, q{infer_exeperiment.py} => { version_cmd => q{--version}, version_regexp => From 6312906e3d95c57b243ed39ff133629f1219f3f3 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 23 Sep 2021 11:32:21 +0200 Subject: [PATCH 078/116] reverting back to gencode release 37 to match star-fusion --- t/data/references/grch38_gencode_annotation_-v37-.gtf | 0 t/data/references/grch38_gencode_transcripts_-v37-.fa | 0 templates/grch38_mip_rd_rna_config.yaml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 t/data/references/grch38_gencode_annotation_-v37-.gtf create mode 100644 t/data/references/grch38_gencode_transcripts_-v37-.fa diff --git a/t/data/references/grch38_gencode_annotation_-v37-.gtf 
b/t/data/references/grch38_gencode_annotation_-v37-.gtf new file mode 100644 index 000000000..e69de29bb diff --git a/t/data/references/grch38_gencode_transcripts_-v37-.fa b/t/data/references/grch38_gencode_transcripts_-v37-.fa new file mode 100644 index 000000000..e69de29bb diff --git a/templates/grch38_mip_rd_rna_config.yaml b/templates/grch38_mip_rd_rna_config.yaml index 6a4d77c15..2ec08db2d 100644 --- a/templates/grch38_mip_rd_rna_config.yaml +++ b/templates/grch38_mip_rd_rna_config.yaml @@ -25,8 +25,8 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i ## References human_genome_reference: grch38_homo_sapiens_-gencode_pri-.fasta star_fusion_genome_lib_dir: cluster_constant_path!/references/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play/ctat_genome_lib_build_dir -transcript_annotation: grch38_gencode_annotation_-v38-.gtf -transcript_sequence: grch38_gencode_transcripts_-v38-.fa +transcript_annotation: grch38_gencode_annotation_-v37-.gtf +transcript_sequence: grch38_gencode_transcripts_-v37-.fa gatk_haplotypecaller_snp_known_set: grch38_dbsnp_-146-.vcf.gz gatk_baserecalibration_known_sites: - grch38_1000g_indels_-phase1-.vcf.gz From 80839fc0388a05f5f50ffb468050950291bb201f Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 23 Sep 2021 16:52:56 +0200 Subject: [PATCH 079/116] fixing annotation recipe --- definitions/rd_dna_initiation_map.yaml | 2 +- .../Recipes/Analysis/Variant_annotation.pm | 208 ++++++++++++++---- t/mip_analyse_rd_dna.test | 4 +- 3 files changed, 169 insertions(+), 45 deletions(-) diff --git a/definitions/rd_dna_initiation_map.yaml b/definitions/rd_dna_initiation_map.yaml index 91e57f7a3..c31457983 100644 --- a/definitions/rd_dna_initiation_map.yaml +++ b/definitions/rd_dna_initiation_map.yaml @@ -77,13 +77,13 @@ CHAIN_ALL: - rhocall_ar - bcftools_norm - variant_annotation - - mt_annotation - CHAIN_RHOVIZ: - rhocall_viz - chromograph_rhoviz - CHAIN_UPD: - upd_ar - chromograph_upd + - mt_annotation - 
frequency_filter - cadd_ar - varianteffectpredictor diff --git a/lib/MIP/Recipes/Analysis/Variant_annotation.pm b/lib/MIP/Recipes/Analysis/Variant_annotation.pm index d821da3fe..823134190 100644 --- a/lib/MIP/Recipes/Analysis/Variant_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Variant_annotation.pm @@ -198,6 +198,14 @@ sub analysis_variant_annotation { say {$filehandle} q{## } . $recipe_name; + my $loqusdb_header_path = _build_loqusdb_headers( + { + filehandle => $filehandle, + outfile_path_prefix => $outfile_path_prefix, + vcfanno_config_name => $active_parameter_href->{vcfanno_config}, + } + ); + ## Create file commands for xargs my ( $xargs_file_counter, $xargs_file_path_prefix ) = xargs_command( { @@ -224,16 +232,16 @@ sub analysis_variant_annotation { toml_configfile_path => $active_parameter_href->{vcfanno_config}, } ); - print {$xargsfilehandle} $PIPE . $SPACE; - bcftools_view( + ## Add loqusdb headers if defined + _compress_and_add_loqusdb_headers( { - filehandle => $xargsfilehandle, - infile_path => $DASH, - outfile_path => $outfile_path{$contig}, - output_type => q{z}, - stderrfile_path_append => $stderrfile_path, + filehandle => $xargsfilehandle, + infile_path => $DASH, + loqusdb_header_path => $loqusdb_header_path, + outfile_path => $outfile_path{$contig}, + stderrfile_path => $stderrfile_path, } ); say {$xargsfilehandle} $NEWLINE; @@ -272,7 +280,7 @@ sub analysis_variant_annotation { close $xargsfilehandle or $log->logcroak(q{Could not close xargsfilehandle}); - say {$filehandle} q{## Concatenate outfiles}; + say {$filehandle} q{## Concatenate outfiles for CHAIN_RHOVIZ and CHAIN_UPD}; my $concat_outfile_path = $outfile_path_prefix . $DOT . q{vcf.gz}; bcftools_concat( @@ -287,16 +295,6 @@ sub analysis_variant_annotation { ); say {$filehandle} $NEWLINE; - _add_loqusdb_headers( - { - filehandle => $filehandle, - infile_path => $concat_outfile_path, - outfile_path_prefix => $outfile_path_prefix, - outfile_suffix => $DOT . 
q{vcf.gz}, - vcfanno_config_name => $active_parameter_href->{vcfanno_config}, - } - ); - bcftools_index( { infile_path => $concat_outfile_path, @@ -304,9 +302,8 @@ sub analysis_variant_annotation { output_type => q{tbi}, } ); - say {$filehandle} $NEWLINE; - ## Close filehandleS + ## Close filehandles close $filehandle or $log->logcroak(q{Could not close filehandle}); if ( $recipe{mode} == 1 ) { @@ -314,7 +311,7 @@ sub analysis_variant_annotation { ## Collect QC metadata info for later use set_recipe_outfile_in_sample_info( { - path => $concat_outfile_path, + path => $outfile_paths[0], recipe_name => $recipe_name, sample_info_href => $sample_info_href, } @@ -623,10 +620,78 @@ sub _add_loqusdb_headers { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + use MIP::Program::Bcftools qw{ bcftools_annotate }; + use MIP::Program::Gnu::Coreutils qw{ gnu_mv}; + + my $loqusdb_header_path = _build_loqusdb_headers( + { + filehandle => $filehandle, + outfile_path_prefix => $outfile_path_prefix, + vcfanno_config_name => $vcfanno_config_name, + } + ); + + return if ( not $loqusdb_header_path ); + + my $annotate_outfile_path = $outfile_path_prefix . $UNDERSCORE . q{annotated.vcf.gz}; + bcftools_annotate( + { + filehandle => $filehandle, + headerfile_path => $loqusdb_header_path, + infile_path => $infile_path, + outfile_path => $annotate_outfile_path, + output_type => q{z}, + } + ); + say {$filehandle} $NEWLINE; + + gnu_mv( + { + filehandle => $filehandle, + infile_path => $annotate_outfile_path, + outfile_path => $outfile_path_prefix . 
$outfile_suffix, + } + ); + say {$filehandle} $NEWLINE; + return; +} + +sub _build_loqusdb_headers { + +## Function : Build relevant loqusDB headers for downstream processing +## Returns : $loqusdb_header_path +## Arguments: $filehandle => Filehandle to write to +## : $outfile_path_prefix => Outfile path +## : $vcfanno_config_name => Name of vcfanno config + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $filehandle; + my $outfile_path_prefix; + my $vcfanno_config_name; + + my $tmpl = { + filehandle => { store => \$filehandle, }, + outfile_path_prefix => { + defined => 1, + required => 1, + store => \$outfile_path_prefix, + strict_type => 1, + }, + vcfanno_config_name => { + defined => 1, + required => 1, + store => \$vcfanno_config_name, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + use MIP::Io::Read qw{ read_from_file }; use MIP::Language::Perl qw{ perl_nae_oneliners }; - use MIP::Program::Bcftools qw{ bcftools_annotate bcftools_view }; - use MIP::Program::Gnu::Coreutils qw{ gnu_mv}; + use MIP::Program::Bcftools qw{ bcftools_view }; my $loqusdb_reference_file; @@ -649,7 +714,7 @@ sub _add_loqusdb_headers { ## Nothing to process - skip return if ( not $loqusdb_reference_file ); - say {$filehandle} q{## Add loqusdb headers to output}; + say {$filehandle} q{## Build loqusdb headers}; bcftools_view( { @@ -671,26 +736,83 @@ sub _add_loqusdb_headers { ); say {$filehandle} $NEWLINE; - my $annotate_outfile_path = $outfile_path_prefix . $UNDERSCORE . q{annotated.vcf.gz}; - bcftools_annotate( - { - filehandle => $filehandle, - headerfile_path => $loqusdb_header_path, - infile_path => $infile_path, - outfile_path => $annotate_outfile_path, - output_type => q{z}, - } - ); - say {$filehandle} $NEWLINE; + return $loqusdb_header_path; +} - gnu_mv( - { - filehandle => $filehandle, - infile_path => $annotate_outfile_path, - outfile_path => $outfile_path_prefix . 
$outfile_suffix, - } - ); - say {$filehandle} $NEWLINE; +sub _compress_and_add_loqusdb_headers { + +## Function : Compress and add relevant loqusDB headers for downstream processing +## Returns : +## Arguments: $filehandle => Filehandle to write to +## : $infile_path => Infile path to read from +## : $loqusdb_header_path => Path to loqusdb header file +## : $outfile_path => Outfile path +## : $stderrfile_path => Stderr file path (appended to) + + my ($arg_href) = @_; + + ## Flatten argument(s) + my $filehandle; + my $infile_path; + my $loqusdb_header_path; + my $outfile_path; + my $stderrfile_path; + + my $tmpl = { + filehandle => { store => \$filehandle, }, + infile_path => { + defined => 1, + required => 1, + store => \$infile_path, + strict_type => 1, + }, + loqusdb_header_path => { + store => \$loqusdb_header_path, + strict_type => 1, + }, + outfile_path => { + defined => 1, + required => 1, + store => \$outfile_path, + strict_type => 1, + }, + stderrfile_path => { + defined => 1, + required => 1, + store => \$stderrfile_path, + strict_type => 1, + }, + }; + + check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; + + use MIP::Program::Bcftools qw{ bcftools_annotate bcftools_view }; + + if ( not $loqusdb_header_path ) { + + bcftools_view( + { + filehandle => $filehandle, + infile_path => $infile_path, + outfile_path => $outfile_path, + output_type => q{z}, + stderrfile_path_append => $stderrfile_path, + } + ); + } + else { + + bcftools_annotate( + { + filehandle => $filehandle, + headerfile_path => $loqusdb_header_path, + infile_path => $infile_path, + outfile_path => $outfile_path, + output_type => q{z}, + stderrfile_path_append => $stderrfile_path, + } + ); + } return; } diff --git a/t/mip_analyse_rd_dna.test b/t/mip_analyse_rd_dna.test index d18078a6b..2b15695ec 100644 --- a/t/mip_analyse_rd_dna.test +++ b/t/mip_analyse_rd_dna.test @@ -90,7 +90,9 @@ GetOptions( ok( can_run(q{mip}), q{Checking can run mip} ); Readonly my $DRY_RUN_MODE => 2; -my 
@genome_builds = qw{ grch37 grch38 }; + +#my @genome_builds = qw{ grch37 grch38 }; +my @genome_builds = qw{ grch37 }; ## Update path in toml config GENOME_BUILD: From fcb68b820e11717a0b75a5e0ea89a49c2ea690ba Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 23 Sep 2021 17:51:50 +0200 Subject: [PATCH 080/116] fixing mt_annotation recipe --- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index b3543478d..6951c0386 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -220,6 +220,7 @@ sub analysis_mt_annotation { htslib_bgzip( { filehandle => $filehandle, + force => 1, infile_path => $outfile_no_suffix, } ); From 72abee2ce2335c2ce96e741280385d4566d6d04f Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 24 Sep 2021 16:06:38 +0200 Subject: [PATCH 081/116] updated rankmodel version --- t/data/references/rank_model_-v1.32-.ini | 0 templates/mip_dragen_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_panel_config.yaml | 2 +- templates/mip_rd_dna_vcf_rerun_config.yaml | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 t/data/references/rank_model_-v1.32-.ini diff --git a/t/data/references/rank_model_-v1.32-.ini b/t/data/references/rank_model_-v1.32-.ini deleted file mode 100644 index e69de29bb..000000000 diff --git a/templates/mip_dragen_rd_dna_config.yaml b/templates/mip_dragen_rd_dna_config.yaml index 2f8555f83..4a588e3a9 100644 --- a/templates/mip_dragen_rd_dna_config.yaml +++ b/templates/mip_dragen_rd_dna_config.yaml @@ -22,7 +22,7 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i ## References genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta -rank_model_file: rank_model_-v1.32-.ini +rank_model_file: 
rank_model_-v1.33-.ini sv_genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv sv_rank_model_file: svrank_model_-v1.8-.ini sv_svdb_query_db_files: diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 7e67f3792..5df7bbde4 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -23,7 +23,7 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i gatk_genotypegvcfs_ref_gvcf: grch37_gatk_merged_reference_samples.txt genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta -rank_model_file: rank_model_-v1.32-.ini +rank_model_file: rank_model_-v1.33-.ini sambamba_depth_bed: grch37_scout_exons_-2017-01-.bed sv_vcfanno_config: grch37_sv_vcfanno_config_-v1.4-.toml sv_genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv diff --git a/templates/mip_rd_dna_panel_config.yaml b/templates/mip_rd_dna_panel_config.yaml index 0dc5cc1ab..038a1261d 100755 --- a/templates/mip_rd_dna_panel_config.yaml +++ b/templates/mip_rd_dna_panel_config.yaml @@ -22,7 +22,7 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i gatk_genotypegvcfs_ref_gvcf: grch37_gatk_merged_reference_samples.txt genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta -rank_model_file: rank_model_-v1.32-.ini +rank_model_file: rank_model_-v1.33-.ini sambamba_depth_bed: grch37_scout_exons_-2017-01-.bed vcfanno_config: grch37_vcfanno_config_v1.16-.toml ### Analysis diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 397ffa317..d89582e64 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -22,7 +22,7 @@ sample_info_file: cluster_constant_path!/case_id!/analysis_constant_path!/case_i ## 
References genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv human_genome_reference: grch37_homo_sapiens_-d5-.fasta -rank_model_file: rank_model_-v1.32-.ini +rank_model_file: rank_model_-v1.33-.ini sv_genmod_models_reduced_penetrance_file: grch37_cust003-cmms-red-pen_-2017-.tsv sv_rank_model_file: svrank_model_-v1.8-.ini # FORMAT: filename|OUT_FREQUENCY_INFO_KEY|OUT_ALLELE_COUNT_INFO_KEY|IN_FREQUENCY_INFO_KEY|IN_ALLELE_COUNT_INFO_KEY|USE_IN_FREQUENCY_FILTER From c3cf372a2206940606d3325a93d09f81555fe34a Mon Sep 17 00:00:00 2001 From: jemten Date: Sun, 26 Sep 2021 09:48:15 +0200 Subject: [PATCH 082/116] adds missing test file --- t/data/references/rank_model_-v1.33-.ini | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 t/data/references/rank_model_-v1.33-.ini diff --git a/t/data/references/rank_model_-v1.33-.ini b/t/data/references/rank_model_-v1.33-.ini new file mode 100644 index 000000000..e69de29bb From 9f2ac934ca2b15b344f536e4f64935b562b1e68a Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 27 Sep 2021 18:20:19 +0200 Subject: [PATCH 083/116] adds sed command to remove whitespace after annotation with hmtnote --- lib/MIP/Program/Gnu/Software/Gnu_sed.pm | 49 ++++++++++----------- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 53 +++++++++++++++++++---- t/gnu_sed.t | 12 +++-- 3 files changed, 76 insertions(+), 38 deletions(-) diff --git a/lib/MIP/Program/Gnu/Software/Gnu_sed.pm b/lib/MIP/Program/Gnu/Software/Gnu_sed.pm index 234f1ce0f..91df31700 100644 --- a/lib/MIP/Program/Gnu/Software/Gnu_sed.pm +++ b/lib/MIP/Program/Gnu/Software/Gnu_sed.pm @@ -1,5 +1,6 @@ package MIP::Program::Gnu::Software::Gnu_sed; +use 5.026; use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; @@ -10,7 +11,6 @@ use warnings; use warnings qw{ FATAL utf8 }; ## CPANM -use autodie qw{ :all }; use Readonly; ## MIPs lib/ @@ -34,13 +34,14 @@ sub gnu_sed { ##Function : Perl wrapper for writing sed recipe to already open 
$filehandle or return commands array. Based on sed 4.2.1. ##Returns : "@commands" -##Arguments: $filehandle => Filehandle to write to -## : $infile_path => Infile path -## : $inplace_edit => Edit file in place -## : $outfile_path => Outfile path +##Arguments: $filehandle => Filehandle to write to +## : $infile_path => Infile path +## : $inplace_edit => Edit file in place +## : $script => Script to edit infile stream +## : $stderrfile_path => Stderrfile path ## : $stderrfile_path_append => Append stderr info to file -## : $stderrfile_path => Stderrfile path -## : $script => Script to edit infile stream +## : $stdoutfile_path => Stdoutfile path +## : $stdoutfile_path_append => Append stdout info to file path my ($arg_href) = @_; @@ -48,10 +49,11 @@ sub gnu_sed { my $filehandle; my $infile_path; my $inplace_edit; - my $outfile_path; - my $stderrfile_path; my $script; + my $stderrfile_path; my $stderrfile_path_append; + my $stdoutfile_path; + my $stdoutfile_path_append; my $tmpl = { filehandle => { @@ -67,22 +69,26 @@ sub gnu_sed { store => \$inplace_edit, strict_type => 1, }, - outfile_path => { + script => { strict_type => 1, - store => \$outfile_path + store => \$script }, stderrfile_path => { strict_type => 1, store => \$stderrfile_path }, - script => { - strict_type => 1, - store => \$script - }, stderrfile_path_append => { strict_type => 1, store => \$stderrfile_path_append }, + stdoutfile_path => { + strict_type => 1, + store => \$stdoutfile_path + }, + stdoutfile_path_append => { + strict_type => 1, + store => \$stdoutfile_path_append + }, }; check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; @@ -90,37 +96,28 @@ sub gnu_sed { ##Stores commands depending on input parameters my @commands = q{sed}; - ## Options if ($inplace_edit) { push @commands, q{-i}; } - if ($script) { push @commands, $script; } - - ## Infile if ($infile_path) { push @commands, $infile_path; } - ## Outfile - if ($outfile_path) { - - push @commands, q{>} . $SPACE . 
$outfile_path; - } - push @commands, unix_standard_streams( { stderrfile_path => $stderrfile_path, stderrfile_path_append => $stderrfile_path_append, + stdoutfile_path => $stdoutfile_path, + stdoutfile_path_append => $stdoutfile_path_append, } ); - unix_write_to_file( { commands_ref => \@commands, diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index b3543478d..f60337ea8 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -17,7 +17,7 @@ use autodie qw{ :all }; use Readonly; ## MIPs lib/ -use MIP::Constants qw{ $LOG_NAME $NEWLINE $UNDERSCORE }; +use MIP::Constants qw{ $LOG_NAME $NEWLINE $PIPE $SPACE $UNDERSCORE }; BEGIN { @@ -114,7 +114,9 @@ sub analysis_mt_annotation { use MIP::File_info qw{ get_io_files parse_io_outfiles }; use MIP::File::Path qw { remove_file_path_suffix }; + use MIP::Program::Bcftools qw{ bcftools_view }; use MIP::Program::Gnu::Coreutils qw { gnu_cp }; + use MIP::Program::Gnu::Software::Gnu_sed qw { gnu_sed }; use MIP::Program::HmtNote qw{ hmtnote_annotate }; use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; use MIP::Processmanagement::Processes qw{ submit_recipe }; @@ -167,10 +169,10 @@ sub analysis_mt_annotation { ) ); - my @outfile_paths = @{ $io{out}{file_paths} }; - my $outfile_path_prefix = $io{out}{file_path_prefix}; - my %outfile_path = %{ $io{out}{file_path_href} }; - my $outfile_suffix = $io{out}{file_suffix}; + my @outfile_paths = @{ $io{out}{file_paths} }; + my %outfile_path = %{ $io{out}{file_path_href} }; + my %outfile_name = %{ $io{out}{file_name_href} }; + my $outdir_path = $io{out}{dir_path}; ## Filehandles # Create anonymous filehandle @@ -202,25 +204,60 @@ sub analysis_mt_annotation { my $outfile_no_suffix = remove_file_path_suffix( { - file_path => $outfile_path{$contig}, + file_path => $outfile_name{$contig}, file_suffixes_ref => [qw{.gz}], } ); + my $temp_outfile_path = + catfile( 
$active_parameter_href->{temp_directory}, $outfile_no_suffix ); + ## Hmtnote inserts whitespace in the INFO field which violates the VCF v4.2 specification + say {$filehandle} q{## Annotate MT variants}; hmtnote_annotate( { filehandle => $filehandle, infile_path => $infile_path{$contig}, offline => $active_parameter_href->{mt_offline}, - outfile_path => $outfile_no_suffix, + outfile_path => $temp_outfile_path, } ); print {$filehandle} $NEWLINE; + say {$filehandle} q{## Remove whitespace from vcf}; + my $outfile_path_no_suffix = catfile( $outdir_path, $outfile_no_suffix ); + bcftools_view( + { + filehandle => $filehandle, + header_only => 1, + infile_path => $temp_outfile_path, + outfile_path => $outfile_path_no_suffix, + } + ); + print {$filehandle} $NEWLINE; + + bcftools_view( + { + filehandle => $filehandle, + infile_path => $temp_outfile_path, + no_header => 1, + } + ); + print {$filehandle} $PIPE . $SPACE; + + gnu_sed( + { + filehandle => $filehandle, + script => q{'s/ /_/g'}, + stdoutfile_path_append => $outfile_path_no_suffix, + } + ); + print {$filehandle} $NEWLINE; + + say {$filehandle} q{## Compress and index}; htslib_bgzip( { filehandle => $filehandle, - infile_path => $outfile_no_suffix, + infile_path => $outfile_path_no_suffix, } ); print {$filehandle} $NEWLINE; diff --git a/t/gnu_sed.t b/t/gnu_sed.t index 97a626b85..ab3988b3a 100644 --- a/t/gnu_sed.t +++ b/t/gnu_sed.t @@ -110,6 +110,14 @@ my %base_argument = ( input => q{stderrfile.test}, expected_output => q{2>> stderrfile.test}, }, + stdoutfile_path => { + input => q{stdoutfile.test}, + expected_output => q{1> stdoutfile.test}, + }, + stdoutfile_path_append => { + input => q{stdoutfile.test}, + expected_output => q{1>> stdoutfile.test}, + }, ); ## Can be duplicated with %base and/or %specific to enable testing of each individual argument @@ -130,10 +138,6 @@ my %specific_argument = ( input => 1, expected_output => q{-i}, }, - outfile_path => { - input => q{outfile.test}, - expected_output => q{> 
outfile.test}, - }, script => { input => q{script.test}, expected_output => q{script.test}, From da5777e8a4bc602572cbe55bd30ebbc5f4663dfa Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 27 Sep 2021 18:27:21 +0200 Subject: [PATCH 084/116] uncommented a line in tests --- t/mip_analyse_rd_dna.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/t/mip_analyse_rd_dna.test b/t/mip_analyse_rd_dna.test index 2b15695ec..0835b46a1 100644 --- a/t/mip_analyse_rd_dna.test +++ b/t/mip_analyse_rd_dna.test @@ -91,8 +91,7 @@ ok( can_run(q{mip}), q{Checking can run mip} ); Readonly my $DRY_RUN_MODE => 2; -#my @genome_builds = qw{ grch37 grch38 }; -my @genome_builds = qw{ grch37 }; +my @genome_builds = qw{ grch37 grch38 }; ## Update path in toml config GENOME_BUILD: From d556690417c9ed484d342dab078aeb9d6d7cf072 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 28 Sep 2021 12:07:51 +0200 Subject: [PATCH 085/116] updating path to file --- lib/MIP/Recipes/Analysis/Mt_annotation.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Mt_annotation.pm b/lib/MIP/Recipes/Analysis/Mt_annotation.pm index 58fcf9277..f03a3fc4f 100644 --- a/lib/MIP/Recipes/Analysis/Mt_annotation.pm +++ b/lib/MIP/Recipes/Analysis/Mt_annotation.pm @@ -258,7 +258,7 @@ sub analysis_mt_annotation { { filehandle => $filehandle, force => 1, - infile_path => $outfile_no_suffix, + infile_path => $outfile_path_no_suffix, } ); print {$filehandle} $NEWLINE; From 13c0fb6b83cb9d9602c41f4ecaccefadf19cf40e Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 28 Sep 2021 17:42:27 +0200 Subject: [PATCH 086/116] change to bcftools concat --- .../Analysis/Endvariantannotationblock.pm | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Endvariantannotationblock.pm b/lib/MIP/Recipes/Analysis/Endvariantannotationblock.pm index d4a9ce0dc..184858513 100644 --- a/lib/MIP/Recipes/Analysis/Endvariantannotationblock.pm +++ 
b/lib/MIP/Recipes/Analysis/Endvariantannotationblock.pm @@ -131,10 +131,10 @@ sub analysis_endvariantannotationblock { use MIP::Analysis qw{ get_vcf_parser_analysis_suffix }; use MIP::File_info qw{ get_io_files parse_io_outfiles }; - use MIP::Program::Gnu::Software::Gnu_grep qw{ gnu_grep }; use MIP::Processmanagement::Processes qw{ submit_recipe }; + use MIP::Program::Bcftools qw{ bcftools_concat }; + use MIP::Program::Gnu::Software::Gnu_grep qw{ gnu_grep }; use MIP::Program::Htslib qw{ htslib_bgzip htslib_tabix }; - use MIP::Program::Gatk qw{ gatk_concatenate_variants }; use MIP::Recipe qw{ parse_recipe_prerequisites }; use MIP::Sample_info qw{ set_file_path_to_store set_recipe_metafile_in_sample_info }; @@ -235,18 +235,17 @@ sub analysis_endvariantannotationblock { $metafile_tag = q{clinical}; } - ## Writes sbatch code to supplied filehandle to concatenate variants in vcf format. Each array element is combined with the infile prefix and postfix. - gatk_concatenate_variants( + my @infile_paths = + map { $infile_path_prefix . $DOT . $_ . $infile_postfix } @concat_contigs; + bcftools_concat( { - active_parameter_href => $active_parameter_href, - elements_ref => \@concat_contigs, - filehandle => $filehandle, - infile_prefix => $infile_path_prefix, - infile_postfix => $infile_postfix, - outfile_path_prefix => $outfile_path_prefix, - outfile_suffix => $analysis_suffix, + filehandle => $filehandle, + infile_paths_ref => \@infile_paths, + outfile_path => $outfile_path_prefix . 
$analysis_suffix, + rm_dups => 0, } ); + say {$filehandle} $NEWLINE; ## Remove variants in hgnc_id list from vcf if ( $active_parameter_href->{endvariantannotationblock_remove_genes_file} ) { From 19ffc81bccd24dd875fbad18c0304bdadbb1e46e Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 6 Oct 2021 18:06:30 +0200 Subject: [PATCH 087/116] adds more metrics to qc deliverables --- lib/MIP/Store.pm | 38 +++++++++++++++++++++++++++++++++++++- t/store_metrics.t | 2 +- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/MIP/Store.pm b/lib/MIP/Store.pm index 31a9f0db1..5fcf8df38 100644 --- a/lib/MIP/Store.pm +++ b/lib/MIP/Store.pm @@ -103,12 +103,20 @@ sub define_qc_metrics_to_store { my %store_metrics = ( AT_DROPOUT => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collectmultiplemetrics}, + recipe_name => q{picardtools_collecthsmetrics}, + }, + FOLD_80_BASE_PENALTY => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, }, fraction_duplicates => { analysis_mode => q{sample}, recipe_name => q{markduplicates}, }, + GC_DROPOUT => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, gender => { analysis_mode => q{sample}, recipe_name => q{chanjo_sexcheck}, @@ -117,6 +125,10 @@ sub define_qc_metrics_to_store { analysis_mode => q{sample}, recipe_name => q{collectmultiplemetricsinsertsize}, }, + MEAN_TARGET_COVERAGE => { + analysis_mode => q{sample}, + recipe_name => q{collecthsmetrics} + }, MEDIAN_TARGET_COVERAGE => { analysis_mode => q{sample}, recipe_name => q{collecthsmetrics} @@ -133,6 +145,30 @@ sub define_qc_metrics_to_store { analysis_mode => q{sample}, recipe_name => q{collectrnaseqmetrics}, }, + PCT_OFF_BAIT => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, + PCT_TARGET_BASES_10X => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, + PCT_TARGET_BASES_20X => { + analysis_mode => q{sample}, + recipe_name 
=> q{picardtools_collecthsmetrics}, + }, + PCT_TARGET_BASES_30x => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, + PCT_TARGET_BASES_50x => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, + PCT_TARGET_BASES_100x => { + analysis_mode => q{sample}, + recipe_name => q{picardtools_collecthsmetrics}, + }, percentage_mapped_reads => { analysis_mode => q{sample}, recipe_name => q{bamstats}, diff --git a/t/store_metrics.t b/t/store_metrics.t index 32bb7abc9..c9bdb53ee 100644 --- a/t/store_metrics.t +++ b/t/store_metrics.t @@ -64,7 +64,7 @@ my $metric_name = q{AT_DROPOUT}; my $metric_value = 1; ## Given a recipe -my $recipe_name = q{picardtools_collectmultiplemetrics}; +my $recipe_name = q{picardtools_collecthsmetrics}; ## Given two sample_ids my $sample_id = q{ADM1059A1}; From 97c3c761ac4a6012c3cf262ba5047ade78085fe9 Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 8 Oct 2021 11:17:18 +0200 Subject: [PATCH 088/116] fixing recipe name --- lib/MIP/Store.pm | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/MIP/Store.pm b/lib/MIP/Store.pm index 5fcf8df38..4573eae40 100644 --- a/lib/MIP/Store.pm +++ b/lib/MIP/Store.pm @@ -103,11 +103,11 @@ sub define_qc_metrics_to_store { my %store_metrics = ( AT_DROPOUT => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, FOLD_80_BASE_PENALTY => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, fraction_duplicates => { analysis_mode => q{sample}, @@ -115,7 +115,7 @@ sub define_qc_metrics_to_store { }, GC_DROPOUT => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, gender => { analysis_mode => q{sample}, @@ -147,27 +147,27 @@ sub define_qc_metrics_to_store { }, PCT_OFF_BAIT => { analysis_mode => q{sample}, - 
recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, PCT_TARGET_BASES_10X => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, PCT_TARGET_BASES_20X => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, PCT_TARGET_BASES_30x => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, PCT_TARGET_BASES_50x => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, PCT_TARGET_BASES_100x => { analysis_mode => q{sample}, - recipe_name => q{picardtools_collecthsmetrics}, + recipe_name => q{collecthsmetrics}, }, percentage_mapped_reads => { analysis_mode => q{sample}, From d9bf58880976d70dd169be4d9daa2729d956db4c Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 8 Oct 2021 11:25:36 +0200 Subject: [PATCH 089/116] fixing test --- t/store_metrics.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/store_metrics.t b/t/store_metrics.t index c9bdb53ee..06d0821ab 100644 --- a/t/store_metrics.t +++ b/t/store_metrics.t @@ -64,7 +64,7 @@ my $metric_name = q{AT_DROPOUT}; my $metric_value = 1; ## Given a recipe -my $recipe_name = q{picardtools_collecthsmetrics}; +my $recipe_name = q{collecthsmetrics}; ## Given two sample_ids my $sample_id = q{ADM1059A1}; From 52b10c7c3a78b85eff31a6cf8a5a768862e6ae14 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 12 Oct 2021 14:21:00 +0200 Subject: [PATCH 090/116] increases AF cutoff to 0.7 --- CHANGELOG.md | 1 + definitions/dragen_rd_dna_parameters.yaml | 2 +- definitions/rd_dna_panel_parameters.yaml | 2 +- definitions/rd_dna_parameters.yaml | 2 +- definitions/rd_dna_vcf_rerun_parameters.yaml | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4576cef62..6375e3103 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Mitochondrial deletion analysis - GATK Haplotypecaller has been turned off in favour of Deepvariant - Introduces possibility to store singularity images locally as a .sif file +- Increased allele frequency cut off for when a variant is filtered out to 0.7 ### Tools diff --git a/definitions/dragen_rd_dna_parameters.yaml b/definitions/dragen_rd_dna_parameters.yaml index 0d868682d..13becdf18 100755 --- a/definitions/dragen_rd_dna_parameters.yaml +++ b/definitions/dragen_rd_dna_parameters.yaml @@ -530,7 +530,7 @@ fqf_bcftools_filter_threshold: associated_recipe: - frequency_filter data_type: SCALAR - default: 0.40 + default: 0.70 type: recipe_argument cadd_ar: analysis_mode: case diff --git a/definitions/rd_dna_panel_parameters.yaml b/definitions/rd_dna_panel_parameters.yaml index b662e9c92..ef51f21b6 100755 --- a/definitions/rd_dna_panel_parameters.yaml +++ b/definitions/rd_dna_panel_parameters.yaml @@ -831,7 +831,7 @@ fqf_bcftools_filter_threshold: associated_recipe: - frequency_filter data_type: SCALAR - default: 0.40 + default: 0.70 type: recipe_argument cadd_ar: analysis_mode: case diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index be9e51bc9..3e8c11c7c 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -1670,7 +1670,7 @@ fqf_bcftools_filter_threshold: associated_recipe: - frequency_filter data_type: SCALAR - default: 0.40 + default: 0.70 type: recipe_argument cadd_ar: analysis_mode: case diff --git a/definitions/rd_dna_vcf_rerun_parameters.yaml b/definitions/rd_dna_vcf_rerun_parameters.yaml index 39194033d..fd06611b1 100755 --- a/definitions/rd_dna_vcf_rerun_parameters.yaml +++ b/definitions/rd_dna_vcf_rerun_parameters.yaml @@ -526,7 +526,7 @@ fqf_bcftools_filter_threshold: associated_recipe: - frequency_filter data_type: SCALAR - default: 0.40 + default: 0.70 type: 
recipe_argument cadd_ar: analysis_mode: case From e7e0602220f0d217dcb12707871800fcba2913cf Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 13:09:42 +0200 Subject: [PATCH 091/116] Updates README --- README.md | 10 ++++++++++ documentation/README.md | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/README.md b/README.md index 9f83c76ba..3cec5fc10 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,13 @@ MIP enables identification of potential disease causing variants from sequencing ## Citing MIP +``` +Integration of whole genome sequencing into a healthcare setting: high diagnostic rates across multiple clinical entities in 3219 rare disease patients +Stranneheim H, Lagerstedt-Robinson K, Magnusson M, Kvarnung M, Nilsson D, Lesko N, Engvall M, Anderlid BM, Arnell H, Johansson CB, Barbaro M, Björck E, Bruhn H, Eisfeldt J, Freyer C, Grigelioniene G, Gustavsson P, Hammarsjö A, Hellström-Pigg M, Iwarsson E, Jemt A, Laaksonen M, Enoksson SL, Malmgren H, Naess K, Nordenskjöld M, Oscarson M, Pettersson M, Rasi C, Rosenbaum A, Sahlin E, Sardh E, Stödberg T, Tesi B, Tham E, Thonberg H, Töhönen V, von Döbeln U, Vassiliou D, Vonlanthen S, Wikström AC, Wincent J, Winqvist O, Wredenberg A, Ygberg S, Zetterström RH, Marits P, Soller MJ, Nordgren A, Wirta V, Lindstrand A, Wedell A. +Genome Med. 2021 Mar 17;13(1):40. doi: 10.1186/s13073-021-00855-5. +PMID: 33726816; PMCID: PMC7968334. +``` + ``` Rapid pulsed whole genome sequencing for comprehensive acute diagnostics of inborn errors of metabolism Stranneheim H, Engvall M, Naess K, Lesko N, Larsson P, Dahlberg M, Andeer R, Wredenberg A, Freyer C, Barbaro M, Bruhn H, Emahazion T, Magnusson M, Wibom R, Zetterström RH, Wirta V, von Döbeln U, Wedell A. @@ -22,6 +29,9 @@ PMID:25495354 ## Overview +**N.B. MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. 
This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** +**You can follow the progress here :point_right: [raredisease](https://github.com/nf-core/raredisease).** + MIP performs whole genome or target region analysis of sequenced single-end and/or paired-end reads from the Illumina platform in fastq\(.gz\) format to generate annotated ranked potential disease causing variants. MIP performs QC, alignment, coverage analysis, variant discovery and annotation, sample checks as well as ranking the found variants according to disease potential with a minimum of manual intervention. MIP is compatible with [Scout](https://github.com/Clinical-Genomics/scout) for visualization of identified variants. diff --git a/documentation/README.md b/documentation/README.md index 9f83c76ba..3cec5fc10 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -13,6 +13,13 @@ MIP enables identification of potential disease causing variants from sequencing ## Citing MIP +``` +Integration of whole genome sequencing into a healthcare setting: high diagnostic rates across multiple clinical entities in 3219 rare disease patients +Stranneheim H, Lagerstedt-Robinson K, Magnusson M, Kvarnung M, Nilsson D, Lesko N, Engvall M, Anderlid BM, Arnell H, Johansson CB, Barbaro M, Björck E, Bruhn H, Eisfeldt J, Freyer C, Grigelioniene G, Gustavsson P, Hammarsjö A, Hellström-Pigg M, Iwarsson E, Jemt A, Laaksonen M, Enoksson SL, Malmgren H, Naess K, Nordenskjöld M, Oscarson M, Pettersson M, Rasi C, Rosenbaum A, Sahlin E, Sardh E, Stödberg T, Tesi B, Tham E, Thonberg H, Töhönen V, von Döbeln U, Vassiliou D, Vonlanthen S, Wikström AC, Wincent J, Winqvist O, Wredenberg A, Ygberg S, Zetterström RH, Marits P, Soller MJ, Nordgren A, Wirta V, Lindstrand A, Wedell A. +Genome Med. 2021 Mar 17;13(1):40. doi: 10.1186/s13073-021-00855-5. +PMID: 33726816; PMCID: PMC7968334. 
+``` + ``` Rapid pulsed whole genome sequencing for comprehensive acute diagnostics of inborn errors of metabolism Stranneheim H, Engvall M, Naess K, Lesko N, Larsson P, Dahlberg M, Andeer R, Wredenberg A, Freyer C, Barbaro M, Bruhn H, Emahazion T, Magnusson M, Wibom R, Zetterström RH, Wirta V, von Döbeln U, Wedell A. @@ -22,6 +29,9 @@ PMID:25495354 ## Overview +**N.B. MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** +**You can follow the progress here :point_right: [raredisease](https://github.com/nf-core/raredisease).** + MIP performs whole genome or target region analysis of sequenced single-end and/or paired-end reads from the Illumina platform in fastq\(.gz\) format to generate annotated ranked potential disease causing variants. MIP performs QC, alignment, coverage analysis, variant discovery and annotation, sample checks as well as ranking the found variants according to disease potential with a minimum of manual intervention. MIP is compatible with [Scout](https://github.com/Clinical-Genomics/scout) for visualization of identified variants. From 396123232fde876bd8864e4079603afd2b413b12 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 13:45:21 +0200 Subject: [PATCH 092/116] removes "N.B." --- README.md | 2 +- documentation/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3cec5fc10..5a657a654 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ PMID:25495354 ## Overview -**N.B. MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** +**MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. 
This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** **You can follow the progress here :point_right: [raredisease](https://github.com/nf-core/raredisease).** MIP performs whole genome or target region analysis of sequenced single-end and/or paired-end reads from the Illumina platform in fastq\(.gz\) format to generate annotated ranked potential disease causing variants. diff --git a/documentation/README.md b/documentation/README.md index 3cec5fc10..5a657a654 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -29,7 +29,7 @@ PMID:25495354 ## Overview -**N.B. MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** +**MIP is being rewritten in NextFlow as a part of the [nf-core](https://nf-co.re/) project. This repo will mainly receive bugfixes as we are focusing our resources on the new pipeline.** **You can follow the progress here :point_right: [raredisease](https://github.com/nf-core/raredisease).** MIP performs whole genome or target region analysis of sequenced single-end and/or paired-end reads from the Illumina platform in fastq\(.gz\) format to generate annotated ranked potential disease causing variants. 
From e58aa05806ae3eca53115342cefdc0ec2c1ca3d7 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 14:21:00 +0200 Subject: [PATCH 093/116] fixing qccollect test --- lib/MIP/Main/Qccollect.pm | 2 +- t/parse_limit_qc_output.t | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 t/parse_limit_qc_output.t diff --git a/lib/MIP/Main/Qccollect.pm b/lib/MIP/Main/Qccollect.pm index e5533d5cf..b46f1ef80 100644 --- a/lib/MIP/Main/Qccollect.pm +++ b/lib/MIP/Main/Qccollect.pm @@ -38,7 +38,7 @@ BEGIN { our $VERSION = q{2.1.10}; # Functions and variables which can be optionally exported - our @EXPORT_OK = qw{ mip_qccollect }; + our @EXPORT_OK = qw{ mip_qccollect parse_limit_qc_output }; } sub mip_qccollect { diff --git a/t/parse_limit_qc_output.t b/t/parse_limit_qc_output.t new file mode 100644 index 000000000..42ea783cf --- /dev/null +++ b/t/parse_limit_qc_output.t @@ -0,0 +1,87 @@ +#! /usr/bin/env perl + +use 5.026; +use Carp; +use charnames qw{ :full :short }; +use English qw{ -no_match_vars }; +use File::Basename qw{ dirname }; +use File::Spec::Functions qw{ catdir }; +use FindBin qw{ $Bin }; +use open qw{ :encoding(UTF-8) :std }; +use Params::Check qw{ allow check last_error }; +use Test::More; +use utf8; +use warnings qw{ FATAL utf8 }; + +## CPANM +use autodie qw { :all }; +use Modern::Perl qw{ 2018 }; + +## MIPs lib/ +use lib catdir( dirname($Bin), q{lib} ); +use MIP::Constants qw{ $COMMA $SPACE }; + +BEGIN { + + use MIP::Test::Fixtures qw{ test_import }; + +### Check all internal dependency modules and imports +## Modules with import + my %perl_module = ( q{MIP::Main::Qccollect} => [qw{ parse_limit_qc_output }], ); + + test_import( { perl_module_href => \%perl_module, } ); +} + +use MIP::Main::Qccollect qw{ parse_limit_qc_output }; + +diag( q{Test parse_limit_qc_output from Qccollect.pm} + . $COMMA + . $SPACE . q{Perl} + . $SPACE + . $PERL_VERSION + . $SPACE + . 
$EXECUTABLE_NAME ); + +## Given a qc data hash +my %qc_data = ( + sample => { + sample_id => { + recipe_output_file => { + collectmultiplemetrics => { + header => q{data}, + }, + }, + another_file => { + collecthsmetrics => { + header => q{data}, + }, + }, + }, + }, +); + +## When executing sub +parse_limit_qc_output( + { + limit_qc_output => 1, + qc_href => \%qc_data, + } +); + +## Then remove key collectmultiplemetrics +my %expected_qc_data = ( + sample => { + sample_id => { + recipe_output_file => {}, + another_file => { + collecthsmetrics => { + header => q{data}, + }, + }, + }, + }, +); + +is_deeply( \%qc_data, \%expected_qc_data, q{Delete qc data from hash} ); + +done_testing(); From 18c58a342e20870c12a1e41448c53d164e88b0a7 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 14:28:54 +0200 Subject: [PATCH 094/116] fixing variant annotation recipe test --- t/analysis_variant_annotation.t | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/t/analysis_variant_annotation.t b/t/analysis_variant_annotation.t index ac700cf96..d8ae7bbee 100644 --- a/t/analysis_variant_annotation.t +++ b/t/analysis_variant_annotation.t @@ -47,7 +47,7 @@ diag( q{Test analysis_variant_annotation from Variant_annotation.pm} . $SPACE . $EXECUTABLE_NAME ); -my $log = test_log( { log_name => q{MIP}, no_screen => 1, } ); +test_log( { log_name => q{MIP}, no_screen => 1, } ); ## Given analysis parameters my $recipe_name = q{variant_annotation}; @@ -109,4 +109,24 @@ my $is_ok = analysis_variant_annotation( ## Then return TRUE ok( $is_ok, q{ Executed analysis recipe } . 
$recipe_name ); +## Given no loqusdb file in toml - alters the bcftools command +$active_parameter{vcfanno_config} = + catfile( $Bin, qw{ data references grch37_vcfanno_config_bad_template-v1.0-.toml } ); + +$is_ok = analysis_variant_annotation( + { + active_parameter_href => \%active_parameter, + case_id => $case_id, + file_info_href => \%file_info, + job_id_href => \%job_id, + parameter_href => \%parameter, + profile_base_command => $slurm_mock_cmd, + recipe_name => $recipe_name, + sample_info_href => \%sample_info, + } +); + +## Then still return TRUE +ok( $is_ok, q{ Executed analysis recipe } . $recipe_name ); + done_testing(); From aea41eb0aace793df88bf952ea1ecdc2b84994a9 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 14:36:27 +0200 Subject: [PATCH 095/116] fixed uninitalized log --- t/parse_containers.t | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/parse_containers.t b/t/parse_containers.t index a83b77aee..f80453a4a 100644 --- a/t/parse_containers.t +++ b/t/parse_containers.t @@ -20,7 +20,7 @@ use Modern::Perl qw{ 2018 }; ## MIPs lib/ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; -use MIP::Test::Fixtures qw{ test_mip_hashes }; +use MIP::Test::Fixtures qw{ test_log test_mip_hashes }; BEGIN { @@ -30,7 +30,7 @@ BEGIN { ## Modules with import my %perl_module = ( q{MIP::Environment::Container} => [qw{ parse_containers }], - q{MIP::Test::Fixtures} => [qw{ test_mip_hashes }], + q{MIP::Test::Fixtures} => [qw{ test_log test_mip_hashes }], ); test_import( { perl_module_href => \%perl_module, } ); @@ -46,6 +46,8 @@ diag( q{Test parse_containers from Container.pm} . $SPACE . 
$EXECUTABLE_NAME ); +test_log( {} ); + my %active_parameter = test_mip_hashes( { mip_hash_name => q{active_parameter}, From ef2558bb939aa10613eb2cb9831c9ee043644439 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 13 Oct 2021 17:24:41 +0200 Subject: [PATCH 096/116] MIP version update --- CHANGELOG.md | 2 +- lib/MIP/Constants.pm | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6375e3103..f0a5a83d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). -## [Develop] +## [11.0.0] - HmtNote: annotate mitochondrial variants in VCF file - Updating to latest and greatest versions diff --git a/lib/MIP/Constants.pm b/lib/MIP/Constants.pm index c7144f4d9..223f06457 100644 --- a/lib/MIP/Constants.pm +++ b/lib/MIP/Constants.pm @@ -81,7 +81,7 @@ Readonly our %ANALYSIS => ( ); ## Set MIP version -Readonly our $MIP_VERSION => q{10.2.2}; +Readonly our $MIP_VERSION => q{11.0.0}; ## Cli Readonly our $MOOSEX_APP_SCEEN_WIDTH => 160; diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 029a74ef7..f2d65841e 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -121,7 +121,7 @@ container: mip: executable: mip: - uri: docker.io/clinicalgenomics/mip:v10.2.2 + uri: docker.io/clinicalgenomics/mip:v11.0.0 multiqc: executable: multiqc: From 709f52a03e22fffd739cc4243ceda7a82b3e3481 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 14 Oct 2021 11:02:54 +0200 Subject: [PATCH 097/116] updates clinvar --- CHANGELOG.md | 2 +- lib/MIP/Cli/Mip/Download.pm | 8 ---- templates/grch38_mip_rd_dna_config.yaml | 2 +- .../mip_download_rd_dna_config_-1.0-.yaml | 38 +++++++++---------- templates/mip_dragen_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_config.yaml | 2 +- templates/mip_rd_dna_panel_config.yaml 
| 2 +- templates/mip_rd_dna_vcf_rerun_config.yaml | 2 +- 8 files changed, 25 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0a5a83d9..cd9d3c44d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ vep release_103.1 -> release_104.3 - gnomad: r3.0 -> r3.1.1 - [NEW] gnomad mt: r3.1 -- clinvar: 20210415 -> 20210919 +- clinvar: 20210415 -> 20211010 ## [10.2.2] diff --git a/lib/MIP/Cli/Mip/Download.pm b/lib/MIP/Cli/Mip/Download.pm index 6284073de..7b9750dca 100644 --- a/lib/MIP/Cli/Mip/Download.pm +++ b/lib/MIP/Cli/Mip/Download.pm @@ -155,14 +155,6 @@ sub _build_usage { ), ); - option( - q{container_config_file} => ( - documentation => q{File with install configuration parameters in YAML format}, - is => q{rw}, - isa => Str, - ) - ); - option( q{job_reservation_name} => ( documentation => q{Allocate node resources from named reservation}, diff --git a/templates/grch38_mip_rd_dna_config.yaml b/templates/grch38_mip_rd_dna_config.yaml index faebabb56..e2931e851 100755 --- a/templates/grch38_mip_rd_dna_config.yaml +++ b/templates/grch38_mip_rd_dna_config.yaml @@ -94,7 +94,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch38_clinvar_reformated_-20210919-.vcf.gz + path: cluster_constant_path!/references/grch38_clinvar_reformated_-20211010-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index 17b308fa6..abb7f1ba4 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -27,7 +27,7 @@ reference: - v1.0 clinvar: - 20210415 - - 20210919 + - 20211010 dbnsfp: - 3.5a - 4.0b2a @@ -323,25 +323,16 @@ reference_feature: 
outfile_index: grch37_clinvar_-20210415-.vcf.gz.tbi outfile_check_method: md5sum url_prefix: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly/ - 20210919: - file: clinvar_20210919.vcf.gz - file_check: clinvar_20210919.vcf.gz.md5 - file_index: clinvar_20210919.vcf.gz.tbi - outfile: grch37_clinvar_-20210919-.vcf.gz - outfile_check: grch37_clinvar_-20210919-.vcf.gz.md5 - outfile_index: grch37_clinvar_-20210919-.vcf.gz.tbi - outfile_check_method: md5sum - url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly + 20211010: + file: clinvar_20211010.vcf.gz + file_check: clinvar_20211010.vcf.gz.md5 + file_index: clinvar_20211010.vcf.gz.tbi + outfile: grch37_clinvar_-20211010-.vcf.gz + outfile_check: grch37_clinvar_-20211010-.vcf.gz.md5 + outfile_index: grch37_clinvar_-20211010-.vcf.gz.tbi + outfile_check_method: md5sum + url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/weekly/ grch38: - 20200905: - file: clinvar_20200905.vcf.gz - file_check: clinvar_20200905.vcf.gz.md5 - file_index: clinvar_20200905.vcf.gz.tbi - outfile: grch38_clinvar_-20200905-.vcf.gz - outfile_check: grch38_clinvar_-20200905-.vcf.gz.md5 - outfile_index: grch38_clinvar_-20200905-.vcf.gz.tbi - outfile_check_method: md5sum - url_prefix: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/archive_2.0/2020/ 20210415: file: clinvar_20210415.vcf.gz file_check: clinvar_20210415.vcf.gz.md5 @@ -351,6 +342,15 @@ reference_feature: outfile_index: grch38_clinvar_-20210415-.vcf.gz.tbi outfile_check_method: md5sum url_prefix: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/weekly/ + 20211010: + file: clinvar_20211010.vcf.gz + file_check: clinvar_20211010.vcf.gz.md5 + file_index: clinvar_20211010.vcf.gz.tbi + outfile: grch38_clinvar_-20211010-.vcf.gz + outfile_check: grch38_clinvar_-20211010-.vcf.gz.md5 + outfile_index: grch38_clinvar_-20211010-.vcf.gz.tbi + outfile_check_method: md5sum + url_prefix: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/weekly/ dbnsfp: grch37: 3.5a: 
diff --git a/templates/mip_dragen_rd_dna_config.yaml b/templates/mip_dragen_rd_dna_config.yaml index 4a588e3a9..26f0a81b4 100644 --- a/templates/mip_dragen_rd_dna_config.yaml +++ b/templates/mip_dragen_rd_dna_config.yaml @@ -68,7 +68,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20211010-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_config.yaml b/templates/mip_rd_dna_config.yaml index 2b7f89b00..34d1f3d3d 100755 --- a/templates/mip_rd_dna_config.yaml +++ b/templates/mip_rd_dna_config.yaml @@ -75,7 +75,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20211010-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_panel_config.yaml b/templates/mip_rd_dna_panel_config.yaml index 038a1261d..84dde3c6d 100755 --- a/templates/mip_rd_dna_panel_config.yaml +++ b/templates/mip_rd_dna_panel_config.yaml @@ -49,7 +49,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20211010-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ 
vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins diff --git a/templates/mip_rd_dna_vcf_rerun_config.yaml b/templates/mip_rd_dna_vcf_rerun_config.yaml index 01c56cf0e..0a459b86e 100644 --- a/templates/mip_rd_dna_vcf_rerun_config.yaml +++ b/templates/mip_rd_dna_vcf_rerun_config.yaml @@ -70,7 +70,7 @@ vep_custom_annotation: force_report_coordinates: 0 key: CLINVAR file_type: vcf - path: cluster_constant_path!/references/grch37_clinvar_reformated_-20210919-.vcf.gz + path: cluster_constant_path!/references/grch37_clinvar_reformated_-20211010-.vcf.gz vcf_fields: CLNSIG,CLNVID,CLNREVSTAT vep_directory_cache: cluster_constant_path!/references/ensembl-tools-data-104/cache/ vep_plugins_dir_path: cluster_constant_path!/references/ensembl-tools-data-104/cache/Plugins From 6cf9e11b02baace6127d4f4700302b811eaac5cc Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 14 Oct 2021 11:31:38 +0200 Subject: [PATCH 098/116] Renames clinvar test files --- ...210919-.vcf.gz => grch37_clinvar_reformated_-20211010-.vcf.gz} | 0 ...vcf.gz.tbi => grch37_clinvar_reformated_-20211010-.vcf.gz.tbi} | 0 ...210919-.vcf.gz => grch38_clinvar_reformated_-20211010-.vcf.gz} | 0 ...vcf.gz.tbi => grch38_clinvar_reformated_-20211010-.vcf.gz.tbi} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename t/data/references/{grch37_clinvar_reformated_-20210919-.vcf.gz => grch37_clinvar_reformated_-20211010-.vcf.gz} (100%) rename t/data/references/{grch37_clinvar_reformated_-20210919-.vcf.gz.tbi => grch37_clinvar_reformated_-20211010-.vcf.gz.tbi} (100%) rename t/data/references/{grch38_clinvar_reformated_-20210919-.vcf.gz => grch38_clinvar_reformated_-20211010-.vcf.gz} (100%) rename t/data/references/{grch38_clinvar_reformated_-20210919-.vcf.gz.tbi => grch38_clinvar_reformated_-20211010-.vcf.gz.tbi} (100%) diff --git a/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz b/t/data/references/grch37_clinvar_reformated_-20211010-.vcf.gz similarity index 100% 
rename from t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz rename to t/data/references/grch37_clinvar_reformated_-20211010-.vcf.gz diff --git a/t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi b/t/data/references/grch37_clinvar_reformated_-20211010-.vcf.gz.tbi similarity index 100% rename from t/data/references/grch37_clinvar_reformated_-20210919-.vcf.gz.tbi rename to t/data/references/grch37_clinvar_reformated_-20211010-.vcf.gz.tbi diff --git a/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz b/t/data/references/grch38_clinvar_reformated_-20211010-.vcf.gz similarity index 100% rename from t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz rename to t/data/references/grch38_clinvar_reformated_-20211010-.vcf.gz diff --git a/t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi b/t/data/references/grch38_clinvar_reformated_-20211010-.vcf.gz.tbi similarity index 100% rename from t/data/references/grch38_clinvar_reformated_-20210919-.vcf.gz.tbi rename to t/data/references/grch38_clinvar_reformated_-20211010-.vcf.gz.tbi From 01b79c306a03187dc0987ea13ed298ee28910ba0 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 18 Oct 2021 18:46:35 +0200 Subject: [PATCH 099/116] fixing bcftools sort --- lib/MIP/Recipes/Analysis/Mip_vcfparser.pm | 6 ++++-- lib/MIP/Recipes/Analysis/Plink.pm | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Mip_vcfparser.pm b/lib/MIP/Recipes/Analysis/Mip_vcfparser.pm index de3147626..c2c7ade78 100644 --- a/lib/MIP/Recipes/Analysis/Mip_vcfparser.pm +++ b/lib/MIP/Recipes/Analysis/Mip_vcfparser.pm @@ -4,7 +4,7 @@ use 5.026; use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; -use File::Spec::Functions qw{ catfile }; +use File::Spec::Functions qw{ catdir catfile }; use List::Util qw{ any }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ allow check last_error }; @@ -639,13 +639,15 @@ sub 
analysis_mip_vcfparser_panel { say {$filehandle} q{## Prepare file for vcfparser}; my $bcftools_outfile_path = catfile( $outdir_path, $infile_name_prefix . q{.vcf.gz} ); + my $sort_memory = $recipe{memory} - 1; bcftools_sort( { filehandle => $filehandle, infile_path => $infile_path, + max_mem => $sort_memory . q{G}, outfile_path => $bcftools_outfile_path, output_type => q{z}, - temp_directory => $temp_directory, + temp_directory => catdir( $temp_directory, q{bcftools_sort} ), } ); say {$filehandle} $NEWLINE; diff --git a/lib/MIP/Recipes/Analysis/Plink.pm b/lib/MIP/Recipes/Analysis/Plink.pm index 8f50aa112..69fc94532 100644 --- a/lib/MIP/Recipes/Analysis/Plink.pm +++ b/lib/MIP/Recipes/Analysis/Plink.pm @@ -4,7 +4,7 @@ use 5.026; use Carp; use charnames qw{ :full :short }; use English qw{ -no_match_vars }; -use File::Spec::Functions qw{ catfile }; +use File::Spec::Functions qw{ catdir catfile }; use open qw{ :encoding(UTF-8) :std }; use Params::Check qw{ check allow last_error }; use utf8; @@ -308,7 +308,7 @@ sub analysis_plink { filehandle => $filehandle, max_mem => $sort_memory . q{G}, output_type => q{v}, - temp_directory => $temp_directory, + temp_directory => catdir( $temp_directory, q{bcftools_sort} ), } ); print {$filehandle} $PIPE . 
$SPACE; From eb33facfcb102ac90354fe418acea89113f71f5f Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 12 Nov 2021 09:46:23 +0100 Subject: [PATCH 100/116] bumps resources for rna pipeline --- definitions/rd_rna_parameters.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 72cb2dc53..3bf5a0213 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -142,14 +142,14 @@ recipe_time: bcftools_norm: 2 blobfish: 1 bootstrapann: 2 - build_sj_tracks: 1 + build_sj_tracks: 2 dna_vcf_reformat: 1 fastqc_ar: 1 - fusion_report: 1 - gatk_asereadcounter: 2 + fusion_report: 2 + gatk_asereadcounter: 3 gatk_baserecalibration: 5 gatk_haplotypecaller: 6 - gatk_splitncigarreads: 8 + gatk_splitncigarreads: 12 gatk_variantfiltration: 1 genebody_coverage: 3 gffcompare_ar: 1 @@ -160,14 +160,14 @@ recipe_time: merge_fusion_reports: 1 picardtools_collectrnaseqmetrics: 4 picardtools_mergesamfiles: 2 - preseq_ar: 2 + preseq_ar: 3 qccollect_ar: 1 rseqc: 3 sacct: 1 salmon_quant: 5 star_aln: 8 star_fusion: 16 - stringtie_ar: 2 + stringtie_ar: 4 svdb_merge_fusion: 1 trim_galore_ar: 6 varianteffectpredictor: 4 @@ -180,7 +180,7 @@ recipe_memory: data_type: HASH default: analysisrunstatus: 1 - build_sj_tracks: 8 + build_sj_tracks: 12 dna_vcf_reformat: 2 fusion_report: 15 gatk_asereadcounter: 25 From abb9da5e65baad15cdafb71d3f775eda2a5b5a90 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 15 Nov 2021 11:54:33 +0100 Subject: [PATCH 101/116] bumb vcparser rna memory --- definitions/rd_rna_parameters.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 3bf5a0213..08292c8f9 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -196,6 +196,7 @@ recipe_memory: stringtie_ar: 2 trim_galore_ar: 5 varianteffectpredictor: 12 + vcfparser_ar: 8 
version_collect_ar: 8 type: mip recipe_core_number: From e4f939e3c48320c32e47a404b3744d1e5e07be86 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 15 Nov 2021 19:12:24 +0100 Subject: [PATCH 102/116] updates rankmodel for DV switch --- README.md | 2 +- documentation/README.md | 2 +- lib/MIP/Recipes/Install/Mip_scripts.pm | 2 +- t/mip_core.t | 2 +- .../mip_download_rd_dna_config_-1.0-.yaml | 16 +++++----- ...del_-v1.31-.ini => rank_model_-v1.33-.ini} | 32 ++++++++++++------- 6 files changed, 33 insertions(+), 23 deletions(-) rename templates/{rank_model_-v1.31-.ini => rank_model_-v1.33-.ini} (97%) diff --git a/README.md b/README.md index 5a657a654..dd1f1b60b 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,6 @@ MIP will place any generated data files in the output data directory specified b [Miniconda]: http://conda.pydata.org/miniconda.html [Pedigree file]: https://github.com/Clinical-Genomics/MIP/tree/master/templates/643594-miptest_pedigree.yaml [Perl]:https://www.perl.org/ -[Rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/rank_model_-v1.31-.ini +[Rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/rank_model_-v1.33-.ini [SV rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/svrank_model_-v1.8-.ini [Qc regexp file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/qc_regexp_-v1.26-.yaml diff --git a/documentation/README.md b/documentation/README.md index 5a657a654..dd1f1b60b 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -233,6 +233,6 @@ MIP will place any generated data files in the output data directory specified b [Miniconda]: http://conda.pydata.org/miniconda.html [Pedigree file]: https://github.com/Clinical-Genomics/MIP/tree/master/templates/643594-miptest_pedigree.yaml [Perl]:https://www.perl.org/ -[Rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/rank_model_-v1.31-.ini +[Rank model file]: 
https://github.com/Clinical-Genomics/MIP/blob/master/templates/rank_model_-v1.33-.ini [SV rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/svrank_model_-v1.8-.ini [Qc regexp file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/qc_regexp_-v1.26-.yaml diff --git a/lib/MIP/Recipes/Install/Mip_scripts.pm b/lib/MIP/Recipes/Install/Mip_scripts.pm index a59a73d8c..1d76777a1 100644 --- a/lib/MIP/Recipes/Install/Mip_scripts.pm +++ b/lib/MIP/Recipes/Install/Mip_scripts.pm @@ -90,7 +90,7 @@ sub install_mip_scripts { program_test_cmds.yaml qc_eval_metric_-v1.3-.yaml qc_regexp_-v1.26-.yaml - rank_model_-v1.31-.ini + rank_model_-v1.33-.ini svrank_model_-v1.8-.ini } ], diff --git a/t/mip_core.t b/t/mip_core.t index 08e47b15e..e0f5e1893 100755 --- a/t/mip_core.t +++ b/t/mip_core.t @@ -330,7 +330,7 @@ sub mip_scripts { mip_rd_rna_config.yaml program_test_cmds.yaml qc_regexp_-v1.26-.yaml - rank_model_-v1.31-.ini + rank_model_-v1.33-.ini svrank_model_-v1.8-.ini } ], diff --git a/templates/mip_download_rd_dna_config_-1.0-.yaml b/templates/mip_download_rd_dna_config_-1.0-.yaml index abb7f1ba4..1eacbe84a 100644 --- a/templates/mip_download_rd_dna_config_-1.0-.yaml +++ b/templates/mip_download_rd_dna_config_-1.0-.yaml @@ -104,8 +104,8 @@ reference: - gold_standard_indels rank_model: - v0.1 - - v1.31 - v1.32 + - v1.33 reduced_penetrance: - 2017 scout_exons: @@ -805,13 +805,6 @@ reference_feature: url_prefix: https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/ rank_model: grch37: - v1.31: - file: rank_model_-v1.31-.ini - file_check: rank_model_-v1.31-.ini.md5 - outfile: rank_model_-v1.31-.ini - outfile_check: rank_model_-v1.31-.ini.md5 - outfile_check_method: md5sum - url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/rank_model/ v1.32: file: rank_model_-v1.32-.ini file_check: rank_model_-v1.32-.ini.md5 @@ -819,6 +812,13 @@ reference_feature: outfile_check: 
rank_model_-v1.32-.ini.md5 outfile_check_method: md5sum url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/rank_model/ + v1.33: + file: rank_model_-v1.33-.ini + file_check: rank_model_-v1.33-.ini.md5 + outfile: rank_model_-v1.33-.ini + outfile_check: rank_model_-v1.33-.ini.md5 + outfile_check_method: md5sum + url_prefix: https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/rare-disease/rank_model/ grch38: v0.2: file: grch38_rank_model_-v0.2-.ini diff --git a/templates/rank_model_-v1.31-.ini b/templates/rank_model_-v1.33-.ini similarity index 97% rename from templates/rank_model_-v1.31-.ini rename to templates/rank_model_-v1.33-.ini index 607a1296e..4d667fd6d 100644 --- a/templates/rank_model_-v1.31-.ini +++ b/templates/rank_model_-v1.33-.ini @@ -1,5 +1,5 @@ [Version] - version = 1.31 + version = 1.33 name = rank_model [Categories] @@ -340,6 +340,11 @@ [[low_qual]] score = -5 lower = 0 + upper = 10 + + [[medium_qual]] + score = -2 + lower = 10 upper = 20 [[high_qual]] @@ -446,9 +451,9 @@ [loqusdb] category = allele_frequency data_type = float - description = LoqusDb observation count for cases=3295 + description = LoqusDb observation frequency for cases=5092 field = INFO - info_key = Obs + info_key = Frq record_rule = max separators = ',', @@ -457,23 +462,23 @@ [[common]] score = -12 - lower = 35 - upper = 10000000 + lower = 0.02 + upper = 1.1 [[intermediate]] score = 1 - lower = 20 - upper = 35 + lower = 0.005 + upper = 0.02 [[rare]] score = 2 - lower = 5 - upper = 20 + lower = 0.0005 + upper = 0.005 [[very_rare]] score = 3 - lower = 1 - upper = 5 + lower = 0 + upper = 0.0005 [polyphen] category = protein_prediction @@ -852,6 +857,11 @@ priority = 1 string = 'PASS' + [[dot]] + score = 3 + priority = 2 + string = '.' 
+ [dbnsfp_gerp++_rs] category = conservation csq_key = GERP++_RS From f4b1e10d195e716d3fce61c296ed520b612fa4c6 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 16 Nov 2021 17:45:05 +0100 Subject: [PATCH 103/116] updates duplicate calculation regex --- lib/MIP/Recipes/Analysis/Markduplicates.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Markduplicates.pm b/lib/MIP/Recipes/Analysis/Markduplicates.pm index a1bb8e3ca..02fbb822f 100644 --- a/lib/MIP/Recipes/Analysis/Markduplicates.pm +++ b/lib/MIP/Recipes/Analysis/Markduplicates.pm @@ -786,7 +786,7 @@ sub _calculate_fraction_duplicates_for_all_metric_files { $regexp .= q?while (<>) { ?; # Find duplicate metric - $regexp .= q?if($_=~/duplicates/ && $_=~/^(\d+)/) { ?; + $regexp .= q?if($_=~/\d+\sduplicates/ && $_=~/^(\d+)/) { ?; # Add to previous duplicate metrics $regexp .= q?$feature{dup} = $feature{dup} + $1 } ?; From 66657c30f0a87d40796b3bd3f4927d427452c27e Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 22 Nov 2021 14:28:25 +0100 Subject: [PATCH 104/116] genmod resource bump --- lib/MIP/Recipes/Analysis/Rankvariant.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MIP/Recipes/Analysis/Rankvariant.pm b/lib/MIP/Recipes/Analysis/Rankvariant.pm index 70071c81c..45952d736 100644 --- a/lib/MIP/Recipes/Analysis/Rankvariant.pm +++ b/lib/MIP/Recipes/Analysis/Rankvariant.pm @@ -31,7 +31,7 @@ BEGIN { } Readonly my $FOUR => 4; -Readonly my $MAX_PARALLEL_PROCESSES => 13; +Readonly my $MAX_PARALLEL_PROCESSES => 10; sub analysis_rankvariant { From 3e6b8f31c50b96e2ab4f2438bb9bf1786b63d08a Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 7 Dec 2021 16:22:08 +0100 Subject: [PATCH 105/116] rna resource bump --- definitions/rd_rna_parameters.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 08292c8f9..c92078099 100755 --- a/definitions/rd_rna_parameters.yaml 
+++ b/definitions/rd_rna_parameters.yaml @@ -149,15 +149,15 @@ recipe_time: gatk_asereadcounter: 3 gatk_baserecalibration: 5 gatk_haplotypecaller: 6 - gatk_splitncigarreads: 12 + gatk_splitncigarreads: 16 gatk_variantfiltration: 1 genebody_coverage: 3 gffcompare_ar: 1 gzip_fastq: 2 markduplicates: 8 megafusion_ar: 1 - multiqc_ar: 1 merge_fusion_reports: 1 + multiqc_ar: 1 picardtools_collectrnaseqmetrics: 4 picardtools_mergesamfiles: 2 preseq_ar: 3 @@ -165,7 +165,7 @@ recipe_time: rseqc: 3 sacct: 1 salmon_quant: 5 - star_aln: 8 + star_aln: 12 star_fusion: 16 stringtie_ar: 4 svdb_merge_fusion: 1 @@ -223,6 +223,7 @@ recipe_core_number: gffcompare_ar: 1 gzip_fastq: 0 markduplicates: 13 + megafusion_ar: 1 merge_fusion_reports: 1 multiqc_ar: 1 picardtools_collectrnaseqmetrics: 1 @@ -236,7 +237,6 @@ recipe_core_number: star_fusion: 36 stringtie_ar: 36 svdb_merge_fusion: 1 - megafusion_ar: 1 varianteffectpredictor: 1 vcfparser_ar: 1 version_collect_ar: 1 From 2fb2a41fc25f8146fe54ce4af056832076f03f1f Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 7 Dec 2021 16:31:06 +0100 Subject: [PATCH 106/116] roll back deepvariant to v1.1.0 --- CHANGELOG.md | 1 - documentation/Setup.md | 2 +- templates/mip_install_config.yaml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd9d3c44d..064c7b5e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,6 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
### Tools cyrius v1.1 -> v1.1.1 -deepvariant 1.1.0-gpu -> 1.2.0-gpu deeptrio 1.1.0-gpu -> 1.2.0-gpu gatk 4.2.0.0 -> 4.2.2.0 glnexus v1.3.1 -> v1.4.1 diff --git a/documentation/Setup.md b/documentation/Setup.md index 878b622bc..1b9b2b5ff 100644 --- a/documentation/Setup.md +++ b/documentation/Setup.md @@ -49,7 +49,7 @@ You can speed up, for instance, the Readonly module by also installing the compa - [Expansionhunter] (version 4.0.2) - [FastQC] (version: 0.11.9) - [Deeptrio] (version: 1.2.0) -- [Deepvariant] (version: 1.2.0) +- [Deepvariant] (version: 1.1.0) - [Delly] (version 0.8.7) - [GATK] (version: 3.8.1 and 4.2.2.0) - [GENMOD] (version: 3.7.3) diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index f2d65841e..df7b53062 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -60,7 +60,7 @@ container: call_variants: /opt/deepvariant/bin/call_variants postprocess_variants: /opt/deepvariant/bin/postprocess_variants gpu_support: 0 - uri: docker.io/google/deepvariant:1.2.0 + uri: docker.io/google/deepvariant:1.1.0 deeptrio: executable: run_deeptrio: /opt/deepvariant/bin/deeptrio/run_deeptrio From dbe7a3b63e7498dce0d152982364c05eb5ff7733 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 7 Dec 2021 16:45:59 +0100 Subject: [PATCH 107/116] adds version regexp for DeepVariant --- lib/MIP/Environment/Executable.pm | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/MIP/Environment/Executable.pm b/lib/MIP/Environment/Executable.pm index 43efcb937..7187efb2d 100644 --- a/lib/MIP/Environment/Executable.pm +++ b/lib/MIP/Environment/Executable.pm @@ -449,6 +449,11 @@ q?'my ($version) = /read_duplication.py\s+(\S+)/xms; if($version) {print $versio version_regexp => q?'my ($version) = /version\s+(\S+)/xms; if($version) {print $version;last;}'?, }, + run_deepvariant => { + version_cmd => q{--version}, + version_regexp => +q?'my ($version) = /DeepVariant\s+version\s+(\S+)/xms; if($version) {print 
$version;last;}'? + }, salmon => { version_cmd => q{--version}, version_regexp => From 55f0fb12ad75f36235adf30903c64efaca51edcf Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 9 Dec 2021 15:19:35 +0100 Subject: [PATCH 108/116] only merge SV calls that passed filter --- lib/MIP/Program/Svdb.pm | 29 +++++++++++++------ .../Analysis/Sv_combinevariantcallsets.pm | 1 + lib/MIP/Recipes/Analysis/Tiddit.pm | 1 + t/svdb_merge.t | 13 +++++---- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/lib/MIP/Program/Svdb.pm b/lib/MIP/Program/Svdb.pm index 4e4f9fca0..e7b1a4f43 100644 --- a/lib/MIP/Program/Svdb.pm +++ b/lib/MIP/Program/Svdb.pm @@ -38,6 +38,7 @@ sub svdb_merge { ## : $infile_paths_ref => Infile path {REF} ## : $notag => Do not add the the VARID and set entries to the info field ## : $outfile_path => Outfile path +## : $pass_only => Only merge variants labeled PASS ## : $priority => Priority order of structural variant calls ## : $same_order => Across all input vcf files, the order of the sample columns are the same ## : $stderrfile_path => Stderrfile path @@ -51,6 +52,7 @@ sub svdb_merge { my $infile_paths_ref; my $notag; my $outfile_path; + my $pass_only; my $priority; my $same_order; my $stderrfile_path; @@ -66,10 +68,15 @@ sub svdb_merge { store => \$infile_paths_ref, strict_type => 1, }, - notag => { store => \$notag, strict_type => 1, }, - outfile_path => { store => \$outfile_path, strict_type => 1, }, - priority => { store => \$priority, strict_type => 1, }, - same_order => { store => \$same_order, strict_type => 1, }, + notag => { store => \$notag, strict_type => 1, }, + outfile_path => { store => \$outfile_path, strict_type => 1, }, + pass_only => { + allow => [ undef, 0, 1 ], + store => \$pass_only, + strict_type => 1, + }, + priority => { store => \$priority, strict_type => 1, }, + same_order => { store => \$same_order, strict_type => 1, }, stderrfile_path => { store => \$stderrfile_path, strict_type => 1, @@ -86,9 +93,13 @@ sub svdb_merge { 
check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - my @commands = ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), - qw{ --merge } ); + my @commands = + ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), qw{ --merge } ); + if ($pass_only) { + + push @commands, q{--pass_only},; + } if ($priority) { push @commands, q{--priority} . $SPACE . $priority; @@ -175,7 +186,7 @@ sub svdb_query { filehandle => { store => \$filehandle }, in_frequency_tag => { strict_type => 1, store => \$in_frequency_tag }, in_allele_count_tag => { strict_type => 1, store => \$in_allele_count_tag }, - infile_path => { + infile_path => { required => 1, defined => 1, strict_type => 1, @@ -205,8 +216,8 @@ sub svdb_query { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; - my @commands = ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), - qw{ --query } ); + my @commands = + ( get_executable_base_command( { base_command => $BASE_COMMAND, } ), qw{ --query } ); if ($bnd_distance) { diff --git a/lib/MIP/Recipes/Analysis/Sv_combinevariantcallsets.pm b/lib/MIP/Recipes/Analysis/Sv_combinevariantcallsets.pm index 2e8c0cdea..4fd17f8ee 100644 --- a/lib/MIP/Recipes/Analysis/Sv_combinevariantcallsets.pm +++ b/lib/MIP/Recipes/Analysis/Sv_combinevariantcallsets.pm @@ -281,6 +281,7 @@ sub analysis_sv_combinevariantcallsets { filehandle => $filehandle, infile_paths_ref => \@svdb_infile_paths, priority => $active_parameter_href->{sv_svdb_merge_prioritize}, + pass_only => 1, same_order => 1, stdoutfile_path => $outfile_path, } diff --git a/lib/MIP/Recipes/Analysis/Tiddit.pm b/lib/MIP/Recipes/Analysis/Tiddit.pm index 6fc95adc3..6e5a5201b 100644 --- a/lib/MIP/Recipes/Analysis/Tiddit.pm +++ b/lib/MIP/Recipes/Analysis/Tiddit.pm @@ -271,6 +271,7 @@ sub analysis_tiddit { filehandle => $filehandle, infile_paths_ref => \@svdb_infile_paths, notag => 1, + pass_only => 1, stdoutfile_path => $outfile_path, } ); diff --git 
a/t/svdb_merge.t b/t/svdb_merge.t index 9d784776a..db318a113 100644 --- a/t/svdb_merge.t +++ b/t/svdb_merge.t @@ -23,16 +23,13 @@ use lib catdir( dirname($Bin), q{lib} ); use MIP::Constants qw{ $COMMA $SPACE }; use MIP::Test::Commands qw{ test_function }; - BEGIN { use MIP::Test::Fixtures qw{ test_import }; ### Check all internal dependency modules and imports ## Modules with import - my %perl_module = ( - q{MIP::Program::Svdb} => [qw{ svdb_merge }], -); + my %perl_module = ( q{MIP::Program::Svdb} => [qw{ svdb_merge }], ); test_import( { perl_module_href => \%perl_module, } ); } @@ -73,7 +70,7 @@ my %base_argument = ( ## to enable testing of each individual argument my %required_argument = ( infile_paths_ref => { - inputs_ref => [ catfile(qw{ a test infile_1 }), catfile(qw{ a test infile_2 }) ], + inputs_ref => [ catfile(qw{ a test infile_1 }), catfile(qw{ a test infile_2 }) ], expected_output => q{--vcf} . $SPACE . catfile(qw{ a test infile_1 }) @@ -84,7 +81,7 @@ my %required_argument = ( my %specific_argument = ( infile_paths_ref => { - inputs_ref => [ catfile(qw{ a test infile_1 }), catfile(qw{ a test infile_2 }) ], + inputs_ref => [ catfile(qw{ a test infile_1 }), catfile(qw{ a test infile_2 }) ], expected_output => q{--vcf} . $SPACE . catfile(qw{ a test infile_1 }) @@ -95,6 +92,10 @@ my %specific_argument = ( input => q{1}, expected_output => q{--notag}, }, + pass_only => { + input => 1, + expected_output => q{--pass_only}, + }, priority => { input => q{manta,delly,cnvnator,tiddit}, expected_output => q{--priority} . $SPACE . 
q{manta,delly,cnvnator,tiddit}, From b2e9728f1975329393c0f31667974527269ede3f Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 15 Dec 2021 12:04:09 +0100 Subject: [PATCH 109/116] fix for sv_annotate --- lib/MIP/Recipes/Analysis/Sv_annotate.pm | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/MIP/Recipes/Analysis/Sv_annotate.pm b/lib/MIP/Recipes/Analysis/Sv_annotate.pm index 784c14879..4886e54c4 100644 --- a/lib/MIP/Recipes/Analysis/Sv_annotate.pm +++ b/lib/MIP/Recipes/Analysis/Sv_annotate.pm @@ -129,7 +129,7 @@ sub analysis_sv_annotate { check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!}; use MIP::File_info qw{ get_io_files parse_io_outfiles }; - use MIP::Program::Gnu::Coreutils qw( gnu_cat gnu_mv gnu_tee ); + use MIP::Program::Gnu::Coreutils qw( gnu_cat gnu_cp gnu_mv gnu_rm gnu_tee ); use MIP::Io::Read qw{ read_from_file }; use MIP::Processmanagement::Processes qw{ submit_recipe }; use MIP::Program::Bcftools qw{ bcftools_filter bcftools_view }; @@ -318,12 +318,24 @@ sub analysis_sv_annotate { ## Remove FILTER ne PASS and filter on frequency if ( $active_parameter_href->{sv_frequency_filter} ) { + say {$filehandle} +q{## Create temp file as to not read and write to the same file in the case of wes samples}; + my $temp_filter_infile_path = $outfile_path_prefix . q{_temp} . 
$outfile_suffix; + gnu_cp( + { + filehandle => $filehandle, + infile_path => $sort_outfile_path, + outfile_path => $temp_filter_infile_path, + } + ); + say ${filehandle} $NEWLINE; + say {$filehandle} q{## Remove FILTER ne PASS, annotate and remove common variants}; bcftools_view( { apply_filters_ref => [qw{ PASS }], filehandle => $filehandle, - infile_path => $sort_outfile_path, + infile_path => $temp_filter_infile_path, output_type => q{v}, } ); @@ -403,7 +415,14 @@ sub analysis_sv_annotate { stdoutfile_path => $outfile_path, } ); + say {$filehandle} $NEWLINE; } + gnu_rm( + { + filehandle => $filehandle, + infile_path => $temp_filter_infile_path, + } + ); } close $filehandle or $log->logcroak(q{Could not close filehandle}); From ba3a1141e547a4b4e630cdf5586b835387c3aa8a Mon Sep 17 00:00:00 2001 From: jemten Date: Fri, 14 Jan 2022 16:26:37 +0100 Subject: [PATCH 110/116] updates bootrstrapann --- containers/bootstrapann/Dockerfile | 24 ++++++++++++------------ templates/mip_install_config.yaml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/containers/bootstrapann/Dockerfile b/containers/bootstrapann/Dockerfile index 6d27a95c6..73fb6a5e4 100644 --- a/containers/bootstrapann/Dockerfile +++ b/containers/bootstrapann/Dockerfile @@ -1,30 +1,30 @@ ################## BASE IMAGE ###################### -FROM clinicalgenomics/mip_base:2.1 +FROM python:2.7-slim ################## METADATA ###################### -LABEL base_image="clinicalgenomics/mip_base:2.1" -LABEL version="1" +LABEL base_image="python:2.7-slim" +LABEL version="2" LABEL software="BootstrapAnn" -LABEL software.version="df02f35" +LABEL software.version="ebc81ed" LABEL extra.binaries="BootstrapAnn.py" LABEL maintainer="Clinical-Genomics/MIP" -RUN conda install pip python=2.7 numpy scipy +RUN apt-get update && apt-get install -y git +RUN apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -## Clean up after conda -RUN /opt/conda/bin/conda clean -tipsy +RUN pip install numpy 
scipy ## Clone git repository -RUN git clone https://github.com/J35P312/BootstrapAnn.git /opt/conda/share/BootstrapAnn +RUN git clone https://github.com/J35P312/BootstrapAnn.git /usr/local/BootstrapAnn -RUN cd /opt/conda/share/BootstrapAnn && git checkout df02f35 +RUN cd /usr/local/BootstrapAnn && git checkout ebc81ed -## Move to smn_caller directory -RUN cd /opt/conda/share/BootstrapAnn && \ +RUN cd /usr/local/BootstrapAnn && \ chmod a+x BootstrapAnn.py -RUN ln --symbolic --force /opt/conda/share/BootstrapAnn/BootstrapAnn.py /opt/conda/bin/BootstrapAnn.py +RUN ln --symbolic --force /usr/local/BootstrapAnn/BootstrapAnn.py /usr/local/bin/BootstrapAnn.py WORKDIR /data/ diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index df7b53062..1aa841545 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -16,7 +16,7 @@ container: bootstrapann: executable: BootstrapAnn.py: - uri: docker.io/clinicalgenomics/bootstrapann:df02f35 + uri: docker.io/clinicalgenomics/bootstrapann:ebc81ed bwa: executable: bwa: From 908b6c8a5598aa1beaf44b07af8d1f5f6d261df9 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 26 Jan 2022 15:41:13 +0100 Subject: [PATCH 111/116] bump resources and turns off stringtie-gffcompare --- definitions/rd_rna_parameters.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index c92078099..9fc71d61e 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -145,7 +145,7 @@ recipe_time: build_sj_tracks: 2 dna_vcf_reformat: 1 fastqc_ar: 1 - fusion_report: 2 + fusion_report: 4 gatk_asereadcounter: 3 gatk_baserecalibration: 5 gatk_haplotypecaller: 6 @@ -167,7 +167,7 @@ recipe_time: salmon_quant: 5 star_aln: 12 star_fusion: 16 - stringtie_ar: 4 + stringtie_ar: 6 svdb_merge_fusion: 1 trim_galore_ar: 6 varianteffectpredictor: 4 @@ -184,6 +184,7 @@ recipe_memory: 
dna_vcf_reformat: 2 fusion_report: 15 gatk_asereadcounter: 25 + gatk_splitncigarreads: 16 gatk_variantfiltration: 8 genebody_coverage: 16 multiqc_ar: 10 @@ -193,7 +194,6 @@ recipe_memory: rseqc: 35 salmon_quant: 2 star_aln: 3 - stringtie_ar: 2 trim_galore_ar: 5 varianteffectpredictor: 12 vcfparser_ar: 8 @@ -217,7 +217,7 @@ recipe_core_number: gatk_asereadcounter: 1 gatk_baserecalibration: 13 gatk_haplotypecaller: 13 - gatk_splitncigarreads: 12 + gatk_splitncigarreads: 8 gatk_variantfiltration: 1 genebody_coverage: 1 gffcompare_ar: 1 @@ -621,7 +621,7 @@ stringtie_ar: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _strg outfile_suffix: ".gtf" program_executables: @@ -645,7 +645,7 @@ gffcompare_ar: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _gffcmp outfile_suffix: ".gtf" program_executables: From ceb6c6f7381d153cb1671cbb108f496fd20ba8c4 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 27 Jan 2022 09:50:49 +0100 Subject: [PATCH 112/116] increased gatk_splitncigarreads memory --- definitions/rd_rna_parameters.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/definitions/rd_rna_parameters.yaml b/definitions/rd_rna_parameters.yaml index 9fc71d61e..5e3c2c415 100755 --- a/definitions/rd_rna_parameters.yaml +++ b/definitions/rd_rna_parameters.yaml @@ -184,7 +184,7 @@ recipe_memory: dna_vcf_reformat: 2 fusion_report: 15 gatk_asereadcounter: 25 - gatk_splitncigarreads: 16 + gatk_splitncigarreads: 20 gatk_variantfiltration: 8 genebody_coverage: 16 multiqc_ar: 10 From 69a927309579c011972989e66edb07047b5dddb0 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 30 Mar 2022 17:58:28 +0200 Subject: [PATCH 113/116] update bootstrapann --- containers/bootstrapann/Dockerfile | 4 ++-- templates/mip_install_config.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/containers/bootstrapann/Dockerfile b/containers/bootstrapann/Dockerfile index 73fb6a5e4..171bcc42d 100644 --- 
a/containers/bootstrapann/Dockerfile +++ b/containers/bootstrapann/Dockerfile @@ -7,7 +7,7 @@ FROM python:2.7-slim LABEL base_image="python:2.7-slim" LABEL version="2" LABEL software="BootstrapAnn" -LABEL software.version="ebc81ed" +LABEL software.version="e557dd3" LABEL extra.binaries="BootstrapAnn.py" LABEL maintainer="Clinical-Genomics/MIP" @@ -20,7 +20,7 @@ RUN pip install numpy scipy ## Clone git repository RUN git clone https://github.com/J35P312/BootstrapAnn.git /usr/local/BootstrapAnn -RUN cd /usr/local/BootstrapAnn && git checkout ebc81ed +RUN cd /usr/local/BootstrapAnn && git checkout e557dd3 RUN cd /usr/local/BootstrapAnn && \ chmod a+x BootstrapAnn.py diff --git a/templates/mip_install_config.yaml b/templates/mip_install_config.yaml index 3d9acc333..2d76eeac4 100644 --- a/templates/mip_install_config.yaml +++ b/templates/mip_install_config.yaml @@ -16,7 +16,7 @@ container: bootstrapann: executable: BootstrapAnn.py: - uri: docker.io/clinicalgenomics/bootstrapann:ebc81ed + uri: docker.io/clinicalgenomics/bootstrapann:e557dd3 bwa: executable: bwa: From d7f6fe0213500bfa6424cbac15bc9acb9be949a8 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 30 Mar 2022 18:04:40 +0200 Subject: [PATCH 114/116] turned off star_caller and telomerecat --- CHANGELOG.md | 1 + definitions/rd_dna_parameters.yaml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 708b70761..850d27cb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- GATK Haplotypecaller has been turned off in favour of Deepvariant - Introduces possibility to store singularity images locally as a .sif file - Increased allele frequency cut off for when a variant is filtered out to 0.7 +- Turned off Star_caller and Telomerecat by default ### Tools diff --git a/definitions/rd_dna_parameters.yaml b/definitions/rd_dna_parameters.yaml index 3e8c11c7c..9ce0a2440 100755 --- a/definitions/rd_dna_parameters.yaml +++ b/definitions/rd_dna_parameters.yaml @@ -671,7 +671,7 @@ star_caller: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _cyp2d6 outfile_suffix: ".tsv" program_executables: @@ -682,7 +682,7 @@ telomerecat_ar: associated_recipe: - mip data_type: SCALAR - default: 1 + default: 0 file_tag: _tel outfile_suffix: ".csv" program_executables: From 055fd037f58fb3fe16bd382bcf9d0fadabba8a4c Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 30 Mar 2022 18:41:16 +0200 Subject: [PATCH 115/116] linting chromograph dockerfile --- containers/chromograph/Dockerfile | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/containers/chromograph/Dockerfile b/containers/chromograph/Dockerfile index d0c18bc52..b330eacc8 100644 --- a/containers/chromograph/Dockerfile +++ b/containers/chromograph/Dockerfile @@ -17,20 +17,14 @@ RUN apt-get update --fix-missing && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -RUN conda install pip python=3.9 matplotlib +RUN conda install pip python=3.9 matplotlib && \ + /opt/conda/bin/conda clean -ya -## Clean up after conda -RUN /opt/conda/bin/conda clean -ya +WORKDIR /opt/conda/share -## Download release -RUN wget --no-verbose https://github.com/mikaell/chromograph/archive/refs/tags/v1.1.4.zip -O /opt/conda/share/chromograph-1.1.4.zip - -## Extract -RUN unzip -o /opt/conda/share/chromograph-1.1.4.zip -d /opt/conda/share/ && \ - rm /opt/conda/share/chromograph-1.1.4.zip - -## Move to chromograph directory -RUN cd 
/opt/conda/share/chromograph-1.1.4 && \ +RUN wget --no-verbose https://github.com/mikaell/chromograph/archive/refs/tags/v1.1.4.zip && \ + unzip v1.1.4.zip && \ + cd chromograph-1.1.4 && \ python -m pip install --no-cache-dir . WORKDIR /data/ From e160280d9336918badf310478b593282eb612968 Mon Sep 17 00:00:00 2001 From: jemten Date: Thu, 31 Mar 2022 09:49:00 +0200 Subject: [PATCH 116/116] bumbing test coverage --- t/mip_analyse_rd_rna.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/mip_analyse_rd_rna.test b/t/mip_analyse_rd_rna.test index c6d0feb3c..f2880587d 100644 --- a/t/mip_analyse_rd_rna.test +++ b/t/mip_analyse_rd_rna.test @@ -92,6 +92,8 @@ my @base_cmds = ( q{--ifd}, catfile( $cluster_constant_path, qw{ 118-rnatest test_data 118-2-2U fastq=118-2-2U } ), qw{--dra --vb }, + q{--stringtie_ar}, + 2, ); my %genome_build_config = (