Skip to content

Commit

Permalink
adds optiion to run gatk for mt variant calling
Browse files Browse the repository at this point in the history
  • Loading branch information
jemten committed Feb 4, 2022
1 parent 5c8c075 commit 2bcfaba
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 45 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Expand Up @@ -8,7 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- HmtNote: annotate mitochondrial variants in VCF file
- Updating to latest and greatest versions
- Mitochondrial deletion analysis
- GATK Haplotypecaller has been turned off in favour of Deepvariant
- GATK Haplotypecaller has been turned off in favour of Deepvariant (for all contigs but the mitochondria).
- Introduces possibility to store singularity images locally as a .sif file
- Increased allele frequency cut off for when a variant is filtered out to 0.7

Expand Down
40 changes: 24 additions & 16 deletions definitions/rd_dna_parameters.yaml
Expand Up @@ -163,11 +163,11 @@ recipe_core_number:
fastqc_ar: 0
variant_annotation: 13
frequency_filter: 13
gatk_baserecalibration: 13
gatk_baserecalibration: 1
gatk_combinevariantcallsets: 1
gatk_gathervcfs: 1
gatk_genotypegvcfs: 1
gatk_haplotypecaller: 16
gatk_haplotypecaller: 1
gatk_variantevalall: 1
gatk_variantevalexome: 1
gatk_variantrecalibration: 1
Expand Down Expand Up @@ -234,11 +234,11 @@ recipe_memory:
delly_reformat: 3
endvariantannotationblock: 8
expansionhunter: 2
gatk_baserecalibration: 10
gatk_baserecalibration: 7
gatk_combinevariantcallsets: 25
gatk_gathervcfs: 7
gatk_genotypegvcfs: 12
gatk_variantrecalibration: 30
gatk_genotypegvcfs: 8
gatk_variantrecalibration: 10
glnexus_merge: 10
markduplicates: 10
mitodel: 2
Expand Down Expand Up @@ -285,14 +285,14 @@ recipe_time:
fastqc_ar: 10
variant_annotation: 3
frequency_filter: 2
gatk_baserecalibration: 20
gatk_baserecalibration: 2
gatk_combinevariantcallsets: 2
gatk_genotypegvcfs: 10
gatk_gathervcfs: 2
gatk_haplotypecaller: 30
gatk_genotypegvcfs: 1
gatk_gathervcfs: 1
gatk_haplotypecaller: 1
gatk_variantevalall: 2
gatk_variantevalexome: 2
gatk_variantrecalibration: 10
gatk_variantrecalibration: 1
glnexus_merge: 5
gzip_fastq: 2
manta: 30
Expand Down Expand Up @@ -350,6 +350,13 @@ picardtools_path:
- sv_reformat
data_type: SCALAR
type: path
call_mt_variants_with_gatk:
associated_recipe:
- mip
data_type: SCALAR
default: 1
mandatory: no
type: mip
### Programs
## Gzip
gzip_fastq:
Expand Down Expand Up @@ -523,7 +530,7 @@ gatk_baserecalibration:
associated_recipe:
- mip
data_type: SCALAR
default: 0
default: 1
file_tag: _brecal
outfile_suffix: ".bam"
program_executables:
Expand Down Expand Up @@ -1192,7 +1199,7 @@ gatk_haplotypecaller:
associated_recipe:
- mip
data_type: SCALAR
default: 0
default: 1
file_tag: _haptc
outfile_suffix: ".vcf"
program_executables:
Expand Down Expand Up @@ -1263,7 +1270,7 @@ gatk_genotypegvcfs:
associated_recipe:
- mip
data_type: SCALAR
default: 0
default: 1
file_tag: _gent
outfile_suffix: ".vcf"
program_executables:
Expand All @@ -1288,7 +1295,7 @@ gatk_gathervcfs:
associated_recipe:
- mip
data_type: SCALAR
default: 0
default: 1
file_tag: ""
outfile_suffix: ".vcf"
program_executables:
Expand All @@ -1306,7 +1313,7 @@ gatk_variantrecalibration:
associated_recipe:
- mip
data_type: SCALAR
default: 0
default: 1
file_tag: _vrecal
program_executables:
- bcftools
Expand Down Expand Up @@ -1459,14 +1466,15 @@ gatk_combinevariants_prioritize_caller:
associated_recipe:
- gatk_combinevariantcallsets
data_type: SCALAR
default: deepvariant
default: haplotypecaller,deepvariant
mandatory: no
type: recipe_argument
gatk_combinevariants_callers_to_combine:
associated_recipe:
- gatk_combinevariantcallsets
data_type: ARRAY
default:
- gatk_variantrecalibration
- glnexus_merge
mandatory: no
type: recipe_argument
Expand Down
70 changes: 69 additions & 1 deletion lib/MIP/Analysis.pm
Expand Up @@ -15,7 +15,7 @@ use warnings qw{ FATAL utf8 };

## CPANM
use autodie;
use List::MoreUtils qw{ all any };
use List::MoreUtils qw{ all any uniq };
use Log::Log4perl;
use Readonly;

Expand Down Expand Up @@ -48,6 +48,7 @@ BEGIN {
update_recipe_mode_for_fastq_compatibility
update_recipe_mode_for_pedigree
update_recipe_mode_for_wes
update_recipe_mode_for_mt
};
}

Expand Down Expand Up @@ -1404,6 +1405,73 @@ sub update_recipe_mode_for_wes {
return @warning_msgs;
}

sub update_recipe_mode_for_mt {

##Function : Update recipe mode for MT variant calling
##Returns :
##Arguments: $active_parameter_href => Active parameters for this analysis hash {REF}
## : $consensus_analysis_type => Consensus analysis_type
## : $recipes_ref => Recipes to update {REF}

my ($arg_href) = @_;

## Flatten argument(s)
my $active_parameter_href;
my $consensus_analysis_type;
my $recipes_ref;

my $tmpl = {
active_parameter_href => {
default => {},
defined => 1,
required => 1,
store => \$active_parameter_href,
strict_type => 1,
},
consensus_analysis_type => {
defined => 1,
required => 1,
store => \$consensus_analysis_type,
strict_type => 1,
},
recipes_ref => {
default => [],
defined => 1,
required => 1,
store => \$recipes_ref,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

return
if ( ( $active_parameter_href->{call_mt_variants_with_gatk} )
&& ( $consensus_analysis_type eq q{wgs} ) );

my $log = Log::Log4perl->get_logger($LOG_NAME);

RECIPE:
foreach my $recipe ( @{$recipes_ref} ) {

$active_parameter_href->{$recipe} = 0;

$log->warn(qq{Turned off $recipe for MT variant calling});
}

my @variant_callers = split $COMMA,
$active_parameter_href->{gatk_combinevariants_prioritize_caller};
my @prioritized_callers = grep { !/haplotypecaller/xms } @variant_callers;
$active_parameter_href->{gatk_combinevariants_prioritize_caller} = join q{,},
@prioritized_callers;

my @callers_to_combine = grep { !/gatk_variantrecalibration/xms }
@{ $active_parameter_href->{gatk_combinevariants_callers_to_combine} };
@{ $active_parameter_href->{gatk_combinevariants_callers_to_combine} } = @callers_to_combine;

return;
}

sub _parse_parameter_to_broadcast {

## Function : Parse parameter to broadcast
Expand Down
8 changes: 8 additions & 0 deletions lib/MIP/Cli/Mip/Analyse/Rd_dna.pm
Expand Up @@ -185,6 +185,14 @@ q{gatk_baserecalibration_known_sites, gatk_haplotypecaller_snp_known_set, gatk_v
)
);

option(
q{call_mt_variants_with_gatk} => (
documentation => q{Use GATK when calling MT variants},
is => q{rw},
isa => Bool,
)
);

option(
q{gatk_disable_auto_index_and_file_lock} => (
cmd_flag => q{gatk_dis_auto_ind_fl},
Expand Down
14 changes: 10 additions & 4 deletions lib/MIP/Recipes/Analysis/Gatk_baserecalibration.pm
Expand Up @@ -6,6 +6,7 @@ use charnames qw{ :full :short };
use English qw{ -no_match_vars };
use open qw{ :encoding(UTF-8) :std };
use File::Spec::Functions qw{ catdir catfile };
use List::Util qw{ first };
use Params::Check qw{ allow check last_error };
use POSIX;
use utf8;
Expand Down Expand Up @@ -190,6 +191,11 @@ sub analysis_gatk_baserecalibration {
sample_id => $sample_id,
}
);
my @contigs_size_ordered = @{ $file_info_href->{bam_contigs_size_ordered} };
if ( $active_parameter_href->{call_mt_variants_with_gatk} ) {

@contigs_size_ordered = ( $contigs_size_ordered[-1] );
}

## Outpaths
## Assign suffix
Expand All @@ -202,7 +208,7 @@ sub analysis_gatk_baserecalibration {
map {
catdir( $outsample_directory,
$merged_infile_prefix . $outfile_tag . $DOT . $_ . $outfile_suffix )
} @{ $file_info_href->{bam_contigs_size_ordered} };
} @contigs_size_ordered;

## Set and get the io files per chain, id and stream
%io = (
Expand Down Expand Up @@ -252,7 +258,7 @@ sub analysis_gatk_baserecalibration {
my %gatk_intervals = get_gatk_intervals(
{
analysis_type => $analysis_type,
contigs_ref => \@{ $file_info_href->{bam_contigs_size_ordered} },
contigs_ref => \@contigs_size_ordered,
exome_target_bed_href => $active_parameter_href->{exome_target_bed},
filehandle => $filehandle,
file_ending => $file_info_href->{exome_target_bed}[0],
Expand Down Expand Up @@ -291,7 +297,7 @@ sub analysis_gatk_baserecalibration {

my @base_quality_score_recalibration_files;
CONTIG:
foreach my $contig ( @{ $file_info_href->{bam_contigs_size_ordered} } ) {
foreach my $contig (@contigs_size_ordered) {

my $base_quality_score_recalibration_file =
$outfile_path_prefix . $DOT . $contig . $DOT . q{grp};
Expand Down Expand Up @@ -347,7 +353,7 @@ sub analysis_gatk_baserecalibration {
);

CONTIG:
foreach my $contig ( @{ $file_info_href->{bam_contigs_size_ordered} } ) {
foreach my $contig (@contigs_size_ordered) {

my $stderrfile_path = $xargs_file_path_prefix . $DOT . $contig . $DOT . q{stderr.txt};
gatk_applybqsr(
Expand Down
12 changes: 9 additions & 3 deletions lib/MIP/Recipes/Analysis/Gatk_gathervcfs.pm
Expand Up @@ -192,9 +192,13 @@ sub analysis_gatk_gathervcfs {
}
);

my @contigs =
$active_parameter_href->{call_mt_variants_with_gatk}
? ( $file_info_href->{contigs}[-1] )
: @{ $file_info_href->{contigs} };

## Gather vcf files
my @gatk_infile_paths =
map { $infile_path{$_} } @{ $file_info_href->{contigs} };
my @gatk_infile_paths = map { $infile_path{$_} } @contigs;

## GATK GatherVcfsCloud
gatk_gathervcfscloud(
Expand All @@ -214,7 +218,9 @@ sub analysis_gatk_gathervcfs {
if ( $active_parameter_href->{gatk_gathervcfs_bcf_file} ) {

# Exome or panel analysis
if ( $consensus_analysis_type =~ /wes|panel/xms ) {
if ( ( $consensus_analysis_type =~ /wes|panel/xms )
|| $active_parameter_href->{call_mt_variants_with_gatk} )
{

say {$filehandle} q{### Remove extra reference samples};
say {$filehandle} q{## GATK SelectVariants};
Expand Down
15 changes: 12 additions & 3 deletions lib/MIP/Recipes/Analysis/Gatk_genotypegvcfs.pm
Expand Up @@ -192,8 +192,13 @@ sub analysis_gatk_genotypegvcfs {
}
);

my @contigs =
$active_parameter_href->{call_mt_variants_with_gatk}
? ( $file_info_href->{contigs}[-1] )
: @{ $file_info_href->{contigs} };

CONTIG:
foreach my $contig ( @{ $file_info_href->{contigs} } ) {
foreach my $contig (@contigs) {

## Creates recipe directories (info & data & script), recipe script filenames and writes sbatch header
my ($recipe_file_path) = setup_script(
Expand Down Expand Up @@ -233,7 +238,9 @@ sub analysis_gatk_genotypegvcfs {
stream => q{in},
}
);
if ( $consensus_analysis_type =~ /wes|panel/xms ) {
if ( ( $consensus_analysis_type =~ /wes|panel/xms )
|| $active_parameter_href->{call_mt_variants_with_gatk} )
{

push @sample_vcf_path_lines,
$sample_id . $TAB . $sample_io{in}{file_path} . $NEWLINE;
Expand Down Expand Up @@ -264,7 +271,9 @@ sub analysis_gatk_genotypegvcfs {
say {$filehandle} q{## GATK GenomicsDBImport};

## Files to import into GenomicsDB
if ( $consensus_analysis_type =~ /wes|panel/xms ) {
if ( ( $consensus_analysis_type =~ /wes|panel/xms )
|| $active_parameter_href->{call_mt_variants_with_gatk} )
{

$sample_name_map_path = catfile( $outdir_path_prefix, q{analysis_sample_map.txt} );
my $echo_outfile_path = catfile( $outdir_path_prefix, q{dynamic_sample_map.txt} );
Expand Down

0 comments on commit 2bcfaba

Please sign in to comment.