Skip to content

Commit

Permalink
feat(refactor): Added get_contig_set sub for getting primary contig v…
Browse files Browse the repository at this point in the history
…ersions and sets

- Added primary contig test fixture
- Added tests
  • Loading branch information
henrikstranneheim committed Dec 22, 2020
1 parent a6ca0fd commit 1661518
Show file tree
Hide file tree
Showing 7 changed files with 422 additions and 84 deletions.
57 changes: 51 additions & 6 deletions lib/MIP/Contigs.pm
Expand Up @@ -31,6 +31,7 @@ BEGIN {
delete_non_wes_contig
delete_male_contig
generate_contig_interval_file
get_contig_set
set_contigs
sort_contigs_to_contig_set
update_contigs_for_run
Expand Down Expand Up @@ -83,8 +84,7 @@ sub check_select_file_contigs {

$log->fatal( q{Option 'vcfparser_select_file' contig(s): } . join $SPACE,
@unique_select_contigs );
$log->fatal(
q{Is not a subset of the human genome reference contigs: } . join $SPACE,
$log->fatal( q{Is not a subset of the human genome reference contigs: } . join $SPACE,
@{$contigs_ref} );
exit 1;
}
Expand Down Expand Up @@ -357,6 +357,52 @@ sub generate_contig_interval_file {
return %bed_file_path;
}

sub get_contig_set {

## Function : Get contig set per genome build version
## Returns : @{$PRIMARY_CONTIG{$version}{$set}} | %{$PRIMARY_CONTIG{$version}}
## Arguments: $set => Name of contig set to get
## : $version => Version of the human genome reference

my ($arg_href) = @_;

## Flatten argument(s)
my $set;
my $version;

my $tmpl = {
set => {
allow => [qw{ contigs contigs_size_ordered synonyms_map }],
defined => 1,
store => \$set,
strict_type => 1,
},
version => {
defined => 1,
required => 1,
store => \$version,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

use MIP::Validate::Data qw{ %constraint };

return if ( not exists $PRIMARY_CONTIG{$version} );

if ($set) {

return @{ $PRIMARY_CONTIG{$version}{$set} }
if ( ref $PRIMARY_CONTIG{$version}{$set} eq q{ARRAY} );

return %{ $PRIMARY_CONTIG{$version}{$set} }
if ( ref $PRIMARY_CONTIG{$version}{$set} eq q{HASH} );

}
return %{ $PRIMARY_CONTIG{$version} };
}

sub set_contigs {

## Function : Set contig prefix and contig names depending on reference used.
Expand Down Expand Up @@ -414,8 +460,8 @@ sub set_contigs {

set_primary_contigs(
{
file_info_href => $file_info_href,
primary_contigs_ref => \@{ $primary_contig_clone{$version}{$contig_set} },
file_info_href => $file_info_href,
primary_contigs_ref => \@{ $primary_contig_clone{$version}{$contig_set} },
primary_contig_set_name => $contig_set,
}
);
Expand Down Expand Up @@ -683,8 +729,7 @@ sub _split_interval_file_contigs {

if ( defined $contig ) {

$outfile_path =
defined $file_ending ? $outfile_path . $file_ending : $outfile_path;
$outfile_path = defined $file_ending ? $outfile_path . $file_ending : $outfile_path;

perl_nae_oneliners(
{
Expand Down
44 changes: 25 additions & 19 deletions lib/MIP/File/Format/Vep.pm
Expand Up @@ -17,7 +17,7 @@ use autodie qw{ :all };
use Readonly;

## MIPs lib/
use MIP::Constants qw{ $NEWLINE %PRIMARY_CONTIG $SPACE $TAB };
use MIP::Constants qw{ $LOG_NAME $NEWLINE %PRIMARY_CONTIG $SPACE $TAB };

BEGIN {
require Exporter;
Expand All @@ -31,25 +31,18 @@ sub create_vep_synonyms_file {

## Function : Create the synonyms file for VEP option '--synonyms'
## Returns : undef or $outfile_path
## Arguments: $log => Log object
## : $outfile_path => Outfile path to write to
## Arguments: $outfile_path => Outfile path to write to
## : $version => Human genome version {REF}

my ($arg_href) = @_;

## Flatten argument(s)
my $log;
my $outfile_path;
my $version;

## Default(s)

my $tmpl = {
log => {
defined => 1,
required => 1,
store => \$log,
},
outfile_path => {
defined => 1,
required => 1,
Expand All @@ -66,33 +59,46 @@ sub create_vep_synonyms_file {

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

## Make local copy
my %primary_contig = Readonly::Clone %PRIMARY_CONTIG;
use MIP::Contigs qw{ get_contig_set };

my $log = Log::Log4perl->get_logger($LOG_NAME);

my %contig_synonyms_map = get_contig_set(
{
set => q{synonyms_map},
version => $version,
}
);

return if ( not %contig_synonyms_map );

## No defined synonyms map
return if ( not exists $primary_contig{$version}{synonyms_map} );
my @contigs = get_contig_set(
{
set => q{contigs},
version => $version,
}
);

$log->info( q{Creating VEP synonyms file: } . $outfile_path, $NEWLINE );

## Create dir if it does not exists
make_path( dirname($outfile_path) );

open my $filehandle_SYS, q{>}, $outfile_path
open my $filehandle, q{>}, $outfile_path
or $log->logdie(qq{Cannot open $outfile_path: $ERRNO });

CONTIG:
foreach my $primary_contig ( @{ $primary_contig{$version}{contigs} } ) {
foreach my $primary_contig (@contigs) {

## Unpack
my $synonymous_contig = $primary_contig{$version}{synonyms_map}{$primary_contig};
my $synonymous_contig = $contig_synonyms_map{$primary_contig};

## No defined synonym
next CONTIG if ( not $synonymous_contig );

say {$filehandle_SYS} $primary_contig . $TAB . $synonymous_contig;
say {$filehandle} $primary_contig . $TAB . $synonymous_contig;
}
$log->info( q{Wrote: } . $outfile_path, $NEWLINE );
close $filehandle_SYS;
close $filehandle;
return $outfile_path;
}

Expand Down
96 changes: 42 additions & 54 deletions lib/MIP/Recipes/Analysis/Vep.pm
Expand Up @@ -162,7 +162,6 @@ sub analysis_vep_wgs {
## Constants
Readonly my $VEP_FORK_NUMBER => 4;

## Retrieve logger object
my $log = Log::Log4perl->get_logger($LOG_NAME);

## Unpack parameters
Expand Down Expand Up @@ -268,7 +267,6 @@ sub analysis_vep_wgs {
## Get the vep synonyms file path for if required (grch38)
my $vep_synonyms_file_path = create_vep_synonyms_file(
{
log => $log,
outfile_path => catfile( $outdir_path_prefix, q{synonyms.tsv} ),
version => $genome_reference_version,
}
Expand Down Expand Up @@ -334,10 +332,8 @@ sub analysis_vep_wgs {
}
}

my $stderrfile_path =
$xargs_file_path_prefix . $DOT . $contig . $DOT . q{stderr.txt};
my $stdoutfile_path =
$xargs_file_path_prefix . $DOT . $contig . $DOT . q{stdout.txt};
my $stderrfile_path = $xargs_file_path_prefix . $DOT . $contig . $DOT . q{stderr.txt};
my $stdoutfile_path = $xargs_file_path_prefix . $DOT . $contig . $DOT . q{stdout.txt};
variant_effect_predictor(
{
assembly => $assembly_version,
Expand All @@ -353,12 +349,12 @@ sub analysis_vep_wgs {
outfile_path => $outfile_path{$contig},
plugins_dir_path => $active_parameter_href->{vep_plugins_dir_path},
plugins_ref => \@plugins,
reference_path => $active_parameter_href->{human_genome_reference},
regions_ref => [$contig],
stderrfile_path => $stderrfile_path,
stdoutfile_path => $stdoutfile_path,
synonyms_file_path => $vep_synonyms_file_path,
vep_features_ref => \@vep_features_ref,
reference_path => $active_parameter_href->{human_genome_reference},
regions_ref => [$contig],
stderrfile_path => $stderrfile_path,
stdoutfile_path => $stdoutfile_path,
synonyms_file_path => $vep_synonyms_file_path,
vep_features_ref => \@vep_features_ref,
}
);
say {$xargsfilehandle} $NEWLINE;
Expand Down Expand Up @@ -391,13 +387,13 @@ sub analysis_vep_wgs {

submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down Expand Up @@ -516,8 +512,7 @@ sub analysis_vep_sv_wes {
use MIP::Processmanagement::Processes qw{ submit_recipe };
use MIP::Program::Vep qw{ variant_effect_predictor };
use MIP::Script::Setup_script qw{ setup_script };
use MIP::Sample_info
qw{ set_recipe_metafile_in_sample_info set_recipe_outfile_in_sample_info };
use MIP::Sample_info qw{ set_recipe_metafile_in_sample_info set_recipe_outfile_in_sample_info };

### PREPROCESSING:

Expand Down Expand Up @@ -640,7 +635,6 @@ sub analysis_vep_sv_wes {
## Get the vep synonyms file path for if required (grch38)
my $vep_synonyms_file_path = create_vep_synonyms_file(
{
log => $log,
outfile_path => catfile( $outdir_path_prefix, q{synonyms.tsv} ),
version => $genome_reference_version,
}
Expand Down Expand Up @@ -676,8 +670,7 @@ sub analysis_vep_sv_wes {
push @vep_features_ref, $vep_feature;
}

my $vep_infile_path =
$infile_path_prefix . $UNDERSCORE . q{fixedsvlength} . $infile_suffix;
my $vep_infile_path = $infile_path_prefix . $UNDERSCORE . q{fixedsvlength} . $infile_suffix;
my $stderrfile_path = $recipe_file_path . $DOT . q{stderr.txt};
my $stdoutfile_path = $recipe_file_path . $DOT . q{stdout.txt};
variant_effect_predictor(
Expand Down Expand Up @@ -716,13 +709,13 @@ sub analysis_vep_sv_wes {
);
submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down Expand Up @@ -840,8 +833,7 @@ sub analysis_vep_sv_wgs {
use MIP::Processmanagement::Processes qw{ submit_recipe };
use MIP::Program::Vep qw{ variant_effect_predictor };
use MIP::Recipes::Analysis::Xargs qw{ xargs_command };
use MIP::Sample_info
qw{ set_recipe_metafile_in_sample_info set_recipe_outfile_in_sample_info };
use MIP::Sample_info qw{ set_recipe_metafile_in_sample_info set_recipe_outfile_in_sample_info };
use MIP::Script::Setup_script qw{ setup_script };

### PREPROCESSING:
Expand Down Expand Up @@ -970,7 +962,6 @@ sub analysis_vep_sv_wgs {
## Get the vep synonyms file path for if required (grch38)
my $vep_synonyms_file_path = create_vep_synonyms_file(
{
log => $log,
outfile_path => catfile( $outdir_path_prefix, q{synonyms.tsv} ),
version => $genome_reference_version,
}
Expand All @@ -985,8 +976,7 @@ sub analysis_vep_sv_wgs {
## Get genome source and version to be compatible with VEP
$assembly_version = _get_assembly_name( { assembly_version => $assembly_version, } );

my $vep_infile_path =
$infile_path_prefix . $UNDERSCORE . q{fixedsvlength} . $infile_suffix;
my $vep_infile_path = $infile_path_prefix . $UNDERSCORE . q{fixedsvlength} . $infile_suffix;

# VEP custom annotations
my @custom_annotations = _get_custom_annotation_cmds(
Expand Down Expand Up @@ -1112,13 +1102,13 @@ sub analysis_vep_sv_wgs {
);
submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down Expand Up @@ -1317,7 +1307,6 @@ sub analysis_vep {
## Get the vep synonyms file path for if required (grch38)
my $vep_synonyms_file_path = create_vep_synonyms_file(
{
log => $log,
outfile_path => catfile( $outdir_path_prefix, q{synonyms.tsv} ),
version => $genome_reference_version,
}
Expand Down Expand Up @@ -1436,13 +1425,13 @@ sub analysis_vep {

submit_recipe(
{
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
base_command => $profile_base_command,
case_id => $case_id,
dependency_method => q{sample_to_case},
job_id_chain => $job_id_chain,
job_id_href => $job_id_href,
job_reservation_name => $active_parameter_href->{job_reservation_name},
log => $log,
max_parallel_processes_count_href =>
$file_info_href->{max_parallel_processes_count},
recipe_file_path => $recipe_file_path,
Expand Down Expand Up @@ -1486,8 +1475,7 @@ sub _get_custom_annotation_cmds {
foreach my $annotation_href ( values %{$vep_custom_annotation_href} ) {

## Remove all undef elements and then join
my $cmd = join $COMMA,
grep { defined } @{$annotation_href}{@order_custom_options};
my $cmd = join $COMMA, grep { defined } @{$annotation_href}{@order_custom_options};
push @custom_annotations, $cmd;
}
return @custom_annotations;
Expand Down Expand Up @@ -1639,7 +1627,7 @@ q?foreach my $bit (split /\;/, $data[7]) { my ($key, $value) = split /\=/, $bit;
# Add $end position
$perl_fix_sv_nolengths .= q?if(defined($info{END})) { $end = $info{END}; } ?;

# If SV, strip SV type entry and check if no length, then do not print variant else print
# If SV, strip SV type entry and check if no length, then do not print variant else print
$perl_fix_sv_nolengths .=
q?if($alt=~ /\<|\[|\]|\>/) { $alt=~ s/\<|\>//g; $alt=~ s/\:.+//g; if($start >= $end && $alt=~ /del/i) {} else {print $_} } ?;

Expand Down

0 comments on commit 1661518

Please sign in to comment.