Skip to content

Commit

Permalink
Use separate list of SO terms for SV
Browse files Browse the repository at this point in the history
  • Loading branch information
jemten committed Nov 25, 2022
1 parent 0148fb8 commit d2fc3bf
Show file tree
Hide file tree
Showing 7 changed files with 214 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- Turn on Stringtie and gffcompare by default
- Run varg on research vcf
- Increase max for coverage calculation to 500x
- Separate list of ranked SO terms for structural variants to ensure that the right SO term gets picked as the most severe for SVs

### Tools

Expand Down
13 changes: 11 additions & 2 deletions lib/MIP/Cli/Mip/Vcfparser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ use MIP::Main::Vcfparser qw{ mip_vcfparser };

command_short_description(q{MIP vcfparser command});

command_long_description(
q{Entry point for splitting VCF into clinical and research variants});
command_long_description(q{Entry point for splitting VCF into clinical and research variants});

command_usage(q{vcfparser [options] infile.vcf [OPTIONS] > outfile.vcf});

Expand Down Expand Up @@ -60,6 +59,7 @@ sub run {
my $padding = $arg_href->{padding};
my $per_gene = $arg_href->{per_gene};
my $pli_values_file_path = $arg_href->{pli_values_file};
my $variant_type = $arg_href->{variant_type} // q{snv};
my $write_software_tag = $arg_href->{write_software_tag};
my $log_file = $arg_href->{log_file};

Expand Down Expand Up @@ -100,6 +100,7 @@ sub run {
select_feature_file => $select_feature_file,
select_feature_matching_column => $select_feature_matching_column,
select_outfile_path => $select_outfile,
variant_type => $variant_type,
write_software_tag => $write_software_tag,
}
);
Expand Down Expand Up @@ -223,6 +224,14 @@ sub _build_usage {
)
);

option(
q{variant_type} => (
documentation => q{Variant type to parse; snv(default)/sv},
is => q{rw},
isa => enum( [qw{ snv sv}] ),
)
);

option(
q{write_software_tag} => (
default => 1,
Expand Down
160 changes: 160 additions & 0 deletions lib/MIP/Constants.pm
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ BEGIN {
$SEMICOLON
$SINGLE_QUOTE
%SO_CONSEQUENCE_SEVERITY
%SO_CONSEQUENCE_SEVERITY_SV
$SPACE
$TAB
$TEST_MODE
Expand Down Expand Up @@ -246,6 +247,165 @@ Readonly our %SO_CONSEQUENCE_SEVERITY => (
},
);

## Sequence Ontology (SO) consequence terms ranked for structural variants (SV)
## Each SO term maps to a hash with two keys:
##   rank                      => Integer position of the term in the SV ranking
##                                (ranks run 1..39 without gaps or duplicates)
##   genetic_region_annotation => Label for the genetic region the term belongs to
## Kept separate from %SO_CONSEQUENCE_SEVERITY so that SVs can use their own ordering
## NOTE(review): presumably a lower rank means a more severe consequence - confirm against the code consuming the ranks
Readonly our %SO_CONSEQUENCE_SEVERITY_SV => (
transcript_ablation => {
rank => 1,
genetic_region_annotation => q{exonic},
},
splice_acceptor_variant => {
rank => 2,
genetic_region_annotation => q{splicing},
},
splice_donor_variant => {
rank => 3,
genetic_region_annotation => q{splicing},
},
stop_gained => {
rank => 4,
genetic_region_annotation => q{exonic},
},
frameshift_variant => {
rank => 5,
genetic_region_annotation => q{exonic},
},
stop_lost => {
rank => 6,
genetic_region_annotation => q{exonic},
},
start_lost => {
rank => 7,
genetic_region_annotation => q{exonic},
},
coding_sequence_variant => {
rank => 8,
genetic_region_annotation => q{exonic},
},
transcript_amplification => {
rank => 9,
genetic_region_annotation => q{exonic},
},
inframe_insertion => {
rank => 10,
genetic_region_annotation => q{exonic},
},
inframe_deletion => {
rank => 11,
genetic_region_annotation => q{exonic},
},
missense_variant => {
rank => 12,
genetic_region_annotation => q{exonic},
},
protein_altering_variant => {
rank => 13,
genetic_region_annotation => q{exonic},
},
splice_region_variant => {
rank => 14,
genetic_region_annotation => q{splicing},
},
splice_donor_5th_base_variant => {
rank => 15,
genetic_region_annotation => q{splicing},
},
splice_donor_region_variant => {
rank => 16,
genetic_region_annotation => q{splicing},
},
splice_polypyrimidine_tract_variant => {
rank => 17,
genetic_region_annotation => q{splicing},
},
incomplete_terminal_codon_variant => {
rank => 18,
genetic_region_annotation => q{exonic},
},
non_coding_transcript_exon_variant => {
rank => 19,
genetic_region_annotation => q{ncRNA_exonic},
},
synonymous_variant => {
rank => 20,
genetic_region_annotation => q{exonic},
},
start_retained_variant => {
rank => 21,
genetic_region_annotation => q{exonic},
},
stop_retained_variant => {
rank => 22,
genetic_region_annotation => q{exonic},
},
mature_miRNA_variant => {
rank => 23,
genetic_region_annotation => q{ncRNA_exonic},
},
q{5_prime_UTR_variant} => {
rank => 24,
genetic_region_annotation => q{5UTR},
},
q{3_prime_UTR_variant} => {
rank => 25,
genetic_region_annotation => q{3UTR},
},
intron_variant => {
rank => 26,
genetic_region_annotation => q{intronic},
},
NMD_transcript_variant => {
rank => 27,
genetic_region_annotation => q{ncRNA},
},
non_coding_transcript_variant => {
rank => 28,
genetic_region_annotation => q{ncRNA},
},
upstream_gene_variant => {
rank => 29,
genetic_region_annotation => q{upstream},
},
downstream_gene_variant => {
rank => 30,
genetic_region_annotation => q{downstream},
},
TFBS_ablation => {
rank => 31,
genetic_region_annotation => q{TFBS},
},
TFBS_amplification => {
rank => 32,
genetic_region_annotation => q{TFBS},
},
TF_binding_site_variant => {
rank => 33,
genetic_region_annotation => q{TFBS},
},
regulatory_region_ablation => {
rank => 34,
genetic_region_annotation => q{regulatory_region},
},
regulatory_region_amplification => {
rank => 35,
genetic_region_annotation => q{regulatory_region},
},
feature_elongation => {
rank => 36,
genetic_region_annotation => q{genomic_feature},
},
regulatory_region_variant => {
rank => 37,
genetic_region_annotation => q{regulatory_region},
},
feature_truncation => {
rank => 38,
genetic_region_annotation => q{genomic_feature},
},
intergenic_variant => {
rank => 39,
genetic_region_annotation => q{intergenic},
},
);

## Contigs
Readonly our %PRIMARY_CONTIG => (
38 => {
Expand Down
19 changes: 14 additions & 5 deletions lib/MIP/Main/Vcfparser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ use autodie qw{ :all };
use Readonly;

## MIPs lib/
use MIP::Constants qw{ $COLON $LOG_NAME $NEWLINE %SO_CONSEQUENCE_SEVERITY $TAB };
use MIP::Constants
qw{ $COLON $LOG_NAME $NEWLINE %SO_CONSEQUENCE_SEVERITY %SO_CONSEQUENCE_SEVERITY_SV $TAB };
use MIP::File::Format::Feature_file qw{ read_feature_file };
use MIP::File::Format::Pli qw{ load_pli_file };
use MIP::Log::MIP_log4perl qw{ retrieve_log };
Expand Down Expand Up @@ -46,6 +47,7 @@ sub mip_vcfparser {
## : $select_feature_file => Select feature file
## : $select_feature_matching_column => Select feature matching column
## : $select_outfile_path => Select file path
## : $variant_type => Type of variants to parse
## : $vcf_in_fh => VCF in filehandle
## : $write_software_tag => Write software tag to vcf header switch

Expand All @@ -56,6 +58,7 @@ sub mip_vcfparser {
my $pli_values_file_path;
my $range_feature_annotation_columns_ref;
my $range_feature_file;
my $variant_type;
my $select_feature_annotation_columns_ref;
my $select_feature_matching_column;
my $select_outfile_path;
Expand Down Expand Up @@ -114,9 +117,14 @@ sub mip_vcfparser {
store => \$select_feature_matching_column,
strict_type => 1,
},
select_outfile_path => { store => \$select_outfile_path, strict_type => 1, },
vcf_in_fh => { defined => 1, required => 1, store => \$vcf_in_fh, },
write_software_tag => {
select_outfile_path => { store => \$select_outfile_path, strict_type => 1, },
variant_type => {
allow => [qw{ snv sv }],
store => \$variant_type,
strict_type => 1,
},
vcf_in_fh => { defined => 1, required => 1, store => \$vcf_in_fh, },
write_software_tag => {
allow => [ 0, 1 ],
default => 1,
store => \$write_software_tag,
Expand All @@ -127,7 +135,8 @@ sub mip_vcfparser {
check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

## Constants
my %consequence_severity = %SO_CONSEQUENCE_SEVERITY;
## Select the SO consequence ranking that matches the variant type being parsed
## The variant_type argument has neither a default nor a required flag in the
## argument template, so it is undef when a caller omits it. Fall back to snv
## (the same default the CLI applies) to avoid comparing undef with eq
my %consequence_severity =
( ( $variant_type // q{snv} ) eq q{sv} ) ? %SO_CONSEQUENCE_SEVERITY_SV : %SO_CONSEQUENCE_SEVERITY;

my ( %meta_data, %range_data, %tree, %pli_score );

Expand Down
12 changes: 12 additions & 0 deletions lib/MIP/Program/Mip.pm
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ sub mip_vcfparser {
## : $stderrfile_path => Stderrfile path
## : $stderrfile_path_append => Append stderr info to file path
## : $stdoutfile_path => Stdoutfile path
## : $variant_type => Type of variants to parse <snv/sv>

my ($arg_href) = @_;

Expand All @@ -322,6 +323,7 @@ sub mip_vcfparser {
my $stderrfile_path;
my $stderrfile_path_append;
my $stdoutfile_path;
my $variant_type;

## Default(s)
my $padding;
Expand Down Expand Up @@ -393,6 +395,11 @@ sub mip_vcfparser {
store => \$stdoutfile_path,
strict_type => 1,
},
variant_type => {
allow => [ undef, qw{snv sv} ],
store => \$variant_type,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};
Expand Down Expand Up @@ -465,6 +472,11 @@ sub mip_vcfparser {
push @commands, q{--select_outfile} . $SPACE . $select_outfile;
}

if ($variant_type) {

push @commands, q{--variant_type} . $SPACE . $variant_type;
}

push @commands,
unix_standard_streams(
{
Expand Down
2 changes: 2 additions & 0 deletions lib/MIP/Recipes/Analysis/Mip_vcfparser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,7 @@ sub analysis_mip_vcfparser_sv_wes {
select_feature_file_path => $select_file,
select_feature_matching_column => $select_file_matching_column,
select_outfile => $select_outfile,
variant_type => q{sv},
}
);
say {$filehandle} $NEWLINE;
Expand Down Expand Up @@ -1411,6 +1412,7 @@ sub analysis_mip_vcfparser_sv_wgs {
select_feature_file_path => $select_file,
select_feature_matching_column => $select_file_matching_column,
select_outfile => $select_outfile,
variant_type => q{sv},
}
);

Expand Down

0 comments on commit d2fc3bf

Please sign in to comment.