Permalink
Browse files

add SpliceRegion plugin

  • Loading branch information...
1 parent bab925d commit 8c906a973a9b221c2ca1d58aa1aae41f0706a492 @willmclaren willmclaren committed May 3, 2017
Showing with 171 additions and 0 deletions.
  1. +160 −0 SpliceRegion.pm
  2. +11 −0 plugin_config.txt
View
@@ -0,0 +1,160 @@
+=head1 LICENSE
+
+Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
+Copyright [2016-2017] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=head1 CONTACT
+
+ Ensembl <dev@ensembl.org>
+
+=cut
+
+=head1 NAME
+
+ SpliceRegion
+
+=head1 SYNOPSIS
+
+ mv SpliceRegion.pm ~/.vep/Plugins
+ ./vep -i variations.vcf --plugin SpliceRegion
+
+=head1 DESCRIPTION
+
+ This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
+ provides more granular predictions of splicing effects.
+
+ Three additional terms may be added:
+
+ # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron)
+
+             v
+ ...EEEEEIIIIIIIIII...
+
+ (E = exon, I = intron, v = variant location)
+
+ # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron); a variant at the 5th base is reported as splice_donor_5th_base_variant instead
+
+           vv v
+ ...EEEEEIIIIIIIIII...
+
+ # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end
+
+       vvvvvvvvvvvvvvv
+ ...IIIIIIIIIIIIIIIIIIIIEEEEE...
+
+
+=cut
+
+package SpliceRegion;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
+use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES);
+
+use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
+
+# Reporting priority of each SpliceRegion term: when several terms are
+# returned together they are sorted by ascending rank.
+my %TERM_RANK;
+@TERM_RANK{qw(
+  splice_donor_5th_base_variant
+  splice_donor_region_variant
+  splice_polypyrimidine_tract_variant
+)} = (1 .. 3);
+
+# Returns an arrayref naming the feature types handled by this plugin;
+# only 'Transcript' is listed.
+sub feature_types {
+  my @types = ('Transcript');
+  return \@types;
+}
+
+# Returns a hashref mapping the output field added by this plugin
+# (SpliceRegion) to its header description.
+sub get_header_info {
+  my %header = (SpliceRegion => "SpliceRegion predictions");
+  return \%header;
+}
+
+# Classify one transcript variation allele against the fine-grained splice
+# regions of every intron it overlaps.
+#
+# Arguments:
+#   $self - plugin instance (not used by this method)
+#   $tva  - allele object; must provide variation_feature() and
+#           transcript_variation()
+#
+# Returns:
+#   { SpliceRegion => [ term, ... ] } with the terms ordered by %TERM_RANK,
+#   or an empty hashref when the variant hits none of the regions.
+sub run {
+  my ($self, $tva) = @_;
+
+  my $vf = $tva->variation_feature;
+  my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});
+
+  # start > end denotes an insertion; swap so the pair forms an ordered
+  # range for the overlap tests below
+  ($vf_start, $vf_end) = ($vf_end, $vf_start) if $vf_start > $vf_end;
+
+  my $tv = $tva->transcript_variation;
+
+  # the donor site sits at the intron start on the forward strand and at
+  # the intron end on the reverse strand; the acceptor site is the opposite
+  my $strand_mod = $tv->transcript->strand > 0 ? 1 : -1;
+  my ($donor_coord, $acc_coord) =
+    $strand_mod > 0 ? ('start', 'end') : ('end', 'start');
+
+  # [ term, intron coordinate used as anchor, first offset, last offset ]
+  # Offsets are in transcript orientation; they do not depend on the intron,
+  # so the table is built once. Order matters: only the first matching term
+  # is recorded for each intron.
+  #
+  # Disabled feature kept for reference: the polypyrimidine term could carry
+  # an allele-specific suffix ('_to_purine' for alleles A and G), looked up
+  # with $tva->feature_seq and appended to the term name.
+  my @term_specs = (
+    ['splice_donor_5th_base_variant',       $donor_coord,   4,  4],
+    ['splice_donor_region_variant',         $donor_coord,   2,  5],
+    ['splice_polypyrimidine_tract_variant', $acc_coord,   -16, -2],
+  );
+
+  my %results;
+
+  for my $intron (@{ $tv->_overlapped_introns($vf_start, $vf_end) }) {
+    for my $spec (@term_specs) {
+      my ($term, $anchor_key, $first_offset, $last_offset) = @$spec;
+      my $anchor = $intron->{$anchor_key};
+
+      # on the reverse strand the offsets are subtracted, so put the two
+      # region bounds back into ascending order before testing overlap
+      my @region = sort { $a <=> $b } (
+        $anchor + ($first_offset * $strand_mod),
+        $anchor + ($last_offset * $strand_mod),
+      );
+
+      next unless overlap($vf_start, $vf_end, @region);
+
+      $results{$term}++;
+      last;
+    }
+  }
+
+  return {} unless %results;
+
+  return { SpliceRegion => [sort { $TERM_RANK{$a} <=> $TERM_RANK{$b} } keys %results] };
+}
+
+1;
+
View
@@ -471,6 +471,17 @@ my $VEP_PLUGIN_CONFIG = {
]
},
+ # SpliceRegion
+ {
+ "key" => "SpliceRegion",
+ "label" => "SpliceRegion",
+ "helptip" => "More granular predictions of splicing effects",
+ "available" => 0,
+ "enabled" => 0,
+ "section" => "Splicing predictions",
+ "plugin_url" => "https://raw.githubusercontent.com/Ensembl/VEP_plugins/release/88/SpliceRegion.pm",
+ },
+
## CONSERVATION
###############

0 comments on commit 8c906a9

Please sign in to comment.