Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| =head1 LICENSE | |
| Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | |
| Copyright [2016-2017] EMBL-European Bioinformatics Institute | |
| Licensed under the Apache License, Version 2.0 (the "License"); | |
| you may not use this file except in compliance with the License. | |
| You may obtain a copy of the License at | |
| http://www.apache.org/licenses/LICENSE-2.0 | |
| Unless required by applicable law or agreed to in writing, software | |
| distributed under the License is distributed on an "AS IS" BASIS, | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| See the License for the specific language governing permissions and | |
| limitations under the License. | |
| =head1 CONTACT | |
| Ensembl <dev@ensembl.org> | |
| =cut | |
| =head1 NAME | |
| SpliceRegion | |
| =head1 SYNOPSIS | |
| mv SpliceRegion.pm ~/.vep/Plugins | |
| ./vep -i variations.vcf --plugin SpliceRegion | |
| =head1 DESCRIPTION | |
| This is a plugin for the Ensembl Variant Effect Predictor (VEP) that | |
| provides more granular predictions of splicing effects. | |
| Three additional terms may be added: | |
| # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron) | |
| v | |
| ...EEEEEIIIIIIIIII... | |
| (E = exon, I = intron, v = variant location) | |
| # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron) | |
| vv vvv | |
| ...EEEEEIIIIIIIIII... | |
| # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end | |
| vvvvvvvvvvvvvvv | |
| ...IIIIIIIIIIIIIIIIIIIIEEEEE... | |
| =cut | |
| package SpliceRegion; | |
| use strict; | |
| use warnings; | |
| use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap); | |
| use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES); | |
| use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | |
| my %TERM_RANK = ( | |
| splice_donor_5th_base_variant => 1, | |
| splice_donor_region_variant => 2, | |
| splice_polypyrimidine_tract_variant => 3, | |
| ); | |
| sub feature_types { | |
| return ['Transcript']; | |
| } | |
| sub get_header_info { | |
| return { | |
| SpliceRegion => "SpliceRegion predictions", | |
| }; | |
| } | |
| sub run { | |
| my ($self, $tva) = @_; | |
| my $vf = $tva->variation_feature; | |
| my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end}); | |
| my $is_insertion = 0; | |
| if($vf_start > $vf_end) { | |
| ($vf_start, $vf_end) = ($vf_end, $vf_start); | |
| $is_insertion = 1; | |
| } | |
| my $tv = $tva->transcript_variation; | |
| my $tr = $tv->transcript; | |
| my $vf_tr_seq = $tva->feature_seq; | |
| # define some variables depending on transcript strand | |
| my ($strand_mod, $donor_coord, $acc_coord); | |
| if($tr->strand > 0) { | |
| $strand_mod = 1; | |
| $donor_coord = 'start'; | |
| $acc_coord = 'end'; | |
| } | |
| else { | |
| $strand_mod = -1; | |
| $donor_coord = 'end'; | |
| $acc_coord = 'start'; | |
| } | |
| my %results; | |
| for my $intron(@{$tv->_overlapped_introns($vf_start, $vf_end)}) { | |
| # define terms to check for and their regions | |
| my @terms = ( | |
| { | |
| term => 'splice_donor_5th_base_variant', | |
| region => [$intron->{$donor_coord} + (4 * $strand_mod), $intron->{$donor_coord} + (4 * $strand_mod)] | |
| }, | |
| { | |
| term => 'splice_donor_region_variant', | |
| region => [$intron->{$donor_coord} + (2 * $strand_mod), $intron->{$donor_coord} + (5 * $strand_mod)] | |
| }, | |
| { | |
| term => 'splice_polypyrimidine_tract_variant', | |
| region => [$intron->{$acc_coord} + (-16 * $strand_mod), $intron->{$acc_coord} + (-2 * $strand_mod)], | |
| # allele_specific_mod => { | |
| # A => '_to_purine', | |
| # G => '_to_purine', | |
| # } | |
| }, | |
| ); | |
| foreach my $term_hash(@terms) { | |
| my $pass = overlap($vf_start, $vf_end, sort {$a <=> $b} @{$term_hash->{region}}); | |
| if($pass) { | |
| my $term = $term_hash->{term}; | |
| # if(my $allele_specific_mods = $term_hash->{allele_specific_mod}) { | |
| # $term .= $allele_specific_mods->{$vf_tr_seq} || ''; | |
| # } | |
| $results{$term}++; | |
| last; | |
| } | |
| } | |
| } | |
| return {} unless %results; | |
| return { SpliceRegion => [sort {$TERM_RANK{$a} <=> $TERM_RANK{$b}} keys %results]}; | |
| } | |
| 1; | |