Skip to content

Commit

Permalink
fix to remove duplicate transcripts as they appear in refseq GRCh37 cache
Browse files Browse the repository at this point in the history
  • Loading branch information
William McLaren committed Dec 9, 2016
1 parent 60e190d commit 8ee92c7
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions modules/Bio/EnsEMBL/Variation/Utils/VEP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3933,13 +3933,30 @@ sub fetch_transcripts {
}

## hack to copy HGNC IDs and RefSeq stuff
my %counts;
foreach my $tr(@{$tr_cache->{$chr}}) {
$tr->{_gene_hgnc_id} = $hgnc_ids{$tr->{_gene_symbol}} if defined($tr->{_gene_symbol}) && defined($hgnc_ids{$tr->{_gene_symbol}});

if(defined($config->{refseq}) || defined($config->{merged})) {
$tr->{$_} ||= $refseq_stuff{$tr->{_gene}->stable_id}->{$_} for qw(_gene_symbol _gene_symbol_source _gene_hgnc_id);
$counts{$tr->{stable_id}}++
}
}

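## now a further hack to remove duplicates: when a stable_id was counted more than once above, drop the copies whose source is 'ensembl' and keep the rest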
if(defined($config->{refseq}) || defined($config->{merged})) {
my @new;
foreach my $tr(@{$tr_cache->{$chr}}) {
if($counts{$tr->{stable_id}} > 1) {
push @new, $tr unless $tr->{source} eq 'ensembl';
}
else {
push @new, $tr;
}
}

$tr_cache->{$chr} = \@new;
}
}

$tr_cache->{$chr} ||= [];
Expand Down

0 comments on commit 8ee92c7

Please sign in to comment.