Skip to content

Commit

Permalink
restrict number of open Bio::DB::HTS::Tabix object filehandles
Browse files Browse the repository at this point in the history
  • Loading branch information
William McLaren committed Jun 8, 2017
1 parent e4096bc commit b2790f0
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
44 changes: 41 additions & 3 deletions modules/Bio/EnsEMBL/VEP/AnnotationSource/Cache/VariationTabix.pm
Expand Up @@ -215,9 +215,7 @@ sub _annotate_pm {
foreach my $chr(keys %$by_chr) {

my $source_chr = $self->get_source_chr_name($chr);
my $file = $self->get_dump_file_name($source_chr);
next unless -e $file;
my $tabix_obj = $self->{tabix_obj}->{$chr} ||= Bio::DB::HTS::Tabix->new(filename => $file);
my $tabix_obj = $self->_get_tabix_obj($source_chr);
next unless $tabix_obj;

foreach my $vf(@{$by_chr->{$chr}}) {
Expand All @@ -242,6 +240,46 @@ sub _annotate_pm {
}


=head2 _get_tabix_obj

  Arg 1      : string $chr
  Example    : $as->_get_tabix_obj($chr);
  Description: Get Bio::DB::HTS::Tabix object for this chromosome.
               Uses a least-recently-used cache that limits the number
               of open filehandles. Lookups that find no dump file are
               cached too, so the filesystem is not re-checked on every
               call. The cache holds 5 entries by default; set
               $self->{_tabix_obj_cache_size} to a positive integer to
               change this.
  Returntype : Bio::DB::HTS::Tabix, or undef if there is no dump file
               for this chromosome
  Exceptions : none
  Caller     : _annotate_pm()
  Status     : Stable

=cut

sub _get_tabix_obj {
  my ($self, $chr) = @_;

  # entries are ordered least recently used first, most recently used last
  my $cache = $self->{_tabix_obj_cache} ||= [];

  # fall back to the default limit unless a sane override has been set
  my $max_entries = $self->{_tabix_obj_cache_size};
  $max_entries = 5 unless defined($max_entries) && $max_entries =~ /^\d+$/ && $max_entries > 0;

  # cache hit: move the entry to the back so that chromosomes in active use
  # are not the first to be evicted (and then needlessly reopened)
  foreach my $i (0..$#{$cache}) {
    next unless $cache->[$i]->{chr} eq $chr;

    my ($entry) = splice(@$cache, $i, 1);
    push @$cache, $entry;

    # obj is undef for a cached "no file for this chromosome" result
    return $entry->{obj};
  }

  # cache miss: open the dump file if there is one
  my $file = $self->get_dump_file_name($chr);

  my $tabix_obj;
  $tabix_obj = Bio::DB::HTS::Tabix->new(filename => $file) if $file && -e $file;

  # store the result even when undef so the file test is not repeated
  push @$cache, { obj => $tabix_obj, chr => $chr };

  # restrict number of open objects, closing least recently used first
  while(scalar @$cache > $max_entries) {
    my $evicted = shift @$cache;
    $evicted->{obj}->close() if $evicted->{obj};
  }

  return $tabix_obj;
}


=head2 delimiter
Example : $delim = $as->delimiter();
Expand Down
9 changes: 8 additions & 1 deletion t/AnnotationSource_Cache_VariationTabix.t
Expand Up @@ -407,13 +407,20 @@ SKIP: {
SKIP: {

## REMEMBER TO UPDATE THIS SKIP NUMBER IF YOU ADD MORE TESTS!!!!
skip 'Bio::DB::HTS::Tabix module not available', 3 unless $Bio::EnsEMBL::VEP::AnnotationSource::Cache::VariationTabix::CAN_USE_TABIX_PM;
skip 'Bio::DB::HTS::Tabix module not available', 6 unless $Bio::EnsEMBL::VEP::AnnotationSource::Cache::VariationTabix::CAN_USE_TABIX_PM;

$p = Bio::EnsEMBL::VEP::Parser::VCF->new({config => $cfg, file => $test_cfg->{test_vcf}, valid_chromosomes => [21]});
$ib = Bio::EnsEMBL::VEP::InputBuffer->new({config => $cfg, parser => $p});
$ib->next();
$vf = $ib->buffer->[0];

my $tabix_obj = $c->_get_tabix_obj(21);
is(ref($tabix_obj), 'Bio::DB::HTS::Tabix', '_get_tabix_obj - ref');
ok($tabix_obj eq $c->_get_tabix_obj(21), '_get_tabix_obj - cache OK');

delete $c->{_tabix_obj_cache};
ok($tabix_obj ne $c->_get_tabix_obj(21), '_get_tabix_obj - clear cache new obj');

$vf_hash = {
21 => [$vf],
};
Expand Down

0 comments on commit b2790f0

Please sign in to comment.