Skip to content

Commit

Permalink
make PartialSorter capable of estimating the size of each item in the…
Browse files Browse the repository at this point in the history
… stream to manage its memory usage
  • Loading branch information
rbuels committed Jan 31, 2013
1 parent c10bab9 commit d607ce9
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/perl5/Bio/JBrowse/HashStore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,9 @@ reading them back in sorted order.
sub sort_stream {
my ( $self, $in_stream ) = @_;

use Data::Dump;
my $sorted_stream = Bio::JBrowse::PartialSorter
->new(
size => 20_000_000,
mem => $self->{sort_mem} || 265 * 2**20,
compare => sub($$) {
$_[0][0] cmp $_[1][0]
},
Expand Down Expand Up @@ -177,6 +176,7 @@ sub sort_stream {
};
}


=head2 empty
Clear the store of all contents. Deletes all files and directories
Expand Down
31 changes: 26 additions & 5 deletions src/perl5/Bio/JBrowse/PartialSorter.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Bio::JBrowse::PartialSorter - partially sort a stream
=head1 METHODS
=head2 new( size => $num_items, compare => sub($$) )
=head2 new( size => $num_items, mem => $mem_bytes, compare => sub($$) )
=cut

Expand All @@ -26,21 +26,42 @@ Returns another stream, partially sorted with the comparison function.
# Partially sort the stream $in.
#
# $in is a code ref that returns the next item each time it is called,
# and a false value once it is exhausted (so false items cannot be
# carried by the stream).
#
# Returns a code ref with the same calling convention.  Items come out
# in chunks of at most {size} items, each chunk sorted with {compare}
# (string comparison by default).
#
# If {size} was not given to the constructor, it is derived from the
# {mem} budget (default 256 MiB) and an estimate of the per-item size
# obtained by sampling the start of the stream.  The derived value is
# stored back into $self->{size}.
sub sort {
    my ( $self, $in ) = @_;

    my @buffer;

    my $size = $self->{size};
    unless( $size ) {
        # Sampling reads items from $in into @buffer; they are not
        # lost, but they still have to be sorted before being emitted.
        my $item_size = $self->_estimate_item_size( $in, 100, \@buffer );

        if( $item_size && $item_size > 0 ) {
            $size = sprintf( '%.0f', ( $self->{mem} || 256*1024*1024 ) / $item_size );
            # Items larger than the memory budget would round to a
            # chunk size of 0, and the stream would never be read.
            $size = 1 if $size < 1;
            $self->{size} = $size;
        }
        else {
            # No usable estimate (e.g. the stream yielded nothing).
            # Dividing by it would die, so fall back to a fixed chunk
            # size and do not cache it on the object.
            $size = 10_000_000;
        }
    }

    my $compare = $self->{compare} || sub { $a cmp $b };

    # False while @buffer holds items that have not been sorted yet,
    # which is the case for items read during size estimation.
    my $buffer_is_sorted = 0;

    return sub {
        unless( @buffer && $buffer_is_sorted ) {
            while( @buffer < $size && ( my $d = $in->() ) ) {
                push @buffer, $d;
            }
            return unless @buffer; # stream ended
            @buffer = sort $compare @buffer;
            $buffer_is_sorted = 1;
        }
        return shift @buffer;
    };
}

# Estimate the average size of an item in $in_stream.
#
# Reads items from $in_stream (a code ref returning one item per call,
# and a false value when exhausted) and pushes them onto the array
# referenced by $buffer until it holds $sample_size items or the stream
# ends.  The sampled items stay in $buffer so the caller can still
# process them.
#
# Returns the mean of Devel::Size::total_size() over the items in
# $buffer, or 0 if $buffer is empty after sampling.
sub _estimate_item_size {
    require List::Util;
    require Devel::Size;

    my ( $self, $in_stream, $sample_size, $buffer ) = @_;

    while( @$buffer < $sample_size && ( my $d = $in_stream->() ) ) {
        push @$buffer, $d;
    }

    # The stream may hold fewer than $sample_size items, so average
    # over what was actually collected rather than what was requested.
    my $sampled = @$buffer;
    return 0 unless $sampled;

    my $total_size = List::Util::sum(
        map Devel::Size::total_size( $_ ), @$buffer
    );

    return $total_size / $sampled;
}

1;

0 comments on commit d607ce9

Please sign in to comment.