Skip to content

Commit

Permalink
remove unimplemented --incremental option from generate-names.pl, tweak handling of HashStore meta.json
Browse files Browse the repository at this point in the history
  • Loading branch information
rbuels committed Jan 17, 2013
1 parent 5b2e69e commit 566fc3c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 32 deletions.
38 changes: 16 additions & 22 deletions bin/generate-names.pl
Expand Up @@ -38,15 +38,6 @@ =head1 OPTIONS
built. If not passed, tries to estimate this based on the size of the
input names files.
=item --incremental
Add new entries to the names index, not deleting the old ones. When
using this option, it is best to pass the --totalNames parameter as
well. Otherwise, the first call to generate-names.pl will initialize
the names index to be optimal for the number of names added just in
that first call, which could lead to the names index being constructed
non-optimally.
=item --verbose
Print more progress messages.
Expand Down Expand Up @@ -82,13 +73,10 @@ =head1 OPTIONS

use GenomeDB;

my %trackHash;
my @includedTrackNames;
my @tracksWithNames;

my $outDir = "data";
my $verbose = 0;
my $incremental;
my $help;
my $max_completions = 20;
my $max_locations = 100;
Expand All @@ -100,7 +88,6 @@ =head1 OPTIONS
"locationLimit=i" => \$max_locations,
"verbose+" => \$verbose,
"thresh=i" => \$thresh,
"incremental" => \$incremental,
"totalNames=i" => \$est_total_name_records,
'tracks=s' => \@includedTrackNames,
'hashBits=i' => \$hash_bits,
Expand Down Expand Up @@ -148,17 +135,15 @@ =head1 OPTIONS

#print STDERR "Names files:\n", map " $_->{fullpath}\n", @names_files;

unless( $incremental ) {
# estimate the total number of name records we probably have based on the input file sizes
$est_total_name_records ||= int( (sum( map { -s $_->{fullpath} } @names_files )||0) / 70 );
if( $verbose ) {
print STDERR "Estimated $est_total_name_records total name records to index.\n";
}
# estimate the total number of name records we probably have based on the input file sizes
$est_total_name_records ||= int( (sum( map { -s $_->{fullpath} } @names_files )||0) / 70 );
if( $verbose ) {
print STDERR "Estimated $est_total_name_records total name records to index.\n";
}

my $nameStore = Bio::JBrowse::HashStore->open(
dir => catdir( $outDir, "names" ),
empty => !$incremental,
empty => 1,

# set the hash size to try to get about 10 name records per file
# (does not count prefix completions) if the store has existing
Expand All @@ -181,7 +166,11 @@ =head1 OPTIONS
for my $ref ( @refSeqs ) {
push @namerecord_buffer, [ @{$ref}{ qw/ name length name seqDir start end seqChunkSize/ }];
}
my $record_stream = $nameStore->sort_stream( sub {


my %trackHash;
my @tracksWithNames;
my $record_stream = sub {
while( ! @namerecord_buffer ) {
my $nameinfo = $name_records_iterator->() || do {
my $file = shift @names_files;
Expand All @@ -204,15 +193,20 @@ =head1 OPTIONS
}
}
return shift @namerecord_buffer;
});
};

# sort the stream by hash key to improve cache locality
$record_stream = $nameStore->sort_stream( $record_stream );

# now write it to the store
while( my $record = $record_stream->() ) {
insert( $nameStore, $record );
}

# store the list of tracks that have names
$nameStore->{meta}{track_names} = \@tracksWithNames;


# set up the name store in the trackList.json
$gdb->modifyTrackList( sub {
my ( $data ) = @_;
Expand Down
13 changes: 5 additions & 8 deletions src/perl5/Bio/JBrowse/HashStore.pm
Expand Up @@ -55,10 +55,11 @@ sub open {

%$self = (
%$self,
%{$self->_read_meta}
meta => $self->_read_meta
);

$self->{hash_bits} ||= 16;
$self->{hash_bits} ||= $self->{meta}{hash_bits} || 16;
$self->{meta}{hash_bits} = $self->{hash_bits};
$self->{hash_characters} = int( $self->{hash_bits}/4 );
$self->{file_extension} = '.json';

Expand All @@ -74,12 +75,8 @@ sub DESTROY {
File::Path::mkpath( $self->{dir} );
my $meta_path = $self->_meta_path;
CORE::open my $out, '>', $meta_path or die "$! writing $meta_path";
$out->print( JSON::to_json(
{
hash_bits => $self->{hash_bits},
%{ $self->{meta} || {} }
}
)) or die "$! writing $meta_path";
$out->print( JSON::to_json( $self->{meta} ) )
or die "$! writing $meta_path";
}
sub _meta_path {
File::Spec->catfile( shift->{dir}, 'meta.json' );
Expand Down
2 changes: 1 addition & 1 deletion tests/data/volvox_formatted_names/names/2.json
@@ -1 +1 @@
{"f0":{"exact":[],"prefix":["f05","f02","f03","f04","f01","f06","f09","f07","f08"]},"ctg":{"exact":[],"prefix":["ctgB","ctgA"]},"ag":{"exact":[],"prefix":["agt767.5","agt221.3","agt830.3","agt221.5","agt767.3","agt830.5"]},"protein":{"exact":[],"prefix":["Protein:HGA","Protein:HGB"]},"f12":{"exact":[["f12",0,"f12","ctgA",49757,50000]],"prefix":[]},"m02":{"exact":[["m02",3,"m02","ctgA",28331,30033]],"prefix":[]},"b10":{"exact":[],"prefix":["b101.2"]},"seg":{"exact":[],"prefix":["seg04","seg14","seg13","seg03","seg12","seg02","seg05","seg15","seg10","seg07","seg08","seg06","seg09","seg11","seg01"]},"agt830.3":{"exact":[["agt830.3",10,"agt830.3","ctgA",5409,7503]],"prefix":[]},"JBROWSE_TRACKS_WITH_NAMES":["ExampleFeatures","NameTest","snps","Motifs","Alignments","Genes","ReadingFrame","CDS","Transcript","Clones","EST"],"seg13":{"exact":[["seg13",4,"seg13","ctgA",49405,49476],["seg13",4,"seg13","ctgA",49761,50000]],"prefix":[]},"agt7":{"exact":[],"prefix":["agt767.5","agt767.3"]}}
{"f0":{"exact":[],"prefix":["f05","f02","f03","f04","f01","f06","f09","f07","f08"]},"ctg":{"exact":[],"prefix":["ctgB","ctgA"]},"ag":{"exact":[],"prefix":["agt767.5","agt221.3","agt830.3","agt221.5","agt767.3","agt830.5"]},"protein":{"exact":[],"prefix":["Protein:HGA","Protein:HGB"]},"f12":{"exact":[["f12",0,"f12","ctgA",49757,50000]],"prefix":[]},"m02":{"exact":[["m02",3,"m02","ctgA",28331,30033]],"prefix":[]},"b10":{"exact":[],"prefix":["b101.2"]},"seg":{"exact":[],"prefix":["seg04","seg14","seg13","seg03","seg12","seg02","seg05","seg15","seg10","seg07","seg08","seg06","seg09","seg11","seg01"]},"agt830.3":{"exact":[["agt830.3",10,"agt830.3","ctgA",5409,7503]],"prefix":[]},"seg13":{"exact":[["seg13",4,"seg13","ctgA",49405,49476],["seg13",4,"seg13","ctgA",49761,50000]],"prefix":[]},"agt7":{"exact":[],"prefix":["agt767.5","agt767.3"]}}
2 changes: 1 addition & 1 deletion tests/data/volvox_formatted_names/names/meta.json
@@ -1 +1 @@
{"hash_bits":"4"}
{"track_names":["ExampleFeatures","NameTest","snps","Motifs","Alignments","Genes","ReadingFrame","CDS","Transcript","Clones","EST"],"hash_bits":"4","last_changed_entry":"apple2"}

0 comments on commit 566fc3c

Please sign in to comment.