Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

work on parallel parsing

  • Loading branch information...
commit 8dacf6a309e602cb8ed48fb5cc7cf44a74fd4267 1 parent b0bd697
@dimus dimus authored
View
2  .rvmrc
@@ -1 +1 @@
-rvm use ruby-1.9.2-p290@biodiversity--create
+rvm use ruby-1.9.2-p290@biodiversity --create
View
6 Gemfile
@@ -1,10 +1,12 @@
source "http://rubygems.org"
-gem "jeweler"
gem "treetop"
-gem "facter"
gem "parallel"
+group :development do
+ gem "jeweler"
+end
+
group :test do
gem "ruby-debug19", :require => "ruby-debug"
gem "rspec"
View
2  Gemfile.lock
@@ -4,7 +4,6 @@ GEM
archive-tar-minitar (0.5.2)
columnize (0.3.4)
diff-lcs (1.1.3)
- facter (1.6.3)
git (1.2.5)
jeweler (1.6.4)
bundler (~> 1.0)
@@ -41,7 +40,6 @@ PLATFORMS
ruby
DEPENDENCIES
- facter
jeweler
parallel
rspec
View
5 README.rdoc
@@ -85,6 +85,11 @@ You can use it as a library
# to get detailed information about elements of the name
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
+ # to parse using several CPUs (4 seems to be optimal)
+ parser = ParallelParser.new # ParallelParser.new(4) will try to run 4 processes if hardware allows
+ array_of_names = ["Betula alba", "Homo sapiens", ...]
+ parser.parse(array_of_names) # -> {"Betula alba" => "{scientificName...}", "Homo sapiens" => "{scientificName...}", ...}
+
# to resolve lsid and get back RDF file
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
View
1  Rakefile
@@ -30,6 +30,7 @@ begin
gem.bindir = 'bin'
gem.executables = ['nnparse', 'parserver']
gem.add_dependency('treetop')
+ gem.add_dependency('parallel')
gem.add_dependency('json') if ruby_version < 19
gem.add_development_dependency "rspec"
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
View
11 lib/biodiversity/parser.rb
@@ -25,11 +25,14 @@ def self.clean(a_string)
class ParallelParser
- def initialize
- require 'facter'
+ def initialize(processes_num = nil)
require 'parallel'
cpu_num
- @processes_num = cpu_num > 1 ? cpu_num - 1 : 1
+ if processes_num.to_i > 0
+ @processes_num = [processes_num, cpu_num - 1].min
+ else
+ @processes_num = cpu_num > 3 ? cpu_num - 2 : 1
+ end
end
def parse(names_list)
@@ -38,7 +41,7 @@ def parse(names_list)
end
def cpu_num
- @cpu_num ||= Facter.processorcount.to_i
+ @cpu_num ||= Parallel.processor_count
end
private
View
9 spec/parser/scientific_name.spec.rb
@@ -61,4 +61,13 @@
res.keys.size.should == names.size
end
+ it "should parse several names in parallel with given num of processes" do
+ names = []
+ read_test_file { |n| names << (n[:name]) if n[:name] }
+ names.uniq!
+ pparser = ParallelParser.new(4)
+ res = pparser.parse(names)
+ names.size.should > 100
+ res.keys.size.should == names.size
+ end
end
Please sign in to comment.
Something went wrong with that request. Please try again.