<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -13,5 +13,5 @@
 
 * Minor enhancements
 
-  *HTTP request response time recorded in Page.
+  * HTTP request response time recorded in Page.
   * Use of persistent HTTP connections.
\ No newline at end of file</diff>
      <filename>CHANGELOG.rdoc</filename>
    </modified>
    <modified>
      <diff>@@ -1,6 +1,6 @@
 spec = Gem::Specification.new do |s| 
   s.name = &quot;anemone&quot;
-  s.version = &quot;0.2.2&quot;
+  s.version = &quot;0.2.3&quot;
   s.author = &quot;Chris Kite&quot;
   s.homepage = &quot;http://anemone.rubyforge.org&quot;
   s.rubyforge_project = &quot;anemone&quot;
@@ -20,7 +20,6 @@ spec = Gem::Specification.new do |s|
     README.rdoc
     bin/anemone
     lib/anemone.rb
-    lib/anemone/anemone.rb
     lib/anemone/core.rb
     lib/anemone/http.rb
     lib/anemone/page.rb</diff>
      <filename>anemone.gemspec</filename>
    </modified>
    <modified>
      <diff>@@ -1,2 +1,2 @@
 require 'rubygems'
-require 'anemone/anemone'
\ No newline at end of file
+require 'anemone/core'
\ No newline at end of file</diff>
      <filename>lib/anemone.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,19 +1,51 @@
-require 'net/http'
 require 'thread'
+require 'robots'
 require 'anemone/tentacle'
 require 'anemone/page'
 require 'anemone/page_hash'
 
 module Anemone
+
+  VERSION = '0.2.3';
+
+  #
+  # Convenience method to start a crawl
+  #
+  def Anemone.crawl(urls, options = {}, &amp;block)
+    Core.crawl(urls, options, &amp;block)
+  end  
+
   class Core
     # PageHash storing all Page objects encountered during the crawl
     attr_reader :pages
-    
+
+    # Hash of options for the crawl
+    attr_accessor :opts
+
+    DEFAULT_OPTS = {
+      # run 4 Tentacle threads to fetch pages
+      :threads =&gt; 4,
+      # disable verbose output
+      :verbose =&gt; false,
+      # don't throw away the page response body after scanning it for links
+      :discard_page_bodies =&gt; false,
+      # identify self as Anemone/VERSION
+      :user_agent =&gt; &quot;Anemone/#{Anemone::VERSION}&quot;,
+      # no delay between requests
+      :delay =&gt; 0,
+      # don't obey the robots exclusion protocol
+      :obey_robots_txt =&gt; false,
+      # by default, don't limit the depth of the crawl
+      :depth_limit =&gt; false,
+      # number of times HTTP redirects will be followed
+      :redirect_limit =&gt; 5
+    }
+
     #
     # Initialize the crawl with starting *urls* (single URL or Array of URLs)
     # and optional *block*
     #
-    def initialize(urls)
+    def initialize(urls, opts = {})
       @urls = [urls].flatten.map{ |url| url.is_a?(URI) ? url : URI(url) }
       @urls.each{ |url| url.path = '/' if url.path.empty? }
 
@@ -23,10 +55,8 @@ module Anemone
       @on_pages_like_blocks = Hash.new { |hash,key| hash[key] = [] }
       @skip_link_patterns = []
       @after_crawl_blocks = []
-      
-      if Anemone.options.obey_robots_txt
-        @robots = Robots.new(Anemone.options.user_agent)
-      end
+
+      process_options opts
 
       yield self if block_given?
     end
@@ -34,8 +64,8 @@ module Anemone
     #
     # Convenience method to start a new crawl
     #
-    def self.crawl(root)
-      self.new(root) do |core|
+    def self.crawl(urls, opts = {})
+      self.new(urls, opts) do |core|
         yield core if block_given?
         core.run
       end
@@ -104,8 +134,8 @@ module Anemone
       link_queue = Queue.new
       page_queue = Queue.new
 
-      Anemone.options.threads.times do
-        @tentacles &lt;&lt; Thread.new { Tentacle.new(link_queue, page_queue).run }
+      @opts[:threads].times do
+        @tentacles &lt;&lt; Thread.new { Tentacle.new(link_queue, page_queue, @opts).run }
       end
       
       @urls.each{ |url| link_queue.enq(url) }
@@ -115,12 +145,12 @@ module Anemone
         
         @pages[page.url] = page
         
-        puts &quot;#{page.url} Queue: #{link_queue.size}&quot; if Anemone.options.verbose
+        puts &quot;#{page.url} Queue: #{link_queue.size}&quot; if @opts[:verbose]
         
         # perform the on_every_page blocks for this page
         do_page_blocks(page)
 
-        page.discard_doc! if Anemone.options.discard_page_bodies
+        page.discard_doc! if @opts[:discard_page_bodies]
         
         links_to_follow(page).each do |link|
           link_queue.enq([link, page])
@@ -158,7 +188,15 @@ module Anemone
     end
     
     private    
-    
+
+    def process_options(options)
+      @opts = DEFAULT_OPTS.merge options
+
+      @opts[:threads] = 1 if @opts[:delay] &gt; 0
+
+      @robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
+    end
+
     #
     # Execute the after_crawl blocks
     #
@@ -199,10 +237,10 @@ module Anemone
     # Returns +false+ otherwise.
     #
     def visit_link?(link, from_page = nil)
-      allowed = Anemone.options.obey_robots_txt ? @robots.allowed?(link) : true
+      allowed = @opts[:obey_robots_txt] ? @robots.allowed?(link) : true
       
-      if from_page
-        too_deep = from_page.depth &gt;= Anemone.options.depth_limit rescue false
+      if from_page &amp;&amp; @opts[:depth_limit]
+        too_deep = from_page.depth &gt;= @opts[:depth_limit]
       else
         too_deep = false
       end
@@ -215,8 +253,7 @@ module Anemone
     # its URL matches a skip_link pattern.
     #
     def skip_link?(link)
-      @skip_link_patterns.each { |p| return true if link.path =~ p}
-      false
+      @skip_link_patterns.any? { |p| link.path =~ p }
     end
     
   end</diff>
      <filename>lib/anemone/core.rb</filename>
    </modified>
    <modified>
      <diff>@@ -4,10 +4,11 @@ require 'anemone/page'
 module Anemone
   class HTTP
     # Maximum number of redirects to follow on each get_response
-    REDIRECTION_LIMIT = 5
+    REDIRECT_LIMIT = 5
 
-    def initialize
+    def initialize(opts = {})
       @connections = {}
+      @opts = opts
     end
 
     #
@@ -31,7 +32,7 @@ module Anemone
 
         return Page.new(url, response.body.dup, code, response.to_hash, aka, referer, depth, response_time)
       rescue =&gt; e
-        if Anemone.options.verbose
+        if verbose?
           puts e.inspect
           puts e.backtrace
         end        
@@ -50,7 +51,7 @@ module Anemone
       code = Integer(response.code)
       loc = url
       
-      limit = REDIRECTION_LIMIT
+      limit = redirect_limit
       while response.is_a?(Net::HTTPRedirection) and limit &gt; 0
           loc = URI(response['location'])
           loc = url.merge(loc) if loc.relative?
@@ -66,7 +67,6 @@ module Anemone
     #
     def get_response(url, referer = nil)
       full_path = url.query.nil? ? url.path : &quot;#{url.path}?#{url.query}&quot;
-      user_agent = Anemone.options.user_agent rescue nil
       
       opts = {}
       opts['User-Agent'] = user_agent if user_agent
@@ -104,5 +104,18 @@ module Anemone
       end
       @connections[url.host][url.port] = http.start      
     end
+
+    def redirect_limit
+      @opts[:redirect_limit] || REDIRECT_LIMIT
+    end
+
+    def user_agent
+      @opts[:user_agent]
+    end
+
+    def verbose?
+      @opts[:verbose]
+    end
+
   end
 end</diff>
      <filename>lib/anemone/http.rb</filename>
    </modified>
    <modified>
      <diff>@@ -6,10 +6,11 @@ module Anemone
     #
     # Create a new Tentacle
     #
-    def initialize(link_queue, page_queue)
+    def initialize(link_queue, page_queue, opts = {})
       @link_queue = link_queue
       @page_queue = page_queue
-      @http = Anemone::HTTP.new
+      @http = Anemone::HTTP.new(opts)
+      @opts = opts
     end
     
     #
@@ -22,11 +23,17 @@ module Anemone
         
         break if link == :END
 
-        @page_queue.enq @http.fetch_page(link, from_page)
+        @page_queue &lt;&lt; @http.fetch_page(link, from_page)
 
-        sleep Anemone.options.delay
+        delay
       end
     end
 
+    private
+
+    def delay
+      sleep @opts[:delay] if @opts[:delay]
+    end
+
   end
 end
\ No newline at end of file</diff>
      <filename>lib/anemone/tentacle.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,42 +1,11 @@
 require File.dirname(__FILE__) + '/spec_helper'
 
 describe Anemone do
-
-  after(:each) do
-    # reset global options object to defaults
-    Anemone::DEFAULTS.each { |key, value| Anemone.options.send(&quot;#{key}=&quot;, value) }
-  end
-
+  
   it &quot;should have a version&quot; do
     Anemone.const_defined?('VERSION').should == true
   end
 
-  it &quot;should have options&quot; do
-    Anemone.should respond_to(:options)
-  end
-  
-  it &quot;should accept options for the crawl&quot; do
-    Anemone.crawl(SPEC_DOMAIN, :verbose =&gt; false, 
-                               :threads =&gt; 2, 
-                               :discard_page_bodies =&gt; true,
-                               :user_agent =&gt; 'test',
-                               :obey_robots_txt =&gt; true,
-                               :depth_limit =&gt; 3)
-
-    Anemone.options.verbose.should == false
-    Anemone.options.threads.should == 2
-    Anemone.options.discard_page_bodies.should == true
-    Anemone.options.delay.should == 0
-    Anemone.options.user_agent.should == 'test'
-    Anemone.options.obey_robots_txt.should == true
-    Anemone.options.depth_limit.should == 3
-  end
-  
-  it &quot;should use 1 thread if a delay is requested&quot; do
-    Anemone.crawl(SPEC_DOMAIN, :delay =&gt; 0.01, :threads =&gt; 2)
-    Anemone.options.threads.should == 1
-  end
-  
   it &quot;should return a Anemone::Core from the crawl, which has a PageHash&quot; do
     result = Anemone.crawl(SPEC_DOMAIN)
     result.should be_an_instance_of(Anemone::Core)</diff>
      <filename>spec/anemone_spec.rb</filename>
    </modified>
    <modified>
      <diff>@@ -173,5 +173,29 @@ module Anemone
         core.should have(4).pages
       end
     end
+
+    describe &quot;options&quot; do
+      it &quot;should accept options for the crawl&quot; do
+        core = Anemone.crawl(SPEC_DOMAIN, :verbose =&gt; false,
+                                          :threads =&gt; 2,
+                                          :discard_page_bodies =&gt; true,
+                                          :user_agent =&gt; 'test',
+                                          :obey_robots_txt =&gt; true,
+                                          :depth_limit =&gt; 3)
+
+        core.opts[:verbose].should == false
+        core.opts[:threads].should == 2
+        core.opts[:discard_page_bodies].should == true
+        core.opts[:delay].should == 0
+        core.opts[:user_agent].should == 'test'
+        core.opts[:obey_robots_txt].should == true
+        core.opts[:depth_limit].should == 3
+      end
+
+      it &quot;should use 1 thread if a delay is requested&quot; do
+        Anemone.crawl(SPEC_DOMAIN, :delay =&gt; 0.01, :threads =&gt; 2).opts[:threads].should == 1
+      end
+    end
+
   end
 end</diff>
      <filename>spec/core_spec.rb</filename>
    </modified>
  </modified>
  <removed type="array">
    <removed>
      <filename>lib/anemone/anemone.rb</filename>
    </removed>
  </removed>
  <parents type="array">
    <parent>
      <id>378a15ee41173590c5e678697aada44f9ebd8fbe</id>
    </parent>
  </parents>
  <author>
    <name>Chris Kite</name>
    <email>chris@chriskite.com</email>
  </author>
  <url>http://github.com/chriskite/anemone/commit/32153103240b1c34b8384b5eb691164c83efd1d6</url>
  <id>32153103240b1c34b8384b5eb691164c83efd1d6</id>
  <committed-date>2009-11-01T14:58:12-08:00</committed-date>
  <authored-date>2009-11-01T14:58:12-08:00</authored-date>
  <message>use per-crawl options instead of specifying options at the module level</message>
  <tree>14214dee4b7e3543f2d93523328560dc209466b5</tree>
  <committer>
    <name>Chris Kite</name>
    <email>chris@chriskite.com</email>
  </committer>
</commit>
