<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -46,7 +46,7 @@ namespace :examples do
     t.rcov_opts = %[--exclude &quot;gems/*,/Library/Ruby/*,config/*&quot; --text-summary  --sort coverage --no-validator-links]
   end
   
-  RAILS_VERSIONS = %w[2.0.2 2.1.0 2.1.1 2.2.2 2.3.1]
+  RAILS_VERSIONS = %w[2.0.2 2.1.0 2.1.1 2.2.2 2.3.1 2.3.2]
   
   desc &quot;Run exmaples with multiple versions of rails&quot;
   task :multi_rails do</diff>
      <filename>Rakefile</filename>
    </modified>
    <modified>
      <diff>@@ -147,6 +147,24 @@ describe Relevance::Tarantula::Crawler do
       crawler.expects(:blip)
       crawler.crawl_queued_forms
     end
+    
+    it &quot;breaks out early if a timeout is set&quot; do
+      crawler = Relevance::Tarantula::Crawler.new
+      stub_puts_and_print(crawler)
+      crawler.proxy = stub
+      response = stub(:code =&gt; &quot;200&quot;)
+      crawler.links_to_crawl = [stub(:href =&gt; &quot;/foo&quot;, :method =&gt; :get)]
+      crawler.proxy.expects(:get).returns(response).times(4)
+      crawler.forms_to_crawl &lt;&lt; stub_everything(:method =&gt; &quot;post&quot;, 
+                                                :action =&gt; &quot;/foo&quot;,
+                                                :data =&gt; &quot;some data&quot;,
+                                                :to_s =&gt; &quot;stub&quot;)
+      crawler.proxy.expects(:post).returns(response).times(2)
+      crawler.expects(:links_completed_count).returns(0,1,2,3,4,5).times(6)
+      crawler.times_to_crawl = 2
+      crawler.crawl
+                                                
+    end
 
     it &quot;resets to the initial links/forms on subsequent crawls when times_to_crawl &gt; 1&quot; do
       crawler = Relevance::Tarantula::Crawler.new
@@ -160,7 +178,7 @@ describe Relevance::Tarantula::Crawler do
                                                 :data =&gt; &quot;some data&quot;,
                                                 :to_s =&gt; &quot;stub&quot;)
       crawler.proxy.expects(:post).returns(response).times(2)
-      crawler.expects(:links_completed_count).returns(*(0..6).to_a).times(6)
+      crawler.expects(:links_completed_count).returns(0,1,2,3,4,5).times(6)
       crawler.times_to_crawl = 2
       crawler.crawl
     end
@@ -182,9 +200,11 @@ describe Relevance::Tarantula::Crawler do
     it &quot;blips the current progress if !verbose&quot; do
       crawler = Relevance::Tarantula::Crawler.new
       crawler.stubs(:verbose).returns false
+      crawler.stubs(:timeout_if_too_long)
       crawler.expects(:print).with(&quot;\r 0 of 0 links completed               &quot;)
       crawler.blip
     end
+    
     it &quot;blips nothing if verbose&quot; do
       crawler = Relevance::Tarantula::Crawler.new
       crawler.stubs(:verbose).returns true
@@ -220,7 +240,7 @@ describe Relevance::Tarantula::Crawler do
     crawler.expects(:finished?).times(3).returns(false, false, true)
     crawler.expects(:crawl_queued_links).times(2)
     crawler.expects(:crawl_queued_forms).times(2)
-    crawler.do_crawl
+    crawler.do_crawl(1)
   end
   
   it &quot;asks each reporter to write its report in report_dir&quot; do
@@ -296,6 +316,7 @@ describe Relevance::Tarantula::Crawler do
   end
   
   describe &quot;allow_nnn_for&quot; do
+
     it &quot;installs result as a response_code_handler&quot; do
       crawler = Relevance::Tarantula::Crawler.new
       crawler.response_code_handler.should == Relevance::Tarantula::Result
@@ -312,6 +333,54 @@ describe Relevance::Tarantula::Crawler do
       crawler = Relevance::Tarantula::Crawler.new
       lambda{crawler.foo}.should raise_error(NoMethodError)
     end
+    
+  end
+  
+  describe &quot;timeouts&quot; do
+
+    it &quot;sets start and end times for a single crawl&quot; do
+      start_time = Time.parse(&quot;March 1st, 2008 10:00am&quot;)
+      end_time = Time.parse(&quot;March 1st, 2008 10:10am&quot;)
+      Time.stubs(:now).returns(start_time, end_time)
+
+      crawler = Relevance::Tarantula::Crawler.new
+      stub_puts_and_print(crawler)
+      crawler.proxy = stub_everything(:get =&gt; response = stub(:code =&gt; &quot;200&quot;))
+      crawler.crawl
+      crawler.crawl_start_times.first.should == start_time
+      crawler.crawl_end_times.first.should == end_time
+    end
+    
+    it &quot;has elasped time for a crawl&quot; do
+      start_time = Time.parse(&quot;March 1st, 2008 10:00am&quot;)
+      elasped_time_check = Time.parse(&quot;March 1st, 2008, 10:10:00am&quot;)
+      Time.stubs(:now).returns(start_time, elasped_time_check)
+
+      crawler = Relevance::Tarantula::Crawler.new
+      stub_puts_and_print(crawler)
+      crawler.proxy = stub_everything(:get =&gt; response = stub(:code =&gt; &quot;200&quot;))
+      crawler.crawl
+      crawler.elasped_time_for_pass(0).should == 600.seconds
+    end
+    
+    it &quot;raises out of the crawl if elasped time is greater then the crawl timeout&quot; do
+      start_time = Time.parse(&quot;March 1st, 2008 10:00am&quot;)
+      elasped_time_check = Time.parse(&quot;March 1st, 2008, 10:35:00am&quot;)
+      Time.stubs(:now).returns(start_time, elasped_time_check)
+
+      crawler = Relevance::Tarantula::Crawler.new
+      crawler.crawl_timeout = 5.minutes
+      
+      crawler.links_to_crawl = [stub(:href =&gt; &quot;/foo1&quot;, :method =&gt; :get), stub(:href =&gt; &quot;/foo2&quot;, :method =&gt; :get)]
+      crawler.proxy = stub
+      crawler.proxy.stubs(:get).returns(response = stub(:code =&gt; &quot;200&quot;))
+      
+      stub_puts_and_print(crawler)
+      lambda {
+        crawler.do_crawl(0)
+      }.should raise_error
+    end
+    
   end
   
 end
\ No newline at end of file</diff>
      <filename>examples/relevance/tarantula/crawler_example.rb</filename>
    </modified>
    <modified>
      <diff>@@ -7,11 +7,13 @@ class Relevance::Tarantula::Crawler
   extend Forwardable
   include Relevance::Tarantula
 
+  class CrawlTimeout &lt; RuntimeError; end
+
   attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
                 :reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
                 :form_signatures_queued, :max_url_length, :response_code_handler,
-                :times_to_crawl, :fuzzers, :test_name
-  attr_reader   :transform_url_patterns, :referrers, :failures, :successes
+                :times_to_crawl, :fuzzers, :test_name, :crawl_timeout
+  attr_reader   :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
 
   def initialize
     @max_url_length = 1024
@@ -22,6 +24,8 @@ class Relevance::Tarantula::Crawler
     @form_signatures_queued = Set.new
     @links_to_crawl = []
     @forms_to_crawl = []
+    @crawl_start_times, @crawl_end_times = [], []
+    @crawl_timeout = 20.minutes
     @referrers = {}
     @skip_uri_patterns = [
       /^javascript/,
@@ -53,13 +57,18 @@ class Relevance::Tarantula::Crawler
     orig_form_signatures_queued = @form_signatures_queued.dup
     orig_links_to_crawl = @links_to_crawl.dup
     orig_forms_to_crawl = @forms_to_crawl.dup
-    @times_to_crawl.times do |i|
+    @times_to_crawl.times do |num|
       queue_link url
-      do_crawl
-
-      puts &quot;#{(i+1).ordinalize} crawl&quot; if @times_to_crawl &gt; 1
+      
+      begin 
+        do_crawl num
+      rescue CrawlTimeout =&gt; e
+        puts e.message
+      end
+      
+      puts &quot;#{(num+1).ordinalize} crawl&quot; if @times_to_crawl &gt; 1
 
-      if i + 1 &lt; @times_to_crawl
+      if num + 1 &lt; @times_to_crawl
         @links_queued = orig_links_queued
         @form_signatures_queued = orig_form_signatures_queued
         @links_to_crawl = orig_links_to_crawl
@@ -77,19 +86,21 @@ class Relevance::Tarantula::Crawler
     @links_to_crawl.empty? &amp;&amp; @forms_to_crawl.empty?
   end
 
-  def do_crawl
+  def do_crawl(number)
     while (!finished?)
-      crawl_queued_links
-      crawl_queued_forms
+      @crawl_start_times &lt;&lt; Time.now
+      crawl_queued_links(number)
+      crawl_queued_forms(number)
+      @crawl_end_times &lt;&lt; Time.now
     end
   end
 
-  def crawl_queued_links
+  def crawl_queued_links(number = 0)
     while (link = @links_to_crawl.pop)
       response = proxy.send(link.method, link.href)
       log &quot;Response #{response.code} for #{link}&quot;
       handle_link_results(link, response)
-      blip
+      blip(number)
     end
   end
 
@@ -124,13 +135,17 @@ class Relevance::Tarantula::Crawler
     Relevance::Tarantula::Response.new(:code =&gt; &quot;404&quot;, :body =&gt; e.message, :content_type =&gt; &quot;text/plain&quot;)
   end
 
-  def crawl_queued_forms
+  def crawl_queued_forms(number = 0)
     while (form = @forms_to_crawl.pop)
       response = crawl_form(form)
       handle_form_results(form, response)
-      blip
+      blip(number)
     end
   end
+  
+  def elasped_time_for_pass(num)
+    Time.now - crawl_start_times[num]
+  end
 
   def grab_log!
     @log_grabber &amp;&amp; @log_grabber.grab!
@@ -234,9 +249,16 @@ class Relevance::Tarantula::Crawler
       total_links_count - links_remaining_count
   end
 
-  def blip
+  def blip(number = 0)
     unless verbose
       print &quot;\r #{links_completed_count} of #{total_links_count} links completed               &quot;
+      timeout_if_too_long(number)
+    end
+  end
+  
+  def timeout_if_too_long(number = 0)
+    if elasped_time_for_pass(number) &gt; crawl_timeout
+      raise CrawlTimeout, &quot;Exceeded crawl time of #{crawl_timeout} - breaking...&quot;
     end
   end
 end</diff>
      <filename>lib/relevance/tarantula/crawler.rb</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,7 @@ begin
   gem 'tidy'
   require 'tidy'
 rescue Gem::LoadError
-  # tidy not available
+  puts &quot;Tidy gem not available -- 'gem install tidy' to get it.&quot;
 end
 
 if defined? Tidy</diff>
      <filename>lib/relevance/tarantula/tidy_handler.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>bd13ffee53a17eb3117607ba4c7ef011e46a3692</id>
    </parent>
  </parents>
  <author>
    <name>Rob Sanheim</name>
    <email>rsanheim@gmail.com</email>
  </author>
  <url>http://github.com/relevance/tarantula/commit/c39c561b54ff4e522e132dc265f7c5dcbfdee918</url>
  <id>c39c561b54ff4e522e132dc265f7c5dcbfdee918</id>
  <committed-date>2009-04-06T20:32:38-07:00</committed-date>
  <authored-date>2009-04-06T20:32:38-07:00</authored-date>
  <message>implement a timeout for each crawl (or &quot;pass&quot;) ...defaults to 20 minutes</message>
  <tree>0cf22a9c15480ee8852e04b0364e1a991a8f533d</tree>
  <committer>
    <name>Rob Sanheim</name>
    <email>rsanheim@gmail.com</email>
  </committer>
</commit>
