<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -12,7 +12,6 @@ documents including (but not in any way limited to) web pages}
   s.has_rdoc=true
   s.extra_rdoc_files=['README', 'LICENSE']
   s.rdoc_options=['--main', 'README']
-  s.test_files=Dir.glob('test/test_*.rb')
   s.executables = ['ariel']
   s.files = Dir['lib/**/*'] + Dir['test/**/*'] + s.extra_rdoc_files + Dir['examples/**/*'] + Dir['bin/*'] 
 end</diff>
      <filename>ariel.gemspec</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,6 @@
 require 'optparse'
 require 'yaml'
 
-require 'breakpoint'
 options = {}
 
 OptionParser.new do |opts|</diff>
      <filename>bin/ariel</filename>
    </modified>
    <modified>
      <diff>@@ -25,33 +25,39 @@ require 'ariel/rule_set'
 # 1. Define a structure for the data you wish to extract. For example:
 #
 #     @structure = Ariel::StructureNode.new do |r|
-#       r.article do |a|
-#         a.title
-#         a.author
-#         a.date
-#         a.body
+#       r.item :article do |a|
+#         a.item :title
+#         a.item :author
+#         a.item :date
+#         a.item :body
 #       end
-#       r.comment_list do |c|
-#         c.author
-#         c.date
-#         c.body
+#       r.list :comments do |c|
+#         c.list_item :comment do |c|
+#           c.item :author
+#           c.item :date
+#           c.item :body
+#         end
 #       end
 #     end
 # 2. Label these fields in a few example documents (normally at least 3).
 #    Labels are in the form of &lt;tt&gt;&lt;l:label_name&gt;...&lt;/l:label_name&gt;&lt;/tt&gt;
 # 3. Ariel will read these examples, and try to generate suitable rules that can
-#    be used to extract this data from other similarly structured documents.
+#    be used to extract this data from other similarly structured documents. Use
+#    Ariel#learn to initiate learn ruling.
 # 4. A wrapper has been generated - we can now happily load documents with the
 #    same structure (normally documents generated by the same rules, so
 #    different pages from a single site perhaps) and query the extracted data.
+#    See Ariel#extract.
 module Ariel
 
   class &lt;&lt; self
     # Given a root Node::Structure and a list of labeled_files (either IO objects
-    # or strings representing a file that can be opened with File.read, will learn
+    # or strings representing files that can be opened with File.read, will learn
     # rules using the labeled examples. The passed Node::Structure tree is
     # returned with new RuleSets added containing the learnt rules. This structure
-    # can now be used with Ariel.extract.
+    # can now be used with Ariel#extract on unlabeled documents.
+    #
+    # &lt;tt&gt;Ariel.learn structure, 'file1.html', fileobj, 'file2.html'&lt;/tt&gt;
     def learn(structure, *labeled_files)
       raise ArgumentError, &quot;Passed structure is not the parent of the document tree&quot; unless structure.parent.nil?
       labeled_strings=collect_strings(labeled_files)
@@ -63,6 +69,9 @@ module Ariel
     # string the parameter will be opened using File.read). If a block is given,
     # each root Node::Extracted is yielded. An array of each root extracted node
     # is returned.
+    #
+    # &lt;tt&gt;Ariel.extract structure, 'file1.txt', fileobj, 'file2.html'  # =&gt;&lt;/tt&gt; an
+    # array of 3 Node::Extracted objects
     def extract(structure, *files_to_extract)
       raise ArgumentError, &quot;Passed structure is not the parent of the document tree&quot; unless structure.parent.nil?
       extractions=[]</diff>
      <filename>lib/ariel.rb</filename>
    </modified>
    <modified>
      <diff>@@ -5,11 +5,10 @@ module Ariel
   # methods will remove candidates from the internal candidates array.
   class CandidateRefiner
 
-    attr_accessor :candidates, :cache
-    def initialize(candidates, examples, cache={})
+    attr_accessor :candidates
+    def initialize(candidates, examples)
       @candidates=candidates.dup #Just in case we directly modify the array. Shouldn't happen.
       @examples=examples
-      @cache=cache
     end
 
     # Selects the Rule candidates that have the most matches of a given type</diff>
      <filename>lib/ariel/candidate_refiner.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,6 +1,6 @@
 module Ariel
 
-  # A set of methods for use when dealing with strings from labeled documents
+  # A set of methods for use when dealing with strings from labeled documents.
   module LabelUtils
     S_LABEL=&quot;&lt;&quot;
     E_LABEL=&quot;&gt;&quot;</diff>
      <filename>lib/ariel/label_utils.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,5 +1,4 @@
 module Ariel
-  require 'breakpoint'
   
   # Provides methods that read an example document, using a Node::Structure tree
   # to populate a tree of Nodes with each labeled example.
@@ -7,6 +6,11 @@ module Ariel
 
     class &lt;&lt; self
 
+      # As its first argument it takes a root Node::Structure to which any
+      # learnt rules will be added. The following arguments are strings
+      # containing labeled examples for members of the passed Node::Structure
+      # tree. Ariel#learn is the preferred interface for rule-learning - this
+      # one may change.
       def supervise_learning(structure, *labeled_strings)
         raise ArgumentError, &quot;No labeled strings were given&quot; if labeled_strings.size==0
         loaded_example_hash=process_labeled_strings(structure, *labeled_strings)</diff>
      <filename>lib/ariel/labeled_document_loader.rb</filename>
    </modified>
    <modified>
      <diff>@@ -48,6 +48,7 @@ module Ariel
       end
 #      rule = order_rule(rule) #STALKER paper suggests that the generated rules should be ordered. This doesn't make sense, seeing as they are all generated based only on examples not matched by previous rules
       Log.debug &quot;Generated rules: #{combined_rules.inspect}&quot;
+      Rule.clear_cache
       return combined_rules
     end
 </diff>
      <filename>lib/ariel/learner.rb</filename>
    </modified>
    <modified>
      <diff>@@ -5,12 +5,13 @@ module Ariel
   # Very simple Log class. By default outputs to stdout and ignored messages
   # below :info level. Should probably get rid of the usage of Singleton as it's
   # used very little, with the classes eigenclass/singleton class used mostly
-  # for the same purpose.
+  # for the same purpose. Use Log.set_level to lower/raise the logging level.
   class Log
     include Singleton
 
     SEVERITY={:debug=&gt;0, :info=&gt;1, :warn=&gt;2, :error=&gt;3}
 
+    # Level defaults to :debug if $DEBUG is set and :info if not.
     def initialize
       self.class.output_to_stdout
       if $DEBUG
@@ -25,6 +26,7 @@ module Ariel
         define_method(level) {|message| instance; log message, level}
       end
 
+      # Set the log level to the given key from the SEVERITY constant.
       def set_level(level)
         if SEVERITY.has_key? level
           @log_level=level
@@ -41,10 +43,13 @@ module Ariel
         @output=:stdout
       end
 
+      # Sends all output to a file called debug.log in the current directory.
       def output_to_file
         @output=:file
       end
 
+      # Not intended to be used directly, preferred to use the methods
+      # corresponding to different serverity levels.
       def log(message, level)
         if SEVERITY[@log_level] &lt;= SEVERITY[level]
           message = &quot;#{level}: #{message}&quot;</diff>
      <filename>lib/ariel/log.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,10 +1,15 @@
 module Ariel
- 
+
+  # A generic Node object. As an end user, you have no need to use this. All
+  # children are stored in a hash. #id and #type are undefined so they can be
+  # used freely as part of a Node::Structure
   class Node
     removed_methods=[:id, :type]
     removed_methods.each {|meth| undef_method meth}
     attr_accessor :parent, :children, :node_name
 
+    # If the name is a string, it's converted to a symbol. If not it's just
+    # stored as is.
     def initialize(name)
       @children={}
       if name.kind_of? String
@@ -25,6 +30,7 @@ module Ariel
       meta.send(:define_method, node.node_name.to_s.to_sym) {@children[node.node_name]}
     end
 
+    # Yields each descendant node. If passed true will also yield itself.
     def each_descendant(include_self=false)
       if include_self
         node_queue=[self]</diff>
      <filename>lib/ariel/node.rb</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,10 @@ require 'ariel/node'
 module Ariel
 
   # Each Node::Extracted has a name, a TokenStream and a structure which points to
-  # the relevant Node::Structure.
+  # the relevant Node::Structure. Skip straight to #search, #/ and #at for the
+  # query interface. This is strongly recommended over using the built in method
+  # accessors (a method isn't defined if a given field isn't extracted, so
+  # you're going to have to catch a lot of potential errors).
   class Node::Extracted &lt; Node
     attr_accessor :tokenstream, :structure_node
 
@@ -13,7 +16,7 @@ module Ariel
       @tokenstream=tokenstream
     end
 
-    # Returns the text contained in the extracted nodes TokenStream.
+    # Returns the text contained in the TokenStream.
     def extracted_text
       tokenstream.text
     end
@@ -71,6 +74,8 @@ module Ariel
     end
     alias :/ :search
 
+    # Acts exactly like #search, but returns only the first match or nil if
+    # there are no matches.
     def at(search_string)
       self.search(search_string).first
     end</diff>
      <filename>lib/ariel/node/extracted.rb</filename>
    </modified>
    <modified>
      <diff>@@ -16,8 +16,8 @@ module Ariel
 
     # Used to extend an already created Node. e.g.
     #  node.extend_structure do |r|
-    #    r.new_field1
-    #    r.new_field2
+    #    r.item :new_field1
+    #    r.item :new_field2
     #  end
     def extend_structure(&amp;block)
       yield self if block_given?
@@ -44,7 +44,7 @@ module Ariel
       return extractions
     end
 
-    # Applies the extraction rules stored in the current StructureNode and all its
+    # Applies the extraction rules stored in the current Node::Structure and all its
     # descendant children.
     def apply_extraction_tree_on(root_node, extract_labels=false)
       extraction_queue = [root_node]
@@ -62,6 +62,18 @@ module Ariel
       return root_node
     end
 
+    # Use when defining any object that occurs once. #list is a synonym, but
+    # it's recommended you use it when defining a container for list_items. The
+    # children of a list_item are just items. e.g.
+    # &lt;tt&gt;structure = Ariel::Node::Structure.new do |r|
+    #   r.list :comments do |c|  # r.item :comments would be equivalent, but less readable
+    #     c.list_item :comment do |c|
+    #       c.item :author  # Now these are just normal items, as they are extracted once from their parent
+    #       c.item :date
+    #       c.item :body
+    #     end
+    #   end
+    # end
     def item(name, &amp;block)
       self.add_child(Node::Structure.new(name, &amp;block))
     end
@@ -69,6 +81,8 @@ module Ariel
     # people probably still prefer to call a list a list.
     alias :list :item
 
+    # See the docs for #item for a discussion of when to use #item and when to
+    # use #list_item.
     def list_item(name, &amp;block)
       self.add_child(Node::Structure.new(name, :list_item, &amp;block))
     end</diff>
      <filename>lib/ariel/node/structure.rb</filename>
    </modified>
    <modified>
      <diff>@@ -186,5 +186,9 @@ module Ariel
       end
       token_locs.sort_by {|token_loc| (label_index-token_loc).abs}.first
     end
+
+    def self.clear_cache
+      @@cache.clear
+    end
   end
 end</diff>
      <filename>lib/ariel/rule.rb</filename>
    </modified>
    <modified>
      <diff>@@ -36,9 +36,10 @@ module Ariel
       @start_loc &lt;=&gt; t.start_loc
     end
       
-    # Accepts either a string or symbol representing a wildcard in
-    # Wildcards#list. Returns true if the whole Token is consumed by the wildcard or the
-    # string is equal to Token#text, and false if the match fails. Raises an
+    # Accepts either a string a symbol representing a wildcard in
+    # Wildcards#list or an an arbitrary regex. Returns true if the
+    # whole Token is consumed by the wildcard or the string is equal
+    # to Token#text, and false if the match fails. Raises an
     # error if the passed symbol is not a member of Wildcards#list.
     def matches?(landmark)
       if landmark.kind_of? Symbol or landmark.kind_of? Regexp
@@ -65,7 +66,7 @@ module Ariel
       return Wildcards.matching(self.text)
     end
 
-    # Redefined for caching purposes
+    # Redefined for caching purposes. This proved to be too slow.
 #    def hash
 #      [@text, @start_loc, @end_loc, @label_tag].hash
 #    end</diff>
      <filename>lib/ariel/token.rb</filename>
    </modified>
    <modified>
      <diff>@@ -129,7 +129,7 @@ module Ariel
       end
     end
 
-    # Returns all text represented by the instance's stored tokens it will not
+    # Returns all text represented by the instance's stored tokens. It will not
     # strip label tags even if the stream is marked to contain them. However,
     # you should not expect to get the raw_text once any label_tags have been
     # filtered (TokenStream#remove_label_tags).</diff>
      <filename>lib/ariel/token_stream.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,7 +1,6 @@
 require 'ariel'
 require 'fixtures'
 include Fixtures
-require 'breakpoint'
 
 context &quot;Querying LabelUtils for label tag locating Regular Expressions&quot; do
   specify &quot;label_regex should return an array of two Regexp to locate a start tag or an end tag with the given tag contents&quot; do</diff>
      <filename>test/specs/label_utils_spec.rb</filename>
    </modified>
    <modified>
      <diff>@@ -1,5 +1,4 @@
 require 'ariel'
-require 'breakpoint'
 
 context &quot;A new Node::Extracted&quot; do
   setup do</diff>
      <filename>test/specs/node_extracted_spec.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>7b908a32f97cd9a68fbb878d56d703959a379fc4</id>
    </parent>
  </parents>
  <author>
    <name>Alex Bradbury</name>
    <email>rforge @nospam@ tekcentral.org</email>
  </author>
  <url>http://github.com/jashmenn/ariel/commit/0448ffb726459945d31ef3a547ecc9d5056434b0</url>
  <id>0448ffb726459945d31ef3a547ecc9d5056434b0</id>
  <committed-date>2006-08-20T15:27:32-07:00</committed-date>
  <authored-date>2006-08-20T15:27:32-07:00</authored-date>
  <message>More changes and fixups before release. Mostly docs, stray debugging lines.</message>
  <tree>c6bc79e71e64cd914fecc22af8fa39c1ed2425ff</tree>
  <committer>
    <name>Alex Bradbury</name>
    <email>rforge @nospam@ tekcentral.org</email>
  </committer>
</commit>
