<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -2,4 +2,7 @@
 *.so
 a.out
 Makefile
-
+ext/rmmseg
+doc
+*.gem
+!ext/rmmseg/*.{cpp,h}</diff>
      <filename>.gitignore</filename>
    </modified>
    <modified>
      <diff>@@ -20,7 +20,7 @@ module RMMSeg
       end
       
       def token_stream(field, text)
-        t = PunctuationFilter.new(Tokenizer.new(text))
+        t = Tokenizer.new(text)
         if @brk
           @brk.call(t)
         else
@@ -61,56 +61,5 @@ module RMMSeg
         @algor = Algorithm.new(@text)
       end
     end
-
-    # PunctuationFilter filter out the stand alone Chinese
-    # punctuation tokens.
-    class PunctuationFilter &lt; ::Ferret::Analysis::TokenStream
-      # The punctuation dictionary.
-      class Dictionary
-        include Singleton
-
-        DIC_FILE = File.join(File.dirname(__FILE__),
-                             &quot;..&quot;,
-                             &quot;..&quot;,
-                             &quot;data&quot;,
-                             &quot;punctuation.dic&quot;)
-        def initialize
-          @dic = Hash.new
-          File.open(DIC_FILE, &quot;r&quot;) do |f|
-            f.each_line { |line|
-              @dic[line.chomp.freeze] = nil
-            }
-          end
-        end
-
-        def include?(str)
-          @dic.has_key?(str)
-        end
-      end
-      
-      def initialize(stream)
-        @stream = stream
-      end
-
-      # Get next token, skip stand alone Chinese punctuations.
-      def next
-        token = @stream.next
-        dic = Dictionary.instance
-
-        until token.nil? || !(dic.include? token.text)
-          token = @stream.next
-        end
-
-        token
-      end
-
-      def text
-        @stream.text
-      end
-
-      def text=(str)
-        @stream.text = str
-      end
-    end
   end
 end</diff>
      <filename>lib/rmmseg/ferret.rb</filename>
    </modified>
    <modified>
      <diff>@@ -19,3 +19,4 @@ spec = Gem::Specification.new do |s|
                          '--line-numbers'
   s.extra_rdoc_files =   ['README']
 end
+</diff>
      <filename>rmmseg-cpp.gemspec</filename>
    </modified>
  </modified>
  <removed type="array">
    <removed>
      <filename>data/punctuation.dic</filename>
    </removed>
  </removed>
  <parents type="array">
    <parent>
      <id>1ad1d3ba8a3477b9145dd70fb5aef15cef851e6d</id>
    </parent>
  </parents>
  <author>
    <name>pluskid</name>
    <email>pluskid@gmail.com</email>
  </author>
  <url>http://github.com/pluskid/rmmseg-cpp/commit/9036639e199ac43be5af3d79ac8b5c4e9c3ccbd4</url>
  <id>9036639e199ac43be5af3d79ac8b5c4e9c3ccbd4</id>
  <committed-date>2008-05-22T04:17:54-07:00</committed-date>
  <authored-date>2008-05-22T04:17:34-07:00</authored-date>
  <message>Removed punctuation filter.

It is duplicated with Ferret Stopword filter.</message>
  <tree>a5943cafc4f5e82a3a9efff9cb6ca39495a87a1e</tree>
  <committer>
    <name>pluskid</name>
    <email>pluskid@gmail.com</email>
  </committer>
</commit>
