Returning the parser to its strict quote checking behavior.

JEG2 · Mar 16, 2010 · 1fe73ae · 1fe73ae
1 parent 529effc
commit 1fe73ae
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 10 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,11 @@
 
 Below is a complete listing of changes for each revision of FasterCSV.
 
+== 1.5.3
+
+* A bug fix from Timothy Elliott to return the new parser to its strict quote
+  tolerance.
+
 == 1.5.2
 
 * A bug fix to allow IO Exceptions to reach the calling code from Moses Hohman.

diff --git a/lib/faster_csv.rb b/lib/faster_csv.rb
@@ -82,7 +82,7 @@
 # 
 class FasterCSV
   # The version of the installed library.
-  VERSION = "1.5.2".freeze
+  VERSION = "1.5.3".freeze
 
   # 
   # A FasterCSV::Row is part Array and part Hash.  It retains an order for the
@@ -1614,12 +1614,14 @@ def shift
       parse.split(@col_sep, -1).each do |match|
         if current_field.empty? && match.count(@quote_and_newlines).zero?
           csv           << (match.empty? ? nil : match)
-        elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
+        elsif (current_field.empty? ? match[0] : current_field[0]) ==
+              @quote_char[0]
           current_field << match
           field_quotes += match.count(@quote_char)
           if field_quotes % 2 == 0
             in_quotes = current_field[@parsers[:quoted_field], 1]
-            raise MalformedCSVError unless in_quotes
+            raise MalformedCSVError if !in_quotes ||
+                                       in_quotes[@parsers[:stray_quote]]
             current_field = in_quotes
             current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
             csv           << current_field
@@ -1797,14 +1799,17 @@ def init_parsers(options)
     esc_row_sep = Regexp.escape(@row_sep)
     esc_quote   = Regexp.escape(@quote_char)
     @parsers = {
-      :any_field      => Regexp.new( "[^#{esc_col_sep}]+",
-                                     Regexp::MULTILINE,
-                                     @encoding ),
-      :quoted_field   => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
-                                     Regexp::MULTILINE,
-                                     @encoding ),
+      :any_field    => Regexp.new( "[^#{esc_col_sep}]+",
+                                   Regexp::MULTILINE,
+                                   @encoding ),
+      :quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
+                                   Regexp::MULTILINE,
+                                   @encoding ),
+      :stray_quote  => Regexp.new( "[^#{esc_quote}]#{esc_quote}[^#{esc_quote}]",
+                                   Regexp::MULTILINE,
+                                   @encoding ),
       # safer than chomp!()
-      :line_end       => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
+      :line_end     => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
     }
   end
 

diff --git a/test/tc_csv_parsing.rb b/test/tc_csv_parsing.rb
@@ -114,6 +114,10 @@ def test_non_regex_edge_cases
     [["foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"]]].each do |edge_case|
       assert_equal(edge_case.last, FasterCSV.parse_line(edge_case.first))
     end
+
+    assert_raise(FasterCSV::MalformedCSVError) do
+      FasterCSV.parse_line("1,\"23\"4\"5\", 6")
+    end
   end
 
   def test_malformed_csv