Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Addressing blank row issues.

git-svn-id: svn+ssh://rubyforge.org/var/svn/fastercsv/trunk@38 e365412f-4209-0410-a6b9-d67c64ae9f52
  • Loading branch information...
commit bc16bc05df2c783ec2f0d038b7764d84f5ab1a53 1 parent d0c29bc
@JEG2 authored
View
9 CHANGELOG
@@ -2,6 +2,15 @@
Below is a complete listing of changes for each revision of FasterCSV.
+== 1.1.0
+
+* Added empty?(), length(), and size() methods to FasterCSV::Row and
+ FasterCSV::Table.
+* Cleaned up bug fix regression tests.
+* Fixed bug causing Arrays to be returned for blank rows when header processing
+ is active.
+* Added a <tt>:skip_blanks</tt> option for ignoring empty rows.
+
== 1.0.0
* Fixed FasterCSV.rewind() to reset the FasterCSV.lineno() counter.
View
17 TODO
@@ -3,19 +3,4 @@
The following is a list of planned expansions for FasterCSV, in no particular
order.
- * I'm using FasterCSV to parse CSV files from different sources, and
- occasionally the source (which I don't control) outputs
- a blank line.
-
- When doing "FasterCSV.foreach(file, :headers => :first_row) do |row|" the
- "row" variable will become an Array object when it encounters a blank line.
- Within the loop I do something like "next if
- row.respond_to?(:empty?) && row.empty?" to skip the row if it's blank, but I
- think of this more as a hack.
-
- It would be nice if FasterCSV.foreach would skip over the row instead of
- instantiating an Array object.
-
- Thanks,
-
- Dan Kubb
+ * Rent this space...
View
57 lib/faster_csv.rb
@@ -75,7 +75,7 @@
#
class FasterCSV
# The version of the installed library.
- VERSION = "1.0.0".freeze
+ VERSION = "1.1.0".freeze
#
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
@@ -95,6 +95,13 @@ class Row
# FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
# a header row. Otherwise, the row is assumed to be a field row.
#
+ # A FasterCSV::Row object supports the following Array methods through
+ # delegation:
+ #
+ # * empty?()
+ # * length()
+ # * size()
+ #
def initialize(headers, fields, header_row = false)
@header_row = header_row
@@ -109,6 +116,11 @@ def initialize(headers, fields, header_row = false)
# Internal data format used to compare equality.
attr_reader :row
protected :row
+
+ ### Array Delegation ###
+
+ extend Forwardable
+ def_delegators :@row, :empty?, :length, :size
# Returns +true+ if this is a header row.
def header_row?
@@ -367,6 +379,13 @@ class Table
# to be FasterCSV::Row objects. All rows are assumed to have the same
# headers.
#
+ # A FasterCSV::Table object supports the following Array methods through
+ # delegation:
+ #
+ # * empty?()
+ # * length()
+ # * size()
+ #
def initialize(array_of_rows)
@table = array_of_rows
@mode = :col_or_row
@@ -378,6 +397,11 @@ def initialize(array_of_rows)
# Internal data format used to compare equality.
attr_reader :table
protected :table
+
+ ### Array Delegation ###
+
+ extend Forwardable
+ def_delegators :@table, :empty?, :length, :size
#
# Returns a duplicate table object, in column mode. This is handy for
@@ -749,6 +773,7 @@ class MalformedCSVError < RuntimeError; end
# <b><tt>:headers</tt></b>:: +false+
# <b><tt>:return_headers</tt></b>:: +false+
# <b><tt>:header_converters</tt></b>:: +nil+
+ # <b><tt>:skip_blanks</tt></b>:: +false+
#
DEFAULT_OPTIONS = { :col_sep => ",",
:row_sep => :auto,
@@ -756,7 +781,8 @@ class MalformedCSVError < RuntimeError; end
:unconverted_fields => nil,
:headers => false,
:return_headers => false,
- :header_converters => nil }.freeze
+ :header_converters => nil,
+ :skip_blanks => false }.freeze
#
# This method will build a drop-in replacement for many of the standard CSV
@@ -1263,6 +1289,9 @@ def self.readlines(*args)
# <tt>:converters</tt> save that the
# conversions are only made to header
# rows.
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
+ # will skip over any rows with no
+ # content.
#
# See FasterCSV::DEFAULT_OPTIONS for the default settings.
#
@@ -1289,14 +1318,14 @@ def initialize(data, options = Hash.new)
@lineno = 0
end
- ### IO and StringIO Delegation ###
-
#
# The line number of the last row read from this file. Fields with nested
# line-end characters will not affect this count.
#
attr_reader :lineno
+ ### IO and StringIO Delegation ###
+
extend Forwardable
def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
:eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
@@ -1432,11 +1461,11 @@ def shift
# handle headers not based on document content
if header_row? and @return_headers and
[Array, String].include? @use_headers.class
- if @unconverted_fields
- return add_unconverted_fields(parse_headers, Array.new)
- else
- return parse_headers
- end
+ if @unconverted_fields
+ return add_unconverted_fields(parse_headers, Array.new)
+ else
+ return parse_headers
+ end
end
# begin with a blank line, so we can always add to it
@@ -1459,8 +1488,13 @@ def shift
#
if parse.empty?
@lineno += 1
- if @unconverted_fields
+ if @skip_blanks
+ line = ""
+ next
+ elsif @unconverted_fields
return add_unconverted_fields(Array.new, Array.new)
+ elsif @use_headers
+ return FasterCSV::Row.new(Array.new, Array.new)
else
return Array.new
end
@@ -1581,6 +1615,9 @@ def init_separators(options)
# Pre-compiles parsers and stores them by name for access during reads.
def init_parsers(options)
+ # store the parser behaviors
+ @skip_blanks = options.delete(:skip_blanks)
+
# prebuild Regexps for faster parsing
@parsers = {
:leading_fields =>
View
32 test/tc_features.rb
@@ -98,23 +98,37 @@ def test_unknown_options
end
end
- def test_bug_fixes
- # failing to escape <tt>:col_sep</tt> (reported by Kev Jackson)
+ def test_skip_blanks
+ assert_equal(4, @csv.to_a.size)
+
+ @csv = FasterCSV.new(@sample_data, :skip_blanks => true)
+
+ count = 0
+ @csv.each do |row|
+ count += 1
+ assert_equal("line", row.first)
+ end
+ assert_equal(3, count)
+ end
+
+ # reported by Kev Jackson
+ def test_failing_to_escape_col_sep_bug_fix
assert_nothing_raised(Exception) do
FasterCSV.new(String.new, :col_sep => "|")
end
-
- # failing to reset header behavior on rewind() (reported by Chris Roos)
+ end
+
+ # reported by Chris Roos
+ def test_failing_to_reset_headers_in_rewind_bug_fix
csv = FasterCSV.new( "forename,surname", :headers => true,
:return_headers => true )
csv.each { |row| assert row.header_row? }
csv.rewind
csv.each { |row| assert row.header_row? }
-
- #
- # leading empty fields with multibyte col_sep raises MalformedCSVError
- # (reported by Dave Burt)
- #
+ end
+
+ # reported by Dave Burt
+ def test_leading_empty_fields_with_multibyte_col_sep_bug_fix
data = <<-END_DATA.gsub(/^\s+/, "")
<=><=>A<=>B<=>C
1<=>2<=>3
View
35 test/tc_headers.rb
@@ -224,4 +224,39 @@ def test_table_support
assert_instance_of(FasterCSV::Table, csv)
end
+
+ def test_skip_blanks
+ @data = <<-END_CSV.gsub(/^ +/, "")
+
+
+ A,B,C
+
+ 1,2,3
+
+
+
+ END_CSV
+
+ expected = [%w[1 2 3]]
+ FasterCSV.parse(@data, :headers => true, :skip_blanks => true) do |row|
+ assert_equal(expected.shift, row.fields)
+ end
+
+ expected = [%w[A B C], %w[1 2 3]]
+ FasterCSV.parse( @data,
+ :headers => true,
+ :return_headers => true,
+ :skip_blanks => true ) do |row|
+ assert_equal(expected.shift, row.fields)
+ end
+ end
+
+ def test_blank_row_bug_fix
+ @data += "\n#{@data}" # add a blank row
+
+ # ensure that everything returned is a Row object
+ FasterCSV.parse(@data, :headers => true) do |row|
+ assert_instance_of(FasterCSV::Row, row)
+ end
+ end
end
View
6 test/tc_row.rb
@@ -279,4 +279,10 @@ def test_to_csv
assert_equal( "1|2|3|4|\r\n",
@row.to_csv(:col_sep => "|", :row_sep => "\r\n") )
end
+
+ def test_array_delegation
+ assert(!@row.empty?, "Row was empty.")
+
+ assert_equal([@row.headers.size, @row.fields.size].max, @row.size)
+ end
end
View
6 test/tc_table.rb
@@ -382,4 +382,10 @@ def test_values_at
assert_equal([[1, 3], [4, 6], [7, 9]], @table.by_col.values_at(0, 2))
assert_equal(@rows.values_at(0, 2), @table.values_at(0, 2))
end
+
+ def test_array_delegation
+ assert(!@table.empty?, "Table was empty.")
+
+ assert_equal(@rows.size, @table.size)
+ end
end
Please sign in to comment.
Something went wrong with that request. Please try again.