/
faster_csv.rb
2012 lines (1840 loc) · 72.7 KB
/
faster_csv.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/local/bin/ruby -w
# = faster_csv.rb -- Faster CSV Reading and Writing
#
# Created by James Edward Gray II on 2005-10-31.
# Copyright 2005 Gray Productions. All rights reserved.
#
# See FasterCSV for documentation.
if RUBY_VERSION >= "1.9"
class FasterCSV
  # Placeholder used on Ruby 1.9 and later:  any constant lookup, class method
  # call, or instance method call that is not otherwise defined ends in the
  # same NotImplementedError pointing users at the standard CSV library.
  class << self
    # Raises for every missing constant (FasterCSV::Row, FasterCSV::Table, ...).
    def const_missing(*_)
      message = "Please switch to Ruby 1.9's standard CSV " \
                "library. It's FasterCSV plus support for " \
                "Ruby 1.9's m17n encoding engine."
      raise NotImplementedError, message
    end

    # Undefined class methods are routed to the same error.
    def method_missing(*_)
      const_missing
    end
  end

  # Undefined instance methods are routed to the same error.
  def method_missing(*_)
    self.class.const_missing
  end
end
else
require "forwardable"
require "English"
require "enumerator"
require "date"
require "stringio"
#
# This class provides a complete interface to CSV files and data. It offers
# tools to enable you to read and write to and from Strings or IO objects, as
# needed.
#
# == Reading
#
# === From a File
#
# ==== A Line at a Time
#
# FasterCSV.foreach("path/to/file.csv") do |row|
# # use row here...
# end
#
# ==== All at Once
#
# arr_of_arrs = FasterCSV.read("path/to/file.csv")
#
# === From a String
#
# ==== A Line at a Time
#
# FasterCSV.parse("CSV,data,String") do |row|
# # use row here...
# end
#
# ==== All at Once
#
# arr_of_arrs = FasterCSV.parse("CSV,data,String")
#
# == Writing
#
# === To a File
#
# FasterCSV.open("path/to/file.csv", "w") do |csv|
# csv << ["row", "of", "CSV", "data"]
# csv << ["another", "row"]
# # ...
# end
#
# === To a String
#
# csv_string = FasterCSV.generate do |csv|
# csv << ["row", "of", "CSV", "data"]
# csv << ["another", "row"]
# # ...
# end
#
# == Convert a Single Line
#
# csv_string = ["CSV", "data"].to_csv # to CSV
# csv_array = "CSV,String".parse_csv # from CSV
#
# == Shortcut Interface
#
# FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
# FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
# FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
# FCSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
#
# == Advanced Usage
#
# === Wrap an IO Object
#
# csv = FCSV.new(io, options)
# # ... read (with gets() or each()) from and write (with <<) to csv here ...
#
class FasterCSV
# The version of the installed library.
VERSION = "1.5.5".freeze
#
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
# fields and allows duplicates just as an Array would, but also allows you to
# access fields by name just as you could if they were in a Hash.
#
# All rows returned by FasterCSV will be constructed from this class, if
# header row processing is activated.
#
class Row
  #
  # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
  # expected to be Arrays. If one Array is shorter than the other, it will be
  # padded with +nil+ objects.
  #
  # The optional +header_row+ parameter can be set to +true+ to indicate, via
  # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
  # a header row. Otherwise, the row is assumed to be a field row.
  #
  # A FasterCSV::Row object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(headers, fields, header_row = false)
    @header_row = header_row

    # handle extra headers or fields:  zip() pads its argument with nil, so
    # always zip from the longer Array and store pairs as [header, field]
    @row = if headers.size > fields.size
      headers.zip(fields)
    else
      fields.zip(headers).map { |pair| pair.reverse }
    end
  end

  # Internal data format used to compare equality.
  attr_reader :row
  protected   :row

  ### Array Delegation ###

  extend Forwardable
  def_delegators :@row, :empty?, :length, :size

  # Returns +true+ if this is a header row.
  def header_row?
    @header_row
  end

  # Returns +true+ if this is a field row.
  def field_row?
    not header_row?
  end

  # Returns the headers of this row.
  def headers
    @row.map { |pair| pair.first }
  end

  #
  # :call-seq:
  #   field( header )
  #   field( header, offset )
  #   field( index )
  #
  # This method will fetch the field value by +header+ or +index+. If a field
  # is not found, +nil+ is returned.
  #
  # When provided, +offset+ ensures that a header match occurs on or later
  # than the +offset+ index. You can use this to find duplicate headers,
  # without resorting to hard-coding exact indices. An +offset+ beyond the
  # end of the row simply finds nothing.
  #
  def field(header_or_index, minimum_index = 0)
    # slicing past the end of the row yields nil, so there is nothing to find
    candidates = @row[minimum_index..-1]
    return nil if candidates.nil?

    # locate the pair
    finder = header_or_index.is_a?(Integer) ? :[] : :assoc
    pair   = candidates.send(finder, header_or_index)

    # return the field if we have a pair
    pair.nil? ? nil : pair.last
  end
  alias_method :[], :field

  #
  # :call-seq:
  #   []=( header, value )
  #   []=( header, offset, value )
  #   []=( index, value )
  #
  # Looks up the field by the semantics described in FasterCSV::Row.field()
  # and assigns the +value+.
  #
  # Assigning past the end of the row with an index will set all pairs between
  # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
  # pair.
  #
  def []=(*args)
    value = args.pop

    if args.first.is_a? Integer
      if @row[args.first].nil?  # extending past the end with index
        @row[args.first] = [nil, value]
        # the assignment above left nil holes; turn them into empty pairs
        @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
      else                      # normal index assignment
        @row[args.first][1] = value
      end
    else
      position = index(*args)
      if position.nil?          # appending a field
        self << [args.first, value]
      else                      # normal header assignment
        @row[position][1] = value
      end
    end
  end

  #
  # :call-seq:
  #   <<( field )
  #   <<( header_and_field_array )
  #   <<( header_and_field_hash )
  #
  # If a two-element Array is provided, it is assumed to be a header and field
  # and the pair is appended. A Hash works the same way with the key being
  # the header and the value being the field. Anything else is assumed to be
  # a lone field which is appended with a +nil+ header.
  #
  # This method returns the row for chaining.
  #
  def <<(arg)
    if arg.is_a?(Array) and arg.size == 2  # appending a header and name
      @row << arg
    elsif arg.is_a?(Hash)                  # append header and name pairs
      arg.each { |pair| @row << pair }
    else                                   # append field value
      @row << [nil, arg]
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple fields. Equivalent to:
  #
  #   args.each { |arg| faster_csv_row << arg }
  #
  # This method returns the row for chaining.
  #
  def push(*args)
    args.each { |arg| self << arg }

    self  # for chaining
  end

  #
  # :call-seq:
  #   delete( header )
  #   delete( header, offset )
  #   delete( index )
  #
  # Used to remove a pair from the row by +header+ or +index+. The pair is
  # located as described in FasterCSV::Row.field(). The deleted pair is
  # returned. When a +header+ cannot be found an empty Array is returned;
  # when an +index+ is out of range the result is +nil+.
  #
  def delete(header_or_index, minimum_index = 0)
    if header_or_index.is_a? Integer                 # by index
      @row.delete_at(header_or_index)
    elsif i = index(header_or_index, minimum_index)  # by header
      @row.delete_at(i)
    else
      [ ]
    end
  end

  #
  # The provided +block+ is passed a header and field for each pair in the row
  # and expected to return +true+ or +false+, depending on whether the pair
  # should be deleted.
  #
  # This method returns the row for chaining.
  #
  def delete_if(&block)
    @row.delete_if(&block)

    self  # for chaining
  end

  #
  # This method accepts any number of arguments which can be headers, indices,
  # Ranges of either, or two-element Arrays containing a header and offset.
  # Each argument will be replaced with a field lookup as described in
  # FasterCSV::Row.field().
  #
  # If called with no arguments, all fields are returned.
  #
  def fields(*headers_and_or_indices)
    if headers_and_or_indices.empty?  # return all fields--no arguments
      @row.map { |pair| pair.last }
    else                              # or work like values_at()
      headers_and_or_indices.inject(Array.new) do |all, h_or_i|
        all + if h_or_i.is_a? Range
          # translate header endpoints into indices, keeping the Range's
          # inclusive/exclusive nature
          index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
                                                      index(h_or_i.begin)
          index_end   = h_or_i.end.is_a?(Integer)   ? h_or_i.end :
                                                      index(h_or_i.end)
          new_range   = h_or_i.exclude_end? ? (index_begin...index_end) :
                                              (index_begin..index_end)
          fields.values_at(new_range)
        else
          [field(*Array(h_or_i))]
        end
      end
    end
  end
  alias_method :values_at, :fields

  #
  # :call-seq:
  #   index( header )
  #   index( header, offset )
  #
  # This method will return the index of a field with the provided +header+.
  # The +offset+ can be used to locate duplicate header names, as described in
  # FasterCSV::Row.field(). Returns +nil+ when no header matches, including
  # when +offset+ is beyond the end of the row.
  #
  def index(header, minimum_index = 0)
    # slicing past the end of the row yields nil, so there is nothing to find
    candidates = headers[minimum_index..-1]
    return nil if candidates.nil?

    # find the pair
    index = candidates.index(header)

    # return the index at the right offset, if we found one
    index.nil? ? nil : index + minimum_index
  end

  # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
  def header?(name)
    headers.include? name
  end
  alias_method :include?, :header?

  #
  # Returns +true+ if +data+ matches a field in this row, and +false+
  # otherwise.
  #
  def field?(data)
    fields.include? data
  end

  include Enumerable

  #
  # Yields each pair of the row as header and field tuples (much like
  # iterating over a Hash).
  #
  # Support for Enumerable.
  #
  # This method returns the row for chaining.
  #
  def each(&block)
    @row.each(&block)

    self  # for chaining
  end

  #
  # Returns +true+ if this row contains the same headers and fields in the
  # same order as +other+. Anything that is not a Row is compared against
  # the internal Array of <tt>[header, field]</tt> pairs, so unrelated
  # objects (such as +nil+) are simply unequal.
  #
  def ==(other)
    return @row == other.row if other.is_a? Row
    @row == other
  end

  #
  # Collapses the row into a simple Hash. Be warned that this discards field
  # order and clobbers duplicate fields (the last duplicate wins).
  #
  def to_hash
    hash = Hash.new
    @row.each { |header, field| hash[header] = field }
    hash
  end

  #
  # Returns the row as a CSV String. Headers are not used. Equivalent to:
  #
  #   faster_csv_row.fields.to_csv( options )
  #
  def to_csv(options = Hash.new)
    fields.to_csv(options)
  end
  alias_method :to_s, :to_csv

  # A summary of fields, by header.
  def inspect
    str = "#<#{self.class}"
    each do |header, field|
      # Symbol headers are shown bare, everything else via inspect()
      str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
             field.inspect
    end
    str << ">"
  end
end
#
# A FasterCSV::Table is a two-dimensional data structure for representing CSV
# documents. Tables allow you to work with the data by row or column,
# manipulate the data, and even convert the results back to CSV, if needed.
#
# All tables returned by FasterCSV will be constructed from this class, if
# header row processing is activated.
#
class Table
  #
  # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
  # to be FasterCSV::Row objects. All rows are assumed to have the same
  # headers.
  #
  # A FasterCSV::Table object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(array_of_rows)
    @table = array_of_rows
    @mode  = :col_or_row
  end

  # The current access mode for indexing and iteration.
  attr_reader :mode

  # Internal data format used to compare equality.
  attr_reader :table
  protected   :table

  ### Array Delegation ###

  extend Forwardable
  def_delegators :@table, :empty?, :length, :size

  #
  # Returns a duplicate table object, in column mode. This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining. Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col
    self.class.new(@table.dup).by_col!
  end

  #
  # Switches the mode of this table to column mode. All calls to indexing and
  # iteration methods will work with columns until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col!
    @mode = :col

    self
  end

  #
  # Returns a duplicate table object, in mixed mode. This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining. Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col_or_row
    self.class.new(@table.dup).by_col_or_row!
  end

  #
  # Switches the mode of this table to mixed mode. All calls to indexing and
  # iteration methods will use the default intelligent indexing system until
  # the mode is changed again. In mixed mode an index is assumed to be a row
  # reference while anything else is assumed to be column access by headers.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col_or_row!
    @mode = :col_or_row

    self
  end

  #
  # Returns a duplicate table object, in row mode. This is handy for chaining
  # in a single call without changing the table mode, but be aware that this
  # method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining. Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_row
    self.class.new(@table.dup).by_row!
  end

  #
  # Switches the mode of this table to row mode. All calls to indexing and
  # iteration methods will work with rows until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_row!
    @mode = :row

    self
  end

  #
  # Returns the headers for the first row of this table (assumed to match all
  # other rows). An empty Array is returned for empty tables.
  #
  def headers
    if @table.empty?
      Array.new
    else
      @table.first.headers
    end
  end

  #
  # In the default mixed mode, this method returns rows for index access and
  # columns for header access. You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Columns are returned as an Array of values. Altering that Array has no
  # effect on the table.
  #
  def [](index_or_header)
    if @mode == :row or  # by index
       (@mode == :col_or_row and index_or_header.is_a? Integer)
      @table[index_or_header]
    else                 # by header
      @table.map { |row| row[index_or_header] }
    end
  end

  #
  # In the default mixed mode, this method assigns rows for index access and
  # columns for header access. You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Rows may be set to an Array of values (which will inherit the table's
  # headers()) or a FasterCSV::Row.
  #
  # Columns may be set to a single value, which is copied to each row of the
  # column, or an Array of values. Arrays of values are assigned to rows top
  # to bottom in row major order. Excess values are ignored and if the Array
  # does not have a value for each row the extra rows will receive a +nil+.
  #
  # Assigning to an existing column or row clobbers the data. Assigning to
  # new columns creates them at the right end of the table.
  #
  def []=(index_or_header, value)
    if @mode == :row or  # by index
       (@mode == :col_or_row and index_or_header.is_a? Integer)
      if value.is_a? Array
        @table[index_or_header] = Row.new(headers, value)
      else
        @table[index_or_header] = value
      end
    else                 # set column
      if value.is_a? Array  # multiple values
        @table.each_with_index do |row, i|
          if row.header_row?
            # a header row receives the column's name instead of a value
            row[index_or_header] = index_or_header
          else
            row[index_or_header] = value[i]
          end
        end
      else                  # repeated value
        @table.each do |row|
          if row.header_row?
            # a header row receives the column's name instead of a value
            row[index_or_header] = index_or_header
          else
            row[index_or_header] = value
          end
        end
      end
    end
  end

  #
  # The mixed mode default is to treat a list of indices as row access,
  # returning the rows indicated. Anything else is considered columnar
  # access. For columnar access, the return set has an Array for each row
  # with the values indicated by the headers in each Array. You can force
  # column or row mode using by_col!() or by_row!().
  #
  # You cannot mix column and row access.
  #
  def values_at(*indices_or_headers)
    if @mode == :row or  # by indices
       ( @mode == :col_or_row and indices_or_headers.all? do |index|
                                    index.is_a?(Integer)         or
                                    ( index.is_a?(Range)         and
                                      index.first.is_a?(Integer) and
                                      index.last.is_a?(Integer) )
                                  end )
      @table.values_at(*indices_or_headers)
    else                 # by headers
      @table.map { |row| row.values_at(*indices_or_headers) }
    end
  end

  #
  # Adds a new row to the bottom end of this table. You can provide an Array,
  # which will be converted to a FasterCSV::Row (inheriting the table's
  # headers()), or a FasterCSV::Row.
  #
  # This method returns the table for chaining.
  #
  def <<(row_or_array)
    if row_or_array.is_a? Array  # append Array
      @table << Row.new(headers, row_or_array)
    else                         # append Row
      @table << row_or_array
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple rows. Equivalent to:
  #
  #   rows.each { |row| self << row }
  #
  # This method returns the table for chaining.
  #
  def push(*rows)
    rows.each { |row| self << row }

    self  # for chaining
  end

  #
  # Removes and returns the indicated column or row. In the default mixed
  # mode indices refer to rows and everything else is assumed to be a column
  # header. Use by_col!() or by_row!() to force the lookup.
  #
  # A removed column is returned as an Array holding one value per row; rows
  # that had no such column contribute +nil+.
  #
  def delete(index_or_header)
    if @mode == :row or  # by index
       (@mode == :col_or_row and index_or_header.is_a? Integer)
      @table.delete_at(index_or_header)
    else                 # by header
      @table.map do |row|
        # the row answers with a pair, an empty Array (unknown header), or
        # nil (index out of range in column mode)
        pair = row.delete(index_or_header)
        pair.nil? ? nil : pair.last
      end
    end
  end

  #
  # Removes any column or row for which the block returns +true+. In the
  # default mixed mode or row mode, iteration is the standard row major
  # walking of rows. In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def delete_if(&block)
    if @mode == :row or @mode == :col_or_row  # by index
      @table.delete_if(&block)
    else                                      # by header
      # decide on every column first, then remove the chosen ones
      to_delete = headers.select { |header| block[[header, self[header]]] }
      to_delete.each { |header| delete(header) }
    end

    self  # for chaining
  end

  include Enumerable

  #
  # In the default mixed mode or row mode, iteration is the standard row major
  # walking of rows. In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def each(&block)
    if @mode == :col
      headers.each { |header| block[[header, self[header]]] }
    else
      @table.each(&block)
    end

    self  # for chaining
  end

  #
  # Returns +true+ if all rows of this table ==() +other+'s rows. Anything
  # that is not a Table is compared against the internal Array of rows, so
  # unrelated objects (such as +nil+) are simply unequal.
  #
  def ==(other)
    return @table == other.table if other.is_a? Table
    @table == other
  end

  #
  # Returns the table as an Array of Arrays. Headers will be the first row,
  # then all of the field rows will follow.
  #
  def to_a
    # append in place; rebuilding the Array for every row is quadratic
    array = [headers]
    @table.each { |row| array << row.fields unless row.header_row? }
    array
  end

  #
  # Returns the table as a complete CSV String. Headers will be listed first,
  # then all of the field rows.
  #
  # This method assumes you want the Table.headers(), unless you explicitly
  # pass <tt>:write_headers => false</tt>.
  #
  def to_csv(options = Hash.new)
    wh = options.fetch(:write_headers, true)

    # NOTE(review): +options+ (including any :write_headers key) is handed on
    # to Array#to_csv() untouched -- confirm that FasterCSV::new() accepts it
    rows = wh ? [headers.to_csv(options)] : [ ]
    @table.each do |row|
      rows << row.fields.to_csv(options) unless row.header_row?
    end
    rows.join
  end
  alias_method :to_s, :to_csv

  # A short summary:  class, access mode, and the size of to_a().
  def inspect
    "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
  end
end
# The error raised when the parser encounters illegal CSV formatting.
# It is a RuntimeError, so a bare +rescue+ will catch it.
class MalformedCSVError < RuntimeError; end
#
# A FieldInfo Struct contains details about a field's position in the data
# source it was read from. FasterCSV will pass this Struct to some blocks
# that make decisions based on field structure. See
# FasterCSV.convert_fields() for an example.
#
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
# <b><tt>line</tt></b>:: The line of the data source this row is from.
# <b><tt>header</tt></b>:: The header for the column, when available.
#
FieldInfo = Struct.new(:index, :line, :header)
#
# A Regexp used to find and convert some common Date formats. The whole
# field must match one of two shapes: an optional leading word, a word, a
# one or two digit number, and a two to four digit number (for example
# <tt>Mon, Jan 5, 2009</tt> or <tt>Jan 5 09</tt>), or the numeric form
# <tt>2009-01-05</tt>.
#
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
\d{4}-\d{2}-\d{2} )\z /x
#
# A Regexp used to find and convert some common DateTime formats. The whole
# field must match one of two shapes: the worded form with a clock time
# before the year (for example <tt>Mon Jan 5 12:30:45 2009</tt>), or the
# numeric form <tt>2009-01-05 12:30:45</tt>.
#
DateTimeMatcher =
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
#
# This Hash holds the built-in converters of FasterCSV that can be accessed by
# name. You can select Converters with FasterCSV.convert() or through the
# +options+ Hash passed to FasterCSV::new().
#
# <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
# <b><tt>:float</tt></b>:: Converts any field Float() accepts.
# <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
# and <tt>:float</tt>.
# <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
# <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
# <b><tt>:all</tt></b>:: All built-in converters. A combination of
# <tt>:date_time</tt> and <tt>:numeric</tt>.
#
# Every built-in converter hands back the field unchanged when the
# conversion raises. The <tt>:date</tt> and <tt>:date_time</tt> converters
# only attempt a parse when the field matches DateMatcher or DateTimeMatcher
# respectively.
#
# This Hash is intentionally left unfrozen and users should feel free to add
# values to it that can be accessed by all FasterCSV objects.
#
# To add a combo field, the value should be an Array of names. Combo fields
# can be nested with other combo fields.
#
Converters = { :integer => lambda { |f| Integer(f) rescue f },
:float => lambda { |f| Float(f) rescue f },
:numeric => [:integer, :float],
:date => lambda { |f|
f =~ DateMatcher ? (Date.parse(f) rescue f) : f
},
:date_time => lambda { |f|
f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
},
:all => [:date_time, :numeric] }
#
# This Hash holds the built-in header converters of FasterCSV that can be
# accessed by name. You can select HeaderConverters with
# FasterCSV.header_convert() or through the +options+ Hash passed to
# FasterCSV::new().
#
# <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
# <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
# replaced with underscores, non-word characters
# are dropped, and finally to_sym() is called.
# For example, <tt>"First Name!"</tt> becomes
# <tt>:first_name</tt>.
#
# This Hash is intentionally left unfrozen and users should feel free to add
# values to it that can be accessed by all FasterCSV objects.
#
# To add a combo field, the value should be an Array of names. Combo fields
# can be nested with other combo fields.
#
HeaderConverters = {
:downcase => lambda { |h| h.downcase },
:symbol => lambda { |h|
h.downcase.tr(" ", "_").delete("^a-z0-9_").to_sym
}
}
#
# The options used when no overrides are given by calling code. They are:
#
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
# <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
# <b><tt>:converters</tt></b>:: +nil+
# <b><tt>:unconverted_fields</tt></b>:: +nil+
# <b><tt>:headers</tt></b>:: +false+
# <b><tt>:return_headers</tt></b>:: +false+
# <b><tt>:header_converters</tt></b>:: +nil+
# <b><tt>:skip_blanks</tt></b>:: +false+
# <b><tt>:force_quotes</tt></b>:: +false+
#
# The Hash itself is frozen, so these defaults cannot be changed in place.
#
DEFAULT_OPTIONS = { :col_sep => ",",
:row_sep => :auto,
:quote_char => '"',
:converters => nil,
:unconverted_fields => nil,
:headers => false,
:return_headers => false,
:header_converters => nil,
:skip_blanks => false,
:force_quotes => false }.freeze
#
# This method will build a drop-in replacement for many of the standard CSV
# methods. It allows you to write code like:
#
# begin
# require "faster_csv"
# FasterCSV.build_csv_interface
# rescue LoadError
# require "csv"
# end
# # ... use CSV here ...
#
# This is not a complete interface with completely identical behavior.
# However, it is intended to be close enough that you won't notice the
# difference in most cases. CSV methods supported are:
#
# * foreach()
# * generate_line()
# * open()
# * parse()
# * parse_line()
# * readlines()
#
# Be warned that this interface is slower than vanilla FasterCSV due to the
# extra layer of method calls. Depending on usage, this can slow it down to
# near CSV speeds.
#
def self.build_csv_interface
  # Install a fresh anonymous class as the top-level CSV constant, then define
  # the shims on its singleton. Each shim maps the positional separator
  # arguments onto the :col_sep / :row_sep options of the FasterCSV method of
  # the same name.
  shim = Object.const_set(:CSV, Class.new)

  class << shim
    def foreach(path, rs = :auto, &block)  # :nodoc:
      FasterCSV.foreach(path, :row_sep => rs, &block)
    end

    def generate_line(row, fs = ",", rs = "")  # :nodoc:
      FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
    end

    def open(path, mode, fs = ",", rs = :auto, &block)  # :nodoc:
      # without a block, or when not reading, the block (if any) is passed
      # straight through to FasterCSV.open()
      unless block and mode.include? "r"
        return FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
      end

      # a read-mode block is called once per row
      FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
        csv.each(&block)
      end
    end

    def parse(str_or_readable, fs = ",", rs = :auto, &block)  # :nodoc:
      FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
    end

    def parse_line(src, fs = ",", rs = :auto)  # :nodoc:
      FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
    end

    def readlines(path, rs = :auto)  # :nodoc:
      FasterCSV.readlines(path, :row_sep => rs)
    end
  end
end
#
# This method allows you to serialize an Array of Ruby objects to a String or
# File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
# useful for spreadsheet and database interaction.
#
# Out of the box, this method is intended to work with simple data objects or
# Structs. It will serialize a list of instance variables and/or
# Struct.members().
#
# If you need more complicated serialization, you can control the process
# by adding methods to the class to be serialized. Each of the methods below
# is used only when the class or object responds to it; an error raised from
# inside one of them is not rescued.
#
# A class method csv_meta() is responsible for returning the first row of the
# document (as an Array). This row is considered to be a Hash of the form
# key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
# key with a value of the stringified class name and FasterCSV::dump() will
# create this, if you do not define this method. This method is only called
# on the first object of the Array.
#
# The next method you can provide is an instance method called csv_headers().
# This method is expected to return the second line of the document (again as
# an Array), which is to be used to give each column a header. By default,
# FasterCSV::load() will set an instance variable if the field header starts
# with an @ character or call send() passing the header as the method name and
# the field value as an argument. This method is only called on the first
# object of the Array.
#
# Finally, you can provide an instance method called csv_dump(), which will
# be passed the headers. This should return an Array of fields that can be
# serialized for this object. This method is called once for every object in
# the Array.
#
# The +io+ parameter can be used to serialize to a File, and +options+ can be
# anything FasterCSV::new() accepts.
#
# When +io+ is a String the CSV String is returned; otherwise the result of
# closing the FasterCSV object is returned.
#
def self.dump(ary_of_objs, io = "", options = Hash.new)
  obj_template = ary_of_objs.first

  csv = FasterCSV.new(io, options)

  # write meta information
  template_class = obj_template.class
  if template_class.respond_to? :csv_meta
    csv << template_class.csv_meta
  else
    csv << [:class, template_class]
  end

  # write headers
  if obj_template.respond_to? :csv_headers
    headers = obj_template.csv_headers
  else
    headers = obj_template.instance_variables.sort
    if obj_template.is_a? Struct
      # Struct members are written as setter names ("name=")
      headers += obj_template.members.map { |mem| "#{mem}=" }.sort
    end
  end
  csv << headers

  # serialize each object
  ary_of_objs.each do |obj|
    if obj.respond_to? :csv_dump
      csv << obj.csv_dump(headers)
    else
      fields = headers.map do |var|
        if var[0] == ?@  # instance variable
          obj.instance_variable_get(var)
        else             # Struct member:  strip the trailing "=" and look it up
          obj[var[0..-2]]
        end
      end
      csv << fields
    end
  end

  if io.is_a? String
    csv.string
  else
    csv.close
  end
end
#
# :call-seq:
# filter( options = Hash.new ) { |row| ... }
# filter( input, options = Hash.new ) { |row| ... }
# filter( input, output, options = Hash.new ) { |row| ... }
#
# This method is a convenience for building Unix-like filters for CSV data.
# Each row is yielded to the provided block which can alter it as needed.
# After the block returns, the row is appended to +output+ altered or not.
#
# The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
# (generally String or IO objects). If not given, they default to
# <tt>ARGF</tt> and <tt>$stdout</tt>.