diff --git a/.gitignore b/.gitignore index 5ca49a2..67125b6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,8 +6,5 @@ /doc/ /pkg/ /spec/reports/ -/tmp/ -*output.csv -*input.csv -*result.csv -*transformed.csv +tmp/* +!tmp/ diff --git a/.travis.yml b/.travis.yml index 31adce5..a9ee759 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,9 @@ sudo: false language: ruby cache: bundler rvm: + - 2.0.0 - 2.1.0 - 2.2.0 - - 2.2.2 - 2.3.0 - - 2.3.3 -before_install: gem install bundler -v 1.13.6 + - 2.4.0 + - 2.5.0 diff --git a/README.md b/README.md index b2185fe..522f686 100644 --- a/README.md +++ b/README.md @@ -46,20 +46,20 @@ The headers will be picked up and used instead of the first line. These are the settings that will be merged with settings passed through either `SimpleCsv#generate` or `SimpleCsv#read` -| setting | value | -|----------------------|---------------------------------------| -|`:col_sep` | `","` | -|`:row_sep` | `:auto` | -|`:quote_char` | `"\"` | -|`:field_size_limit` | `nil` | -|`:converters` | `[:all, :blank_to_nil, :null_to_nil]` | -|`:unconverted_fields` | `nil` | -|`:headers` | `true` | -|`:return_headers` | `false` | -|`:header_converters` | `nil` | -|`:skip_blanks` | `false` | -|`:force_quotes` | `true` | -|`:skip_lines` | `nil` | +| setting | value | +| ---------------------- | --------------------------------------- | +| `:col_sep` | `","` | +| `:row_sep` | `:auto` | +| `:quote_char` | `"\"` | +| `:field_size_limit` | `nil` | +| `:converters` | `[:all, :blank_to_nil, :null_to_nil]` | +| `:unconverted_fields` | `nil` | +| `:headers` | `true` | +| `:return_headers` | `false` | +| `:header_converters` | `nil` | +| `:skip_blanks` | `false` | +| `:force_quotes` | `true` | +| `:skip_lines` | `nil` | The following settings differ from the `CSV::DEFAULT_OPTIONS` @@ -110,7 +110,12 @@ SimpleCsv.generate path, options = { ... 
}, &block The `SimpleCsv#generate` method takes a (required) path, an (optional) hash of options and a (required) block to start building a CSV file. To generate a CSV file we use `SimpleCsv#generate` (using the [faker](https://github.com/stympy/faker) gem to provide fake data) -While writing a row to a CSV, the value of a set property can be accessed by calling that property method again without arguments. (See the "inspect a value" comment in the following example) +This method passes any unknown method to its caller (`main Object` if none). If you need a reference to the instance of the current writer from within the block, it takes an optional argument: + +```ruby +``` + +While writing a row to a CSV, the value of a set property can be accessed by calling that property method again without arguments (see the "inspect a value" comment in the following example). ```ruby require 'faker' @@ -156,7 +161,7 @@ end ### Reading a CSV file without headers -Last but not least, if we have a CSV file that does not contain headers we can use the following setup. +If we have a CSV file that does not contain headers we can use the following setup. Setting `:has_headers` to `false` means we do not expect the first line to be headers. Therefore we have to explicitly define the headers before looping the CSV. @@ -172,6 +177,34 @@ SimpleCsv.read('headerless.csv', has_headers: false) do end ``` +### Transforming a CSV file + +When you want to alter or reduce the output of a given CSV file, `SimpleCsv#transform` can be used. +This allows you to call a block for each value in a specified column; you can also control the output headers to remove clutter from the input file. + +A transformation is defined by calling the header you wish to modify with a block that performs the modification. +In the example below, a CSV with columns `:name`, `:username`, `:age` and `:interests` is assumed. The `:age` of every row +will be incremented because `age` was defined with the block. 
**Only** `headers` _and_ `output_headers` are supported within the transform block. + +```ruby +SimpleCsv.transform('people.csv', output: 'people2.csv') do + # define specific output headers, other columns will not be added to output csv file + output_headers :name, :username, :age, :interests + + # everyone got one year older, increase all ages. + age { |n| n + 1 } + + # replace all names with "#{name}_old". + name { |s| "#{s}_old" } +end +``` + +The above example will create a file called `people2.csv` that contains the result data. The original file is **not** destroyed by default. +There is one additional option for `SimpleCsv#transform` which is the `:output` option. +When this option is not set, the returned file will have the same name as the input CSV followed by a timestamp +formatted in the following format: `[input_csv]-[%d-%m-%Y-%S%7N].csv` (`[input_csv]` will have `.csv` extension stripped and reapplied). +See Ruby's [`Time#strftime`](https://ruby-doc.org/core-2.5.0/Time.html) documentation for more information. + ### Batch operations If we have a large CSV we might want to batch operations (say, if we are inserting this data into a database or through an API). 
diff --git a/bin/console b/bin/console index a5961c0..254fef9 100755 --- a/bin/console +++ b/bin/console @@ -5,21 +5,25 @@ require 'pry' require 'faker' require 'simple_csv' -SimpleCsv.generate('sample.csv') do - headers :name, :age +# SimpleCsv.generate('sample.csv') do +# headers :name, :age - 10.times do - name Faker::Name.name - age Faker::Number.between(20, 120) - end -end +# 10.times do +# name Faker::Name.name +# age Faker::Number.between(20, 120) +# end +# end -SimpleCsv.transform('sample.csv', output: 'result.csv') do - output_headers :age +SimpleCsv.transform('spec/files/result.csv', output: 'result.csv') do + output_headers 'user name' - age { |n| n * 2 } + user_name { |n| n * 2 } end SimpleCsv.read('result.csv') do p headers + + each_row do + p headers.map { |h| send(h) } + end end diff --git a/lib/simple_csv/base.rb b/lib/simple_csv/base.rb index e10c4c6..44bcc01 100644 --- a/lib/simple_csv/base.rb +++ b/lib/simple_csv/base.rb @@ -42,14 +42,13 @@ def first_line @first_line ||= File.open @csv_path, &:readline end - def headers? @headers_set end - def alias_to_friendly_headers + def alias_to_friendly_headers(names = @headers) @col_map ||= {} - aliasses = headers.each_with_object({}) do |hdr, h| + aliasses = names.each_with_object({}) do |hdr, h| n = hdr.to_s.strip.gsub(/([a-z])([A-Z])/, '\1_\2').downcase .gsub(/[^\w]|\s/, '_') h[n] = hdr unless @col_map.key? n diff --git a/lib/simple_csv/transformer.rb b/lib/simple_csv/transformer.rb index f79d05c..e30dc58 100644 --- a/lib/simple_csv/transformer.rb +++ b/lib/simple_csv/transformer.rb @@ -22,13 +22,14 @@ def output_headers(*out_headers) return @output_headers if @output_headers.any? 
@output_headers = out_headers.map(&:to_s) + alias_to_friendly_headers @output_headers + @output_headers end private def apply_transforms(path, **opts) received_headers = headers - transformations = @transforms timestamp = Time.new.strftime '%d-%m-%Y-%S%7N' output_path = opts.delete(:output) || "#{path.split('.')[0..-2].join}-#{timestamp}.csv" output_headers = @output_headers.any? ? @output_headers : received_headers @@ -39,7 +40,7 @@ def apply_transforms(path, **opts) reader.each_row do output_headers.each do |column| - transform = transformations[column.to_sym] + transform = find_transform column result = transform ? transform.call(reader.send(column)) : reader.send(column) @@ -50,8 +51,14 @@ def apply_transforms(path, **opts) end end + def find_transform(column) + @transforms[(@col_map.key(column.to_s) || column).to_sym] + end + def method_missing(mtd, *args, &block) - if headers.include?(mtd.to_s) || @output_headers.include?(mtd.to_s) + mstr = mtd.to_s + + if headers.include?(mstr) || @output_headers.include?(mstr) || @col_map.key?(mstr) @transforms[mtd] = block || args.first unless @transforms.key? 
mtd else @caller_self.send mtd, *args, &block diff --git a/simple_csv.gemspec b/simple_csv.gemspec index 69ebac3..0bb7f6f 100644 --- a/simple_csv.gemspec +++ b/simple_csv.gemspec @@ -24,9 +24,9 @@ Gem::Specification.new do |spec| spec.required_ruby_version = '>= 2.1' - spec.add_development_dependency 'bundler', '~> 1.13' - spec.add_development_dependency 'rake', '~> 10.0' - spec.add_development_dependency 'rspec', '~> 3.0' + spec.add_development_dependency 'bundler' + spec.add_development_dependency 'rake' + spec.add_development_dependency 'rspec' spec.add_development_dependency 'pry' spec.add_development_dependency 'pry-byebug' spec.add_development_dependency 'faker' diff --git a/spec/files/output.csv b/spec/files/output.csv deleted file mode 100644 index 778a6ec..0000000 --- a/spec/files/output.csv +++ /dev/null @@ -1,101 +0,0 @@ -"first_name","last_name","birth_date","employed_at" -"foo","bar","1933-08-21","" -"foo","bar","1933-08-21","" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" 
-"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" 
-"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" -"foo","bar","1933-08-21","bizz" diff --git a/spec/simple_csv_spec.rb b/spec/simple_csv_spec.rb index 8440116..d097f79 100644 --- a/spec/simple_csv_spec.rb +++ b/spec/simple_csv_spec.rb @@ -1,16 +1,15 @@ require 'spec_helper' describe SimpleCsv do - describe SimpleCsv::Writer do - it 'can generate a CSV file' do - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100) + it 'generates a CSV file' do + csv_path = Helpers.generate_csv('tmp/result.csv', rows: 10) expect(CSV.open(csv_path)).to be_instance_of(CSV) end it 'converts code-unfriendly headers to callable method aliasses' do expect do - SimpleCsv.generate('spec/files/result.csv') do + SimpleCsv.generate('tmp/result.csv') do headers 'user name' user_name 'sidofc' @@ -19,7 +18,7 @@ end it 'headers return values when called without arguments' do - SimpleCsv.generate('spec/files/result.csv') do + SimpleCsv.generate('tmp/result.csv') do headers 'user name' user_name 'sidofc' @@ -30,7 +29,7 @@ it 'raises RowNotComplete if a property is called twice in the same loop' do expect do - SimpleCsv.generate('spec/files/output.csv') do + SimpleCsv.generate('tmp/output.csv') do headers :username, :email, :age username 'sidofc' @@ -42,11 +41,11 @@ end describe SimpleCsv::Reader do - it 'read CSV files delimited by ",", ";" or "|"' do - %w(, ; |).each do |sep| + %w(, ; |).each do |sep| + it "reads CSV files delimited by \"#{sep}\"" do res = [] - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100, - seperator: sep) + csv_path = Helpers.generate_csv 'tmp/result.csv', + rows: 10, seperator: sep SimpleCsv.read(csv_path) do headers(*Helpers::HEADERS) each_row { res = res.concat(headers.map { |mtd| send(mtd) }) } @@ -60,20 +59,22 @@ end it 'headers return values when called without arguments' do - 
SimpleCsv.generate('spec/files/result.csv') do + SimpleCsv.generate('tmp/result.csv') do headers 'First name' first_name 'foo' + + expect(first_name).to eq 'foo' end - SimpleCsv.read('spec/files/result.csv') do + SimpleCsv.read('tmp/result.csv') do each_row { expect(first_name).to eq 'foo' } end end - it 'can detect headers automatically' do + it 'detects headers automatically' do # if the file has_headers res = [] - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100) + csv_path = Helpers.generate_csv('tmp/result.csv', rows: 10) SimpleCsv.read(csv_path) do each_row { res = res.concat(headers.map { |mtd| send(mtd) }) } end @@ -84,7 +85,7 @@ it 'allows aliassing headers' do # if the file has_headers - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100) + csv_path = Helpers.generate_csv('tmp/result.csv', rows: 10) SimpleCsv.read(csv_path) do headers :first_name, first_name: :aliassed_method @@ -122,14 +123,14 @@ describe SimpleCsv::Transformer do it 'can transform a CSV' do - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100, age: 40, first_name: 'hello') + csv_path = Helpers.generate_csv('tmp/result.csv', rows: 10, age: 40, first_name: 'hello') - SimpleCsv.transform csv_path, output: 'spec/files/transformed.csv' do + SimpleCsv.transform csv_path, output: 'tmp/transformed.csv' do first_name { |s| s + 'hello' } age { |n| n * 2 } end - SimpleCsv.read 'spec/files/transformed.csv' do + SimpleCsv.read 'tmp/transformed.csv' do each_row { expect(first_name).to eq "hellohello" expect(age).to eq 80 @@ -138,17 +139,38 @@ end it 'allows reducing output with SimpleCsv::Transformer#output_headers' do - csv_path = Helpers.generate_csv('spec/files/result.csv', rows: 100, first_name: 'hello') + csv_path = Helpers.generate_csv('tmp/result.csv', rows: 10, first_name: 'hello') - SimpleCsv.transform csv_path, output: 'spec/files/transformed.csv' do + SimpleCsv.transform csv_path, output: 'tmp/transformed.csv' do output_headers :first_name 
first_name { |s| s + 'hello' } end - SimpleCsv.read 'spec/files/transformed.csv' do + SimpleCsv.read 'tmp/transformed.csv' do expect(headers).to eq ['first_name'] end end + + it 'converts code-unfriendly headers to callable method aliasses' do + SimpleCsv.generate 'tmp/result.csv' do + headers 'first name', 'last name' + + first_name 'john' + last_name 'smith' + end + + SimpleCsv.transform 'tmp/result.csv', output: 'tmp/hello.csv' do + first_name do |str| + expect(str).to eq 'john' + str + end + + send 'first name' do |str| + expect(str).to eq 'john' + str + end + end + end end end diff --git a/tmp/.gitkeep b/tmp/.gitkeep new file mode 100644 index 0000000..e69de29