Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better number normalization fixes #2419 #4530

Merged
merged 1 commit into from
Jul 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ PENDING_SPECS = \
WORKING_SPECS_1 = \
spec/rspec_configuration.rb \
spec/models/table_spec.rb \
spec/models/table/relator_spec.rb \
spec/models/table/relator_spec.rb \
spec/models/table/column_typecaster_spec.rb \
spec/models/user_spec.rb \
spec/models/user_presenter_spec.rb \
spec/models/layer_spec.rb \
Expand Down
37 changes: 29 additions & 8 deletions app/models/table/column_typecaster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ def straight_cast(new_type=self.new_type, options = {})
end #straight_cast

def string_to_number
thousand, decimal = get_digit_separators_for(column_name)
normalize_digit_separators(thousand, decimal)
thousand_separator, decimal_separator = get_digit_separators_for(column_name)
normalize_number(thousand_separator, decimal_separator)
nullify_if_non_convertible
end #string_to_number
end

def string_to_datetime
straight_cast('timestamptz', cast: "cartodb.CDB_StringToDate(#{column_name})")
Expand Down Expand Up @@ -209,14 +209,35 @@ def get_digit_separators_for(column_name)
}, &:to_a).first.values
end #get_digit_separators

def normalize_digit_separators(thousand=nil, decimal=nil)
return unless thousand && decimal
def normalize_number(thousand_separator=nil, decimal_separator=nil)
return unless thousand_separator && decimal_separator

replacements = [
[thousand_separator, ''],
[decimal_separator, '.'],
['$', ''],
['€', '']
]

user_database.execute(%Q{
UPDATE #{qualified_table}
SET #{column_name} = replace(
replace(#{column_name}, '#{thousand}', ''), '#{decimal}', '.'
)
SET "#{column_name}" = trim(#{pg_replace_expression(column_name, replacements)})
})
end

# Takes a column_name and a mapping of replacements and returns an expression such as:
# replace(replace(replace(column_name, k1, v1), k2, v2), k3, v3)
def pg_replace_expression(column_name, replacement_map=[])
# base case
k1, v1 = replacement_map.shift
expression = %Q{replace("#{column_name}", '#{k1}', '#{v1}')}

# rest
replacement_map.each do |k, v|
expression = %Q{replace(#{expression}, '#{k}', '#{v}')}
end

return expression
end
end
end
71 changes: 36 additions & 35 deletions spec/models/table/column_typecaster_spec.rb
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
# encoding: utf-8
require 'rspec/core'
require 'rspec/expectations'
require 'rspec/mocks'
require_relative '../../spec_helper'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I basically reused the spec file: the only test was not in the Makefile and was not recoverable so I wrote a useful one and got rid of the rest.

require_relative '../../rspec_configuration'
require_relative '../../../app/models/table/column_typecaster'

describe CartoDB::ColumnTypecaster do
before do
quota_in_bytes = 524288000
table_quota = 500
@user = create_user(
quota_in_bytes: quota_in_bytes,
table_quota: table_quota
)
end

after do
@user.destroy
end
describe '#pg_replace_expression' do
before(:all) do
@column_typecaster = CartoDB::ColumnTypecaster.new({
user_database: nil,
schema: nil,
table_name: nil,
column_name: nil,
new_type: nil
})
end

pending 'raises NonConvertibleData when trying to cast a non-supported time format to date' do
@user.in_database { |database| @db = database }
it 'takes a column_name and a set of replacements and returns a postgres expression that implements them' do
# e.g.: as it if where in spanish locale
thousand_separator = '.'
decimal_separator = ','

table_name = "test_#{rand(999)}"
schema = @user.database_schema
dataset = @db[table_name.to_sym]
time_text = 'Mon Oct 13 1997 15:32:18 GMT+0200 (CEST)'
# order matters, the innermost expression will be applied first
replacements = [
[thousand_separator, ''],
[decimal_separator, '.'],
['$', '']
]

@db.create_table?(table_name) do
String :time_with_timezone
@column_typecaster.send(:pg_replace_expression, 'my_column', replacements)
.should == %Q{replace(replace(replace("my_column", '.', ''), ',', '.'), '$', '')}
end

5.times { dataset.insert(time_with_timezone: time_text) }

typecaster = CartoDB::ColumnTypecaster.new(
user_database: @db,
schema: schema,
table_name: table_name,
column_name: :time_with_timezone,
new_type: 'date'
)
it 'returns an expression quasi-equivalent to the previous implementation' do
# us locale
thousand_separator = ','
decimal_separator = '.'

expect { typecaster.run }.to raise_error CartoDB::NonConvertibleData
dataset.first.fetch(:time_with_timezone).should_not be_nil
replacements = [
[thousand_separator, ''],
[decimal_separator, '.']
]

@db.drop_table?(table_name)
@column_typecaster.send(:pg_replace_expression, 'my_column', replacements)
.should == %Q{replace(replace("my_column", ',', ''), '.', '.')}
end
end
end