Skip to content

Commit

Permalink
Merge pull request #4625 from CartoDB/4417-add-logger-to-geocodings
Browse files Browse the repository at this point in the history
4417 add logger to geocodings
  • Loading branch information
Rafa de la Torre committed Jul 23, 2015
2 parents 81cb147 + 2fccc34 commit 737ee74
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 27 deletions.
31 changes: 30 additions & 1 deletion app/models/geocoding.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# encoding: UTF-8'
require 'socket'
require_relative '../../services/table-geocoder/lib/table_geocoder_factory'
require_relative '../../services/table-geocoder/lib/exceptions'
require_relative '../../services/geocoder/lib/geocoder_config'
require_relative '../../lib/cartodb/metrics'
require_relative '../../lib/cartodb/mixpanel'
require_relative 'log'

class Geocoding < Sequel::Model

Expand All @@ -26,6 +28,7 @@ class Geocoding < Sequel::Model

attr_reader :table_geocoder
attr_reader :started_at, :finished_at
attr_reader :log

def self.get_geocoding_calls(dataset, date_from, date_to)
dataset.where(kind: 'high-resolution').where('geocodings.created_at >= ? and geocodings.created_at <= ?', date_from, date_to + 1.days).sum("processed_rows + cache_hits".lit).to_i
Expand Down Expand Up @@ -79,7 +82,8 @@ def table_geocoder
kind: kind,
max_rows: max_geocodable_rows,
country_column: country_column,
region_column: region_column)
region_column: region_column,
log: self.log)
rescue => e
@table_geocoder = nil
raise e
Expand All @@ -106,6 +110,7 @@ def cancel

# INFO: this method shall always be called from a queue processor
def run!
log.append "Running geocoding job on server #{Socket.gethostname} with PID: #{Process.pid}"
if self.force_all_rows == true
table_geocoder.reset_cartodb_georef_status
else
Expand All @@ -123,13 +128,17 @@ def run!

self.run_geocoding!(processable_rows, rows_geocoded_before)
rescue => e
log.append "Unexpected exception: #{e.to_s}"
log.append e.backtrace
CartoDB.notify_exception(e)
raise e
ensure
log.store # Make sure the log is stored in DB
user.reset_pooled_connections
end

def run_geocoding!(processable_rows, rows_geocoded_before = 0)
log.append "run_geocoding!()"
self.update state: 'started', processable_rows: processable_rows
@started_at = Time.now

Expand All @@ -152,6 +161,7 @@ def run_geocoding!(processable_rows, rows_geocoded_before = 0)
@finished_at = Time.now
self.batched = table_geocoder.used_batch_request?
self.update(state: 'finished', real_rows: rows_geocoded_after - rows_geocoded_before, used_credits: calculate_used_credits)
log.append "Geocoding finished"
self.report
rescue => e
@finished_at = Time.now
Expand Down Expand Up @@ -292,11 +302,30 @@ def metrics_payload(exception = nil)
payload
end

def log
@log ||= instantiate_log
end


private

def table_service
@table_service ||= Table.new(user_table: user_table) if user_table.present?
end

def instantiate_log
if self.log_id
@log = CartoDB::Log.where(id: log_id).first
else
@log = CartoDB::Log.new({
type: CartoDB::Log::TYPE_GEOCODING,
user_id: user.id
})
@log.store
self.log_id = @log.id
self.save
end
@log
end

end
10 changes: 9 additions & 1 deletion app/models/log.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ class Log < Sequel::Model
TYPE_DATA_IMPORT = 'import'
TYPE_SYNCHRONIZATION = 'sync'
TYPE_USER_CREATION = 'user_creation'
TYPE_GEOCODING = 'geocoding'

SUPPORTED_TYPES = [
TYPE_DATA_IMPORT,
TYPE_SYNCHRONIZATION,
TYPE_USER_CREATION,
TYPE_GEOCODING
]

# @param id Numeric
# @param type String
Expand Down Expand Up @@ -165,7 +173,7 @@ def fix_entries_encoding

def validate
super
errors.add(:type, 'unsupported type') unless (self.type == TYPE_DATA_IMPORT || self.type == TYPE_SYNCHRONIZATION)
errors.add(:type, 'unsupported type') unless SUPPORTED_TYPES.include?(self.type)
end

def before_save
Expand Down
13 changes: 13 additions & 0 deletions db/migrate/20150722102858_add_log_to_geocodings.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Sequel.migration do
up do
alter_table :geocodings do
add_column :log_id, :uuid
end
end

down do
alter_table :geocodings do
drop_column :log_id
end
end
end
8 changes: 7 additions & 1 deletion services/table-geocoder/lib/internal_geocoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Geocoder < AbstractTableGeocoder
:working_dir, :remote_id, :state, :processed_rows, :country_column, :region_column,
:qualified_table_name, :batch_size, :countries, :regions, :kind, :geometry_type

attr_accessor :table_schema, :table_name, :column_name
attr_accessor :table_schema, :table_name, :column_name, :log

def initialize(arguments)
super(arguments)
Expand All @@ -33,9 +33,11 @@ def initialize(arguments)
@working_dir = arguments[:working_dir] || Dir.mktmpdir
@geocoding_results = File.join(working_dir, "#{temp_table_name}_results.csv")
@query_generator = CartoDB::InternalGeocoder::QueryGeneratorFactory.get self
@log = arguments[:log]
end # initialize

def run
log.append 'run()'
@state = 'processing'
ensure_georef_status_colummn_valid
download_results
Expand All @@ -53,6 +55,7 @@ def run
end

def download_results
log.append 'download_results()'
begin
count = count + 1 rescue 0
search_terms = get_search_terms(count)
Expand All @@ -72,6 +75,7 @@ def get_search_terms(page)
end # get_search_terms

def create_temp_table
log.append 'create_temp_table()'
connection.run(%Q{
CREATE TABLE #{temp_table_name} (
geocode_string text, country text, region text, the_geom geometry, cartodb_georef_status boolean
Expand All @@ -87,10 +91,12 @@ def process_results; end
def cancel; end

def load_results_to_temp_table
log.append 'load_results_to_temp_table()'
connection.copy_into(temp_table_name.lit, data: File.read(geocoding_results), format: :csv)
end # load_results_to_temp_table

def copy_results_to_table
log.append 'copy_results_to_table()'
# 'InternalGeocoder::copy_results_to_table'
CartoDB::Importer2::QueryBatcher.new(
connection,
Expand Down
4 changes: 4 additions & 0 deletions services/table-geocoder/lib/table_geocoder_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def self.get(user, cartodb_geocoder_config, table_service, params = {})
# Reset old connections to make sure changes apply.
# NOTE: This assumes it's being called from a Resque job
user.reset_pooled_connections
log = params.fetch(:log)
log.append 'TableGeocoderFactory.get()'
log.append "params: #{params.select{|k| k != :log}.to_s}"

if user == table_service.owner
user_connection = user.in_database
Expand Down Expand Up @@ -50,6 +53,7 @@ def self.get(user, cartodb_geocoder_config, table_service, params = {})
geocoder_class = CartoDB::InternalGeocoder::Geocoder
end

log.append "geocoder_class = #{geocoder_class.to_s}"
return geocoder_class.new(instance_config)
end

Expand Down
46 changes: 23 additions & 23 deletions spec/models/geocoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
end

describe '#setup' do
let(:geocoding) { FactoryGirl.create(:geocoding, user: @user, user_table: @table) }
let(:geocoding) { FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'foo', kind: 'admin0') }

it 'sets default timestamps value' do
geocoding.created_at.should_not be_nil
Expand All @@ -33,17 +33,17 @@

describe '#table_geocoder' do
it 'returns an instance of TableGeocoder when kind is high-resolution' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'high-resolution')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'high-resolution', formatter: 'foo')
geocoding.table_geocoder.should be_kind_of(CartoDB::TableGeocoder)
end

it 'returns an instance of InternalGeocoder when kind is not high-resolution' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon', formatter: 'foo')
geocoding.table_geocoder.should be_kind_of(CartoDB::InternalGeocoder::Geocoder)
end

it 'memoizes' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon', formatter: 'foo')
geocoder = geocoding.table_geocoder
geocoder.should be_kind_of(CartoDB::InternalGeocoder::Geocoder)
geocoder.should eq geocoding.table_geocoder
Expand Down Expand Up @@ -94,7 +94,7 @@
describe '#run!' do
it 'updates geocoding stats' do
# TODO: this doesn't really test anything but parameter passing, consider deleting it
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b')
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b', kind: 'high-resolution')
geocoding.table_geocoder.stubs(:run).returns true
geocoding.table_geocoder.stubs(:used_batch_request?).returns false
geocoding.table_geocoder.stubs(:cache).returns OpenStruct.new(hits: 5000)
Expand Down Expand Up @@ -189,7 +189,7 @@
end

pending 'raises an exception if the geocoding times out' do
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b')
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b', kind: 'high-resolution')
geocoding.class.stubs(:processable_rows).returns 10
geocoding.stubs(:processing_timeout_seconds).returns 0.01 # set timeout to 10 ms

Expand Down Expand Up @@ -220,13 +220,13 @@
@user.stubs('hard_geocoding_limit?').returns(true)
delete_user_data @user
geocoding.max_geocodable_rows.should eq 200
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, remote_id: 'wadus')
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, remote_id: 'wadus', formatter: 'foo', kind: 'high-resolution')
geocoding.max_geocodable_rows.should eq 100
end

it 'returns 50000 if the user has soft limit' do
@user.stubs('soft_geocoding_limit?').returns(true)
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100)
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, formatter: 'foo', kind: 'high-resolution')
geocoding.max_geocodable_rows.should eq 50000
end

Expand All @@ -237,15 +237,15 @@
org_geocoding = FactoryGirl.build(:geocoding, user: org_user)
organization.geocoding_quota.should eq 150
org_geocoding.max_geocodable_rows.should eq 150
FactoryGirl.create(:geocoding, user: org_user, processed_rows: 100, remote_id: 'wadus')
FactoryGirl.create(:geocoding, user: org_user, processed_rows: 100, remote_id: 'wadus', formatter: 'foo', kind: 'high-resolution')
org_geocoding.max_geocodable_rows.should eq 50
organization.destroy
end

end

describe '#cancel' do
let(:geocoding) { FactoryGirl.build(:geocoding, user: @user, user_table: @table) }
let(:geocoding) { FactoryGirl.build(:geocoding, user: @user, user_table: @table, formatter: 'foo', kind: 'high-resolution') }

it 'cancels the geocoding job' do
geocoding.table_geocoder.expects(:cancel).times(1).returns(true)
Expand All @@ -265,39 +265,39 @@
end

it 'returns 0 when the geocode is not high-resolution' do
geocoding = FactoryGirl.create(:geocoding, user: @user, kind: 'admin0', processed_rows: 10500)
geocoding = FactoryGirl.create(:geocoding, user: @user, kind: 'admin0', processed_rows: 10500, formatter: 'foo')
geocoding.calculate_used_credits.should eq 0
end

it 'returns 0 when the user has enough quota' do
# User has 200 geocoding credits, so geocoding 200 strings should take 0 credits
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 200, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 200, kind: 'high-resolution', formatter: 'foo')
geocoding.calculate_used_credits.should eq 0
end

it 'returns the used credits when the user is over geocoding quota' do
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, kind: 'high-resolution', formatter: 'foo')
# 100 total (user has 200) => 0 used credits
geocoding.calculate_used_credits.should eq 0
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 150, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 150, kind: 'high-resolution', formatter: 'foo')
# 250 total => 50 used credits
geocoding.calculate_used_credits.should eq 50
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, kind: 'high-resolution', formatter: 'foo')
# 350 total => 100 used credits
geocoding.calculate_used_credits.should eq 100
end
end

describe '#price' do
it 'returns 0 when the job used no credits' do
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 0)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 0, formatter: 'foo', kind: 'admin0')
geocoding.price.should eq 0
end

it 'returns the expected price when the geocoding used some credits' do
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 100)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 100, formatter: 'foo', kind: 'high-resolution')
geocoding.price.should eq 150
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 3)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 3, formatter: 'foo', kind: 'high-resolution')
geocoding.price.should eq 4.5
end
end
Expand Down Expand Up @@ -339,19 +339,19 @@
@user.geocodings_dataset.delete
end
it 'returns expected results' do
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, real_rows: 100, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, real_rows: 100, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 100
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 10, cache_hits: 150, real_rows: 155, processable_rows: 160, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 10, cache_hits: 150, real_rows: 155, processable_rows: 160, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 5
geocoding.successful_rows.should eq 155
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 100, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 100, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 100
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 0, real_rows: 0, processable_rows: 0, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 0, real_rows: 0, processable_rows: 0, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 0
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 0, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 0, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 100
geocoding.successful_rows.should eq 0
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def remove_dates(geocoding_hash)
get api_v1_geocodings_index_url
last_response.status.should eq 200

expected = {"geocodings"=>[{"table_name"=>nil, "processed_rows"=>1, "remote_id"=>nil, "formatter"=>nil, "state"=>"started", "cache_hits"=>0, "id"=>geocoding1.id, "user_id"=>@user1.id,"table_id"=>nil, "automatic_geocoding_id"=>nil, "kind"=>"high-resolution", "country_code"=>nil, "geometry_type"=>nil, "processable_rows"=>nil, "real_rows"=>nil, "used_credits"=>nil, "country_column"=>nil, "data_import_id"=>nil, "region_code"=>nil, "region_column"=>nil, "batched"=>nil, "error_code"=>nil, "force_all_rows"=>false}]}
expected = {"geocodings"=>[{"table_name"=>nil, "processed_rows"=>1, "remote_id"=>nil, "formatter"=>nil, "state"=>"started", "cache_hits"=>0, "id"=>geocoding1.id, "user_id"=>@user1.id,"table_id"=>nil, "automatic_geocoding_id"=>nil, "kind"=>"high-resolution", "country_code"=>nil, "geometry_type"=>nil, "processable_rows"=>nil, "real_rows"=>nil, "used_credits"=>nil, "country_column"=>nil, "data_import_id"=>nil, "region_code"=>nil, "region_column"=>nil, "batched"=>nil, "error_code"=>nil, "force_all_rows"=>false, "log_id"=>nil}]}
received_without_dates = { 'geocodings' => JSON.parse(last_response.body)['geocodings'].map { |g| remove_dates(g) } }
received_without_dates.should == expected
end
Expand Down

0 comments on commit 737ee74

Please sign in to comment.