Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4417 add logger to geocodings #4625

Merged
merged 7 commits into from
Jul 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion app/models/geocoding.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# encoding: UTF-8'
require 'socket'
require_relative '../../services/table-geocoder/lib/table_geocoder_factory'
require_relative '../../services/table-geocoder/lib/exceptions'
require_relative '../../services/geocoder/lib/geocoder_config'
require_relative '../../lib/cartodb/metrics'
require_relative '../../lib/cartodb/mixpanel'
require_relative 'log'

class Geocoding < Sequel::Model

Expand All @@ -26,6 +28,7 @@ class Geocoding < Sequel::Model

attr_reader :table_geocoder
attr_reader :started_at, :finished_at
attr_reader :log

def self.get_geocoding_calls(dataset, date_from, date_to)
dataset.where(kind: 'high-resolution').where('geocodings.created_at >= ? and geocodings.created_at <= ?', date_from, date_to + 1.days).sum("processed_rows + cache_hits".lit).to_i
Expand Down Expand Up @@ -79,7 +82,8 @@ def table_geocoder
kind: kind,
max_rows: max_geocodable_rows,
country_column: country_column,
region_column: region_column)
region_column: region_column,
log: self.log)
rescue => e
@table_geocoder = nil
raise e
Expand All @@ -106,6 +110,7 @@ def cancel

# INFO: this method shall always be called from a queue processor
def run!
log.append "Running geocoding job on server #{Socket.gethostname} with PID: #{Process.pid}"
if self.force_all_rows == true
table_geocoder.reset_cartodb_georef_status
else
Expand All @@ -123,13 +128,17 @@ def run!

self.run_geocoding!(processable_rows, rows_geocoded_before)
rescue => e
log.append "Unexpected exception: #{e.to_s}"
log.append e.backtrace
CartoDB.notify_exception(e)
raise e
ensure
log.store # Make sure the log is stored in DB
user.reset_pooled_connections
end

def run_geocoding!(processable_rows, rows_geocoded_before = 0)
log.append "run_geocoding!()"
self.update state: 'started', processable_rows: processable_rows
@started_at = Time.now

Expand All @@ -152,6 +161,7 @@ def run_geocoding!(processable_rows, rows_geocoded_before = 0)
@finished_at = Time.now
self.batched = table_geocoder.used_batch_request?
self.update(state: 'finished', real_rows: rows_geocoded_after - rows_geocoded_before, used_credits: calculate_used_credits)
log.append "Geocoding finished"
self.report
rescue => e
@finished_at = Time.now
Expand Down Expand Up @@ -292,11 +302,30 @@ def metrics_payload(exception = nil)
payload
end

def log
@log ||= instantiate_log
end


private

def table_service
@table_service ||= Table.new(user_table: user_table) if user_table.present?
end

def instantiate_log
if self.log_id
@log = CartoDB::Log.where(id: log_id).first
else
@log = CartoDB::Log.new({
type: CartoDB::Log::TYPE_GEOCODING,
user_id: user.id
})
@log.store
self.log_id = @log.id
self.save
end
@log
end

end
10 changes: 9 additions & 1 deletion app/models/log.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ class Log < Sequel::Model
TYPE_DATA_IMPORT = 'import'
TYPE_SYNCHRONIZATION = 'sync'
TYPE_USER_CREATION = 'user_creation'
TYPE_GEOCODING = 'geocoding'

SUPPORTED_TYPES = [
TYPE_DATA_IMPORT,
TYPE_SYNCHRONIZATION,
TYPE_USER_CREATION,
TYPE_GEOCODING
]

# @param id Numeric
# @param type String
Expand Down Expand Up @@ -165,7 +173,7 @@ def fix_entries_encoding

def validate
super
errors.add(:type, 'unsupported type') unless (self.type == TYPE_DATA_IMPORT || self.type == TYPE_SYNCHRONIZATION)
errors.add(:type, 'unsupported type') unless SUPPORTED_TYPES.include?(self.type)
end

def before_save
Expand Down
13 changes: 13 additions & 0 deletions db/migrate/20150722102858_add_log_to_geocodings.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Sequel.migration do
up do
alter_table :geocodings do
add_column :log_id, :uuid
end
end

down do
alter_table :geocodings do
drop_column :log_id
end
end
end
8 changes: 7 additions & 1 deletion services/table-geocoder/lib/internal_geocoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Geocoder < AbstractTableGeocoder
:working_dir, :remote_id, :state, :processed_rows, :country_column, :region_column,
:qualified_table_name, :batch_size, :countries, :regions, :kind, :geometry_type

attr_accessor :table_schema, :table_name, :column_name
attr_accessor :table_schema, :table_name, :column_name, :log

def initialize(arguments)
super(arguments)
Expand All @@ -33,9 +33,11 @@ def initialize(arguments)
@working_dir = arguments[:working_dir] || Dir.mktmpdir
@geocoding_results = File.join(working_dir, "#{temp_table_name}_results.csv")
@query_generator = CartoDB::InternalGeocoder::QueryGeneratorFactory.get self
@log = arguments[:log]
end # initialize

def run
log.append 'run()'
@state = 'processing'
ensure_georef_status_colummn_valid
download_results
Expand All @@ -53,6 +55,7 @@ def run
end

def download_results
log.append 'download_results()'
begin
count = count + 1 rescue 0
search_terms = get_search_terms(count)
Expand All @@ -72,6 +75,7 @@ def get_search_terms(page)
end # get_search_terms

def create_temp_table
log.append 'create_temp_table()'
connection.run(%Q{
CREATE TABLE #{temp_table_name} (
geocode_string text, country text, region text, the_geom geometry, cartodb_georef_status boolean
Expand All @@ -87,10 +91,12 @@ def process_results; end
def cancel; end

def load_results_to_temp_table
log.append 'load_results_to_temp_table()'
connection.copy_into(temp_table_name.lit, data: File.read(geocoding_results), format: :csv)
end # load_results_to_temp_table

def copy_results_to_table
log.append 'copy_results_to_table()'
# 'InternalGeocoder::copy_results_to_table'
CartoDB::Importer2::QueryBatcher.new(
connection,
Expand Down
4 changes: 4 additions & 0 deletions services/table-geocoder/lib/table_geocoder_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def self.get(user, cartodb_geocoder_config, table_service, params = {})
# Reset old connections to make sure changes apply.
# NOTE: This assumes it's being called from a Resque job
user.reset_pooled_connections
log = params.fetch(:log)
log.append 'TableGeocoderFactory.get()'
log.append "params: #{params.select{|k| k != :log}.to_s}"

if user == table_service.owner
user_connection = user.in_database
Expand Down Expand Up @@ -50,6 +53,7 @@ def self.get(user, cartodb_geocoder_config, table_service, params = {})
geocoder_class = CartoDB::InternalGeocoder::Geocoder
end

log.append "geocoder_class = #{geocoder_class.to_s}"
return geocoder_class.new(instance_config)
end

Expand Down
46 changes: 23 additions & 23 deletions spec/models/geocoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
end

describe '#setup' do
let(:geocoding) { FactoryGirl.create(:geocoding, user: @user, user_table: @table) }
let(:geocoding) { FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'foo', kind: 'admin0') }

it 'sets default timestamps value' do
geocoding.created_at.should_not be_nil
Expand All @@ -33,17 +33,17 @@

describe '#table_geocoder' do
it 'returns an instance of TableGeocoder when kind is high-resolution' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'high-resolution')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'high-resolution', formatter: 'foo')
geocoding.table_geocoder.should be_kind_of(CartoDB::TableGeocoder)
end

it 'returns an instance of InternalGeocoder when kind is not high-resolution' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon', formatter: 'foo')
geocoding.table_geocoder.should be_kind_of(CartoDB::InternalGeocoder::Geocoder)
end

it 'memoizes' do
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon')
geocoding = FactoryGirl.build(:geocoding, user: @user, user_table: @table, kind: 'admin0', geometry_type: 'polygon', formatter: 'foo')
geocoder = geocoding.table_geocoder
geocoder.should be_kind_of(CartoDB::InternalGeocoder::Geocoder)
geocoder.should eq geocoding.table_geocoder
Expand Down Expand Up @@ -94,7 +94,7 @@
describe '#run!' do
it 'updates geocoding stats' do
# TODO: this doesn't really test anything but parameter passing, consider deleting it
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b')
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b', kind: 'high-resolution')
geocoding.table_geocoder.stubs(:run).returns true
geocoding.table_geocoder.stubs(:used_batch_request?).returns false
geocoding.table_geocoder.stubs(:cache).returns OpenStruct.new(hits: 5000)
Expand Down Expand Up @@ -189,7 +189,7 @@
end

pending 'raises an exception if the geocoding times out' do
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b')
geocoding = FactoryGirl.create(:geocoding, user: @user, user_table: @table, formatter: 'b', kind: 'high-resolution')
geocoding.class.stubs(:processable_rows).returns 10
geocoding.stubs(:processing_timeout_seconds).returns 0.01 # set timeout to 10 ms

Expand Down Expand Up @@ -220,13 +220,13 @@
@user.stubs('hard_geocoding_limit?').returns(true)
delete_user_data @user
geocoding.max_geocodable_rows.should eq 200
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, remote_id: 'wadus')
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, remote_id: 'wadus', formatter: 'foo', kind: 'high-resolution')
geocoding.max_geocodable_rows.should eq 100
end

it 'returns 50000 if the user has soft limit' do
@user.stubs('soft_geocoding_limit?').returns(true)
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100)
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, formatter: 'foo', kind: 'high-resolution')
geocoding.max_geocodable_rows.should eq 50000
end

Expand All @@ -237,15 +237,15 @@
org_geocoding = FactoryGirl.build(:geocoding, user: org_user)
organization.geocoding_quota.should eq 150
org_geocoding.max_geocodable_rows.should eq 150
FactoryGirl.create(:geocoding, user: org_user, processed_rows: 100, remote_id: 'wadus')
FactoryGirl.create(:geocoding, user: org_user, processed_rows: 100, remote_id: 'wadus', formatter: 'foo', kind: 'high-resolution')
org_geocoding.max_geocodable_rows.should eq 50
organization.destroy
end

end

describe '#cancel' do
let(:geocoding) { FactoryGirl.build(:geocoding, user: @user, user_table: @table) }
let(:geocoding) { FactoryGirl.build(:geocoding, user: @user, user_table: @table, formatter: 'foo', kind: 'high-resolution') }

it 'cancels the geocoding job' do
geocoding.table_geocoder.expects(:cancel).times(1).returns(true)
Expand All @@ -265,39 +265,39 @@
end

it 'returns 0 when the geocode is not high-resolution' do
geocoding = FactoryGirl.create(:geocoding, user: @user, kind: 'admin0', processed_rows: 10500)
geocoding = FactoryGirl.create(:geocoding, user: @user, kind: 'admin0', processed_rows: 10500, formatter: 'foo')
geocoding.calculate_used_credits.should eq 0
end

it 'returns 0 when the user has enough quota' do
# User has 200 geocoding credits, so geocoding 200 strings should take 0 credits
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 200, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 200, kind: 'high-resolution', formatter: 'foo')
geocoding.calculate_used_credits.should eq 0
end

it 'returns the used credits when the user is over geocoding quota' do
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, kind: 'high-resolution', formatter: 'foo')
# 100 total (user has 200) => 0 used credits
geocoding.calculate_used_credits.should eq 0
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 150, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 150, kind: 'high-resolution', formatter: 'foo')
# 250 total => 50 used credits
geocoding.calculate_used_credits.should eq 50
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, kind: 'high-resolution', formatter: 'foo')
# 350 total => 100 used credits
geocoding.calculate_used_credits.should eq 100
end
end

describe '#price' do
it 'returns 0 when the job used no credits' do
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 0)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 0, formatter: 'foo', kind: 'admin0')
geocoding.price.should eq 0
end

it 'returns the expected price when the geocoding used some credits' do
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 100)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 100, formatter: 'foo', kind: 'high-resolution')
geocoding.price.should eq 150
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 3)
geocoding = FactoryGirl.create(:geocoding, user: @user, used_credits: 3, formatter: 'foo', kind: 'high-resolution')
geocoding.price.should eq 4.5
end
end
Expand Down Expand Up @@ -339,19 +339,19 @@
@user.geocodings_dataset.delete
end
it 'returns expected results' do
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, real_rows: 100, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 100, real_rows: 100, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 100
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 10, cache_hits: 150, real_rows: 155, processable_rows: 160, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 10, cache_hits: 150, real_rows: 155, processable_rows: 160, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 5
geocoding.successful_rows.should eq 155
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 100, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 100, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 100
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 0, real_rows: 0, processable_rows: 0, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 0, cache_hits: 0, real_rows: 0, processable_rows: 0, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 0
geocoding.successful_rows.should eq 0
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 0, processable_rows: 100, kind: 'high-resolution')
geocoding = FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, cache_hits: 0, real_rows: 0, processable_rows: 100, kind: 'high-resolution', formatter: 'foo')
geocoding.failed_rows.should eq 100
geocoding.successful_rows.should eq 0
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def remove_dates(geocoding_hash)
get api_v1_geocodings_index_url
last_response.status.should eq 200

expected = {"geocodings"=>[{"table_name"=>nil, "processed_rows"=>1, "remote_id"=>nil, "formatter"=>nil, "state"=>"started", "cache_hits"=>0, "id"=>geocoding1.id, "user_id"=>@user1.id,"table_id"=>nil, "automatic_geocoding_id"=>nil, "kind"=>"high-resolution", "country_code"=>nil, "geometry_type"=>nil, "processable_rows"=>nil, "real_rows"=>nil, "used_credits"=>nil, "country_column"=>nil, "data_import_id"=>nil, "region_code"=>nil, "region_column"=>nil, "batched"=>nil, "error_code"=>nil, "force_all_rows"=>false}]}
expected = {"geocodings"=>[{"table_name"=>nil, "processed_rows"=>1, "remote_id"=>nil, "formatter"=>nil, "state"=>"started", "cache_hits"=>0, "id"=>geocoding1.id, "user_id"=>@user1.id,"table_id"=>nil, "automatic_geocoding_id"=>nil, "kind"=>"high-resolution", "country_code"=>nil, "geometry_type"=>nil, "processable_rows"=>nil, "real_rows"=>nil, "used_credits"=>nil, "country_column"=>nil, "data_import_id"=>nil, "region_code"=>nil, "region_column"=>nil, "batched"=>nil, "error_code"=>nil, "force_all_rows"=>false, "log_id"=>nil}]}
received_without_dates = { 'geocodings' => JSON.parse(last_response.body)['geocodings'].map { |g| remove_dates(g) } }
received_without_dates.should == expected
end
Expand Down