diff --git a/.travis.yml b/.travis.yml index d746a99..d780277 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ language: ruby rvm: - 2.3.5 before_install: - - curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.deb && sudo dpkg -i elasticsearch-1.4.4.deb && true - - 'echo ''script.disable_dynamic: false'' | sudo tee --append /etc/elasticsearch/elasticsearch.yml' - - 'echo ''index.number_of_shards: 1'' | sudo tee --append /etc/elasticsearch/elasticsearch.yml' + - curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.5.deb && sudo dpkg -i --force-confnew elasticsearch-5.6.5.deb && true + - 'printf "script:\n inline: true\n stored: true\n" | sudo tee --append /etc/elasticsearch/elasticsearch.yml' - sudo service elasticsearch start diff --git a/Gemfile b/Gemfile index 64ebde4..c6e328e 100644 --- a/Gemfile +++ b/Gemfile @@ -8,8 +8,6 @@ end gem 'rails', '5.1.4' gem 'rails-controller-testing', '~> 1.0' gem 'nokogiri', '~> 1.8.0' -gem 'tire', '~> 0.6.2' #deprecated in 2013 -gem 'tire-contrib', '~> 0.1.2' gem 'oj', '~> 3.1.3' # Unused? 
gem 'faraday_middleware', '~> 0.12.2' gem 'net-http-persistent', '~> 2.8' @@ -20,6 +18,10 @@ gem 'rack-cors', '~> 1.0.2' gem 'us_states', '~> 0.1.1', git: 'https://github.com/GSA/us_states.git' gem 'newrelic_rpm', '~> 4.6.0' gem 'rake', '~> 11.0' +gem 'elasticsearch-model' +gem 'elasticsearch-rails' +gem 'elasticsearch-dsl' +gem 'whenever' group :development, :test do gem 'puma', '~> 3.7' diff --git a/Gemfile.lock b/Gemfile.lock index d693232..49c3acc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -47,7 +47,6 @@ GEM airbrake (7.1.0) airbrake-ruby (~> 2.5) airbrake-ruby (2.6.2) - ansi (1.5.0) arel (8.0.0) builder (3.2.3) capistrano (2.15.4) @@ -56,6 +55,7 @@ GEM net-sftp (>= 2.0.0) net-ssh (>= 2.0.14) net-ssh-gateway (>= 1.1.0) + chronic (0.10.2) concurrent-ruby (1.0.5) coveralls (0.7.0) multi_json (~> 1.3) @@ -68,6 +68,20 @@ GEM docile (1.1.3) domain_name (0.5.20170404) unf (>= 0.0.5, < 1.0.0) + elasticsearch (5.0.4) + elasticsearch-api (= 5.0.4) + elasticsearch-transport (= 5.0.4) + elasticsearch-api (5.0.4) + multi_json + elasticsearch-dsl (0.1.5) + elasticsearch-model (5.0.2) + activesupport (> 3) + elasticsearch (~> 5) + hashie + elasticsearch-rails (5.0.2) + elasticsearch-transport (5.0.4) + faraday + multi_json erubi (1.7.0) faraday (0.13.1) multipart-post (>= 1.2, < 3) @@ -76,7 +90,7 @@ GEM ffi (1.9.18) globalid (0.4.1) activesupport (>= 4.2.0) - hashr (0.0.22) + hashie (3.5.7) highline (1.6.19) http-cookie (1.0.3) domain_name (~> 0.5) @@ -204,16 +218,6 @@ GEM thor (0.20.0) thread_safe (0.3.6) tins (0.13.2) - tire (0.6.2) - activemodel (>= 3.0) - activesupport - ansi - hashr (~> 0.0.19) - multi_json (~> 1.3) - rake - rest-client (~> 1.6) - tire-contrib (0.1.2) - tire tzinfo (1.2.4) thread_safe (~> 0.1) unf (0.1.4) @@ -222,6 +226,8 @@ GEM websocket-driver (0.6.5) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.3) + whenever (0.10.0) + chronic (>= 0.6.3) PLATFORMS ruby @@ -230,6 +236,9 @@ DEPENDENCIES airbrake (~> 7.1) capistrano (~> 2.15.4) 
coveralls (~> 0.7.0) + elasticsearch-dsl + elasticsearch-model + elasticsearch-rails faraday_middleware (~> 0.12.2) jbuilder (~> 2.7.0) listen (>= 3.0.5, < 3.2) @@ -250,9 +259,8 @@ DEPENDENCIES spring spring-watcher-listen (~> 2.0.0) test-unit (~> 3.0) - tire (~> 0.6.2) - tire-contrib (~> 0.1.2) us_states (~> 0.1.1)! + whenever BUNDLED WITH 1.16.0 diff --git a/README.md b/README.md index 35fb9cb..8895827 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,9 @@ We use bundler to manage gems. You can install bundler and other required gems l ### Elasticsearch -We're using [Elasticsearch](http://www.elasticsearch.org/) (>= 1.4.0) for fulltext search. On a Mac, it's easy to install with [Homebrew](http://mxcl.github.com/homebrew/). +We're using [Elasticsearch](http://www.elasticsearch.org/) (>= 5.6) for fulltext search. On a Mac, it's easy to install with [Homebrew](http://mxcl.github.com/homebrew/). - $ brew install elasticsearch + $ brew install elasticsearch@5.6 Otherwise, follow the [instructions](http://www.elasticsearch.org/download/) to download and run it. @@ -32,21 +32,21 @@ Otherwise, follow the [instructions](http://www.elasticsearch.org/download/) to Install Docker if you haven't done so yet. Follow the instruction [here](https://www.docker.com/community-edition) Once you have Docker installed on your machine, run the following command in your terminal - $ docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:1.4.5 + $ docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:5.6 -This will download an docker image containing elasticsearch=1.4.5 from docker hub, run it, and expose port 9200 & 9300 to your machine. You can verify your setup with the following command. +This will download a Docker image containing elasticsearch=5.6.5 from docker hub, run it, and expose ports 9200 & 9300 to your machine. You can verify your setup with the following command. 
$ curl localhost:9200 { - "status" : 200, - "name" : "Aegis", + "name" : "u2bQgL2", "cluster_name" : "elasticsearch", + "cluster_uuid" : "qZ-Xas_PR_2ARtHpY724Ug", "version" : { - "number" : "1.4.5", - "build_hash" : "2aaf797f2a571dcb779a3b61180afe8390ab61f9", - "build_timestamp" : "2015-04-27T08:06:06Z", + "number" : "5.6.5", + "build_hash" : "6a37571", + "build_date" : "2017-12-04T07:50:10.466Z", "build_snapshot" : false, - "lucene_version" : "4.10.4" + "lucene_version" : "6.6.1" }, "tagline" : "You Know, for Search" } diff --git a/app/models/geoname.rb b/app/models/geoname.rb index df1b409..b8e1e92 100644 --- a/app/models/geoname.rb +++ b/app/models/geoname.rb @@ -1,75 +1,118 @@ +require 'active_model' +require 'elasticsearch/dsl' + class Geoname - include Tire::Model::Search + include ActiveModel::Model + include Elasticsearch::Model + include Elasticsearch::DSL + + INDEX_NAME = "#{Rails.env}:geonames".freeze + + SYNONYMS = [ + "afb, air force base", + "afs, air force station", + "ang, air national guard", + "cavecreek, cave creek", + "ft, fort", + "junc, junction", + "natl, nat, national", + "newcastle, new castle", + "pk, park", + "spgs, springs", + "st, saint" + ].freeze - index_name("#{Rails.env}:geonames".freeze) + SETTINGS = { + analysis: { + filter: { + synonym: { + type: 'synonym', + synonyms: SYNONYMS + } + }, + analyzer: { + custom_analyzer: { + type: 'custom', + tokenizer: 'whitespace', + filter: %w(standard lowercase synonym) + } + } + } + } - SYNONYMS = ["ft, fort", "st, saint", "afb, air force base", "afs, air force station", "ang, air national guard", "junc, junction", "spgs, springs", "natl, nat, national", "pk, park", "newcastle, new castle", "cavecreek, cave creek"] + settings index: SETTINGS do + mappings dynamic: 'false' do + indexes :type, type: 'keyword' + indexes :location, type: 'text', analyzer: 'custom_analyzer' + indexes :state, type: 'text', analyzer: 'keyword' + indexes :geo, type: 'geo_point' + indexes :id, type: 'keyword', index: 
false + end + end class << self + def client + @client ||= Geoname.__elasticsearch__.client + end + def create_search_index - Tire.index index_name do - create( - settings: { - index: { - analysis: { - analyzer: {custom_analyzer: {type: 'custom', tokenizer: 'whitespace', filter: %w(standard lowercase synonym)}}, - filter: {synonym: {type: 'synonym', synonyms: SYNONYMS}} - } - } - }, - mappings: { - geoname: { - properties: { - type: {type: 'string'}, - location: {type: 'string', analyzer: 'custom_analyzer'}, - state: {type: 'string', analyzer: 'keyword'}, - geo: {type: 'geo_point'}, - id: {type: 'string', index: :not_analyzed, include_in_all: false} - } - } - } - ) - end + client.indices.create( + index: INDEX_NAME, + body: { settings: settings.to_hash, mappings: mappings.to_hash } + ) end def geocode(options = {}) - search_for(options.merge(size: 1)).results.first.geo.to_hash rescue nil + search_for(options.merge(size: 1)).results.first.geo rescue nil end def search_for(options) - Tire.search index_name do + search_definition = Elasticsearch::DSL::Search.search do query do - boolean do - must { match :location, options[:location], operator: 'AND' } - must { term :state, options[:state] } + bool do + must do + match :location do + query options[:location] + operator 'and' + end + end + + must { term state: options[:state] } end end + size options[:size] - end + end.to_hash + + Geoname.search(search_definition, index: INDEX_NAME) end def delete_search_index - search_index.delete + client.indices.delete index: INDEX_NAME if search_index_exists? end - def search_index - Tire.index(index_name) + def search_index_exists? + client.indices.exists? 
index: INDEX_NAME end def import(geonames) - Tire.index index_name do - import geonames do |docs| - docs.each do |doc| - doc[:id] = "#{doc[:location]}:#{doc[:state]}" - end - end - refresh + geonames.each do |doc| + client.index( + index: INDEX_NAME, + type: 'geoname', + id: "#{doc[:location]}:#{doc[:state]}", + body: { + location: doc[:location], + geo: doc[:geo], + state: doc[:state] + } + ) end - #Tire.index index_name - Rails.logger.info "Imported #{geonames.size} Geonames to #{index_name}" - end + __elasticsearch__.refresh_index! index: INDEX_NAME + Rails.logger.info "Imported #{geonames.size} Geonames to #{INDEX_NAME}" + end end -end \ No newline at end of file +end diff --git a/app/models/position_opening.rb b/app/models/position_opening.rb index cdb09f8..e95058c 100644 --- a/app/models/position_opening.rb +++ b/app/models/position_opening.rb @@ -1,134 +1,223 @@ +require 'active_model' +require 'elasticsearch/dsl' + class PositionOpening - include Tire::Model::Search + include ActiveModel::Model + include Elasticsearch::Model + include Elasticsearch::DSL + + INDEX_NAME = "#{Elasticsearch::INDEX_NAME}".freeze + + MAX_RETURNED_DOCUMENTS = 100.freeze - index_name("#{Elasticsearch::INDEX_NAME}") + SYNONYMS = [ + "architect, architecture", + "certified nursing assistant, cna", + "clerk, clerical", + "counselor, counseling, therapy, therapist", + "custodial, janitor, custodian", + "cypa, child and youth program assistant, childcare", + "cys, child youth services", + "electronic, electrical", + "forester, forestry", + "green, environment, environmental", + "information technology, it, tech, computer", + "linguist, language", + "legal, attorney", + "lpn, licensed practical nurse", + "lvn, licensed vocational nurse", + "pa, physician assistant", + "physician, doctor", + "rn, registered nurse", + "teacher, teaching", + "technical, technician", + "technology, technologist", + "tso, transportation security officer", + "tv, television" + ].freeze + + SETTINGS = { + 
analysis: { + filter: { + synonym: { + type: 'synonym', + synonyms: SYNONYMS + } + }, + analyzer: { + custom_analyzer: { + type: 'custom', + tokenizer: 'whitespace', + filter: %w(standard lowercase synonym snowball) + } + } + } + } + + settings index: SETTINGS do + mappings dynamic: 'false' do + indexes :type, type: 'keyword' + indexes :source, type: 'keyword' + indexes :tags, type: 'text', analyzer: 'keyword' + indexes :external_id, type: 'integer', store: true + indexes :position_title, type: 'text', analyzer: 'custom_analyzer', term_vector: 'with_positions_offsets', store: true + indexes :organization_id, type: 'text', analyzer: 'keyword' + indexes :organization_name, type: 'keyword', index: false + + indexes :locations, type: 'nested' do + indexes :city, type: 'text', analyzer: 'simple' + indexes :state, type: 'text', analyzer: 'keyword' + indexes :geo, type: 'geo_point' + end - MAX_RETURNED_DOCUMENTS = 100 - SYNONYMS = ["information technology, it, tech, computer", "teacher, teaching", "certified nursing assistant, cna", "rn, registered nurse", "lpn, licensed practical nurse", "lvn, licensed vocational nurse", "pa, physician assistant", "custodial, janitor, custodian", "cys, child youth services", "clerk, clerical", "physician, doctor", "linguist, language", "tv, television", "legal, attorney", "counselor, counseling, therapy, therapist", "green, environment, environmental", "forester, forestry", "technical, technician", "technology, technologist", "electronic, electrical", "architect, architecture", "cypa, child and youth program assistant, childcare", "tso, transportation security officer"].freeze + indexes :start_date, type: 'date', format: 'YYYY-MM-dd' + indexes :end_date, type: 'date', format: 'YYYY-MM-dd' + indexes :minimum, type: 'float' + indexes :maximum, type: 'float' + indexes :position_offering_type_code, type: 'integer' + indexes :position_schedule_type_code, type: 'integer' + indexes :rate_interval_code, type: 'text', analyzer: 'keyword' + 
indexes :id, type: 'keyword', index: false + indexes :timestamp, type: 'date', null_value: 'NULL' + end + end class << self + def client + @client ||= PositionOpening.__elasticsearch__.client + end + def create_search_index - Tire.index index_name do - create( - settings: { - index: { - analysis: { - analyzer: { custom_analyzer: { type: 'custom', tokenizer: 'whitespace', filter: %w(standard lowercase synonym snowball) } }, - filter: { synonym: { type: 'synonym', synonyms: SYNONYMS } } - } - } - }, - mappings: { - position_opening: { - _timestamp: { enabled: true }, - _ttl: { enabled: true }, - properties: { - type: { type: 'string' }, - source: { type: 'string', index: :not_analyzed }, - tags: { type: 'string', analyzer: 'keyword' }, - external_id: { type: 'integer' }, - position_title: { type: 'string', analyzer: 'custom_analyzer', term_vector: 'with_positions_offsets', store: true }, - organization_id: { type: 'string', analyzer: 'keyword' }, - organization_name: { type: 'string', index: :not_analyzed }, - locations: { - type: 'nested', - properties: { - city: { type: 'string', analyzer: 'simple' }, - state: { type: 'string', analyzer: 'keyword' }, - geo: { type: 'geo_point' } } }, - start_date: { type: 'date', format: 'YYYY-MM-dd' }, - end_date: { type: 'date', format: 'YYYY-MM-dd' }, - minimum: { type: 'float' }, - maximum: { type: 'float' }, - position_offering_type_code: { type: 'integer' }, - position_schedule_type_code: { type: 'integer' }, - rate_interval_code: { type: 'string', analyzer: 'keyword' }, - id: { type: 'string', index: :not_analyzed, include_in_all: false } - } - } - } - ) - end + client.indices.create( + index: INDEX_NAME, + body: { settings: settings.to_hash, mappings: mappings.to_hash } + ) end def search_for(options = {}) - options.reverse_merge!(size: 10, from: 0, sort_by: :_timestamp) + options.reverse_merge!(size: 10, from: 0) document_limit = [options[:size].to_i, MAX_RETURNED_DOCUMENTS].min source = options[:source] + sort_by = 
options[:sort_by] || :timestamp tags = options[:tags].present? ? options[:tags].split(/[ ,]/) : nil lat, lon = options[:lat_lon].split(',') rescue [nil, nil] organization_ids = organization_ids_from_options(options) query = Query.new(options[:query], organization_ids) - search = Tire.search index_name do + definition = Elasticsearch::DSL::Search.search do query do - boolean(minimum_number_should_match: 1) do - must { term :source, source } if source.present? - must { terms :tags, tags } if tags - must { match :position_offering_type_code, query.position_offering_type_code } if query.position_offering_type_code.present? - must { match :position_schedule_type_code, query.position_schedule_type_code } if query.position_schedule_type_code.present? - should { match :position_title, query.keywords, analyzer: 'custom_analyzer' } if query.keywords.present? + bool do + filter do + range :start_date do + lte Date.current + end + end + + must { term source: source } if source.present? + must { terms tags: tags } if tags + must do + match :position_offering_type_code do + query query.position_offering_type_code + end + end if query.position_offering_type_code.present? + + must do + match :position_schedule_type_code do + query query.position_schedule_type_code + end + end if query.position_schedule_type_code.present? + + should do + match :position_title do + query query.keywords + analyzer 'custom_analyzer' + end + end if query.keywords.present? should do - nested path: 'locations' do + nested do + path 'locations' query do - match 'locations.city', query.keywords, operator: 'AND' + match 'locations.city' do + query query.keywords + operator 'and' + end end end end if query.keywords.present? && query.location.nil? - must { match :rate_interval_code, query.rate_interval_code } if query.rate_interval_code.present? + + must do + match :rate_interval_code do + query query.rate_interval_code + end + end if query.rate_interval_code.present? 
+ must do - boolean do - should { terms :organization_id, query.organization_terms } if query.organization_terms.present? - query.organization_prefixes.each do |organization_prefix| - should { prefix :organization_id, organization_prefix } - end if query.organization_prefixes.present? + bool do + should { terms organization_id: query.organization_terms } if query.organization_terms.present? + if query.organization_prefixes.present? + query.organization_prefixes.each do |prefix| + should { prefix organization_id: prefix } + end + end end end if query.organization_ids.present? + must do - nested path: 'locations' do + nested do + path 'locations' query do - boolean do - must { term 'locations.state', query.location.state } if query.has_state? - must { match 'locations.city', query.location.city, operator: 'AND' } if query.has_city? + bool do + must { term 'locations.state': query.location.state } if query.has_state? + must do + match 'locations.city' do + query query.location.city + operator 'and' + end + end if query.has_city? end end end end if query.location.present? - end - end if source.present? || tags || query.valid? - filter :range, start_date: { lte: Date.current } + minimum_should_match '0<1' + end + end - if query.keywords.blank? - if lat.blank? || lon.blank? - sort { by options[:sort_by], 'desc' } - else - options[:sort_by] = 'geo_distance' - sort do - by :_geo_distance, { - 'locations.geo' => { - lat: lat, lon: lon - }, - :order => 'asc' - } + sort do + if query.keywords.blank? + if lat.blank? || lon.blank? 
+ by "#{sort_by}", order: 'desc' + else + by({ + _geo_distance: { + 'locations.geo': { lat: lat.to_f, lon: lon.to_f }, + order: 'asc', + nested_path: 'locations' + } + }) end + else + by "#{sort_by}", order: 'desc' end end + + highlight { field :position_title, number_of_fragments: 0 } size document_limit from options[:from] - highlight position_title: { number_of_fragments: 0 } - end + end.to_hash + + search_results = __elasticsearch__.search(definition, index: INDEX_NAME) - Rails.logger.info("[Query] #{options.merge(result_count: search.results.total).to_json}") + Rails.logger.info("[Query] #{options.merge(result_count: search_results.results.total).to_json}") - search.results.collect do |item| + search_results.results.collect do |item| { id: item.id, source: item.source, external_id: item.external_id, - position_title: (options[:hl] == '1' && item.highlight.present?) ? item.highlight[:position_title][0] : item.position_title, - organization_name: item.organization_name, + position_title: (options[:hl] == '1' && item.try(:highlight).present?) ? item.highlight[:position_title][0] : item.position_title, + organization_name: item.try(:organization_name), rate_interval_code: item.rate_interval_code, minimum: item.minimum, maximum: item.maximum, @@ -141,28 +230,40 @@ def search_for(options = {}) end def delete_search_index - search_index.delete + client.indices.delete index: INDEX_NAME rescue nil end - def search_index - Tire.index(index_name) + def search_index_exists? + client.indices.exists? index: INDEX_NAME end def import(position_openings) - Tire.index index_name do - import position_openings do |docs| - docs.each do |doc| - doc[:id] = "#{doc[:source]}:#{doc[:external_id]}" - doc[:locations].each do |loc| - normalized_city = loc[:city].sub(' Metro Area', '').sub(/, .*$/, '') - lat_lon_hash = Geoname.geocode(location: normalized_city, state: loc[:state]) - loc[:geo] = lat_lon_hash if lat_lon_hash.present? - end if doc[:locations].present? 
+ position_openings.each do |opening| + data = opening.each_with_object({}) do |(key, value), data| + if key == :locations + data[:locations] = value.map do |v| + {city: normalized_city(v[:city]), + state: v[:state], + geo: v[:geo] || find_geoname(v[:city], v[:state])} + end + else + data[key] = value end end - refresh + + client.index( + index: INDEX_NAME, + type: 'position_opening', + id: "#{opening[:source]}:#{opening[:external_id]}", + body: data.merge!({ + timestamp: opening[:timestamp] || DateTime.current, + id: "#{opening[:source]}:#{opening[:external_id]}" + }) + ) end + __elasticsearch__.refresh_index! index: INDEX_NAME + Rails.logger.info "Imported #{position_openings.size} position openings" end @@ -171,13 +272,17 @@ def get_external_ids_by_source(source) total = 0 external_ids = [] begin - search = Tire.search index_name do - query { match :source, source } - fields %w(external_id) - sort { by :id } - from from_index - size MAX_RETURNED_DOCUMENTS - end + search_definition = { + query: { match: { source: { query: source }}}, + stored_fields: %w(external_id), + _source: true + } + + search_definition[:size] = MAX_RETURNED_DOCUMENTS + search_definition[:from] = from_index + search_definition[:sort] = ['external_id'] + + search = __elasticsearch__.search(search_definition, index: INDEX_NAME) external_ids.push(*search.results.map(&:external_id)) from_index += search.results.count total = search.results.total @@ -185,6 +290,43 @@ def get_external_ids_by_source(source) external_ids.flatten end + def delete_expired_docs + query = Elasticsearch::DSL::Search.search do + query do + bool do + filter do + bool do + should do + range :end_date do + lte Date.current + end + end + + should do + bool do + must_not do + bool do + must do + exists { field 'end_date' } + end + must do + exists { field 'start_date' } + end + end + end + end + end + + end + end + end + end + end + + client.delete_by_query(body: query.to_hash, index: INDEX_NAME) + 
__elasticsearch__.refresh_index! index: INDEX_NAME + end + def url_for_position_opening(position_opening) case position_opening.source when 'usajobs' @@ -204,5 +346,12 @@ def organization_ids_from_options(options) organization_ids end + def find_geoname(location, state) + Geoname.geocode(location: normalized_city(location), state: state) + end + + def normalized_city(city) + city.sub(' Metro Area', '').sub(/, .*$/, '') + end end -end \ No newline at end of file +end diff --git a/config/application.rb b/config/application.rb index 921c263..89ce021 100644 --- a/config/application.rb +++ b/config/application.rb @@ -2,7 +2,6 @@ require 'rails' require 'action_controller/railtie' -# require 'tire/rails/logger' # This does not work with Rails 5 Bundler.require(*Rails.groups) @@ -30,5 +29,6 @@ class Application < Rails::Application end config.airbrake = config_for(:airbrake) + config.elasticsearch = config_for(:elasticsearch) end end diff --git a/config/elasticsearch.yml b/config/elasticsearch.yml index 837de31..c2a617a 100644 --- a/config/elasticsearch.yml +++ b/config/elasticsearch.yml @@ -1,2 +1,9 @@ production: - index_name: \ No newline at end of file + index_name: + url: + username: + password: +development: + url: 'localhost:9200' +test: + url: 'localhost:9200' diff --git a/config/initializers/elasticsearch.rb b/config/initializers/elasticsearch.rb index afdacea..1c553d0 100644 --- a/config/initializers/elasticsearch.rb +++ b/config/initializers/elasticsearch.rb @@ -1,7 +1,16 @@ module Elasticsearch; end -es_config = (YAML.load_file("#{Rails.root}/config/elasticsearch.yml") || {})[Rails.env] +config = Rails.application.config.elasticsearch -Tire::Configuration.url(es_config['url']) if es_config && es_config['url'].present? +Elasticsearch::INDEX_NAME = config && config['index_name'].present? ? config['index_name'].freeze : "#{Rails.env}:jobs".freeze -Elasticsearch::INDEX_NAME = es_config && es_config['index_name'].present? ? 
es_config['index_name'].freeze : "#{Rails.env}:jobs".freeze +Rails.application.config.elasticsearch_client = Elasticsearch::Client.new( + url: config['url'], + user: config['username'], + password: config['password'] +) + +Elasticsearch::Model.client = Rails.application.config.elasticsearch_client + +PositionOpening.create_search_index unless PositionOpening.search_index_exists? +Geoname.create_search_index unless Geoname.search_index_exists? diff --git a/config/initializers/tire.rb b/config/initializers/tire.rb deleted file mode 100644 index e623a70..0000000 --- a/config/initializers/tire.rb +++ /dev/null @@ -1,3 +0,0 @@ -#Tire.configure { logger STDERR, level: 'debug' } -PositionOpening.create_search_index unless PositionOpening.search_index.exists? -Geoname.create_search_index unless Geoname.search_index.exists? \ No newline at end of file diff --git a/config/schedule.rb b/config/schedule.rb new file mode 100644 index 0000000..079df8c --- /dev/null +++ b/config/schedule.rb @@ -0,0 +1,14 @@ +require "active_support" +require "active_support/time" + +Time.zone = "Eastern Time (US & Canada)" + +def zoned_time(time) + Time.zone.parse(time).localtime +end + +set :output, { error: "log/cron_error.log", standard: "log/cron.log" } + +every 1.day, at: zoned_time("12:00 am") do + rake "position_openings:delete_expired_position_openings" +end diff --git a/lib/importers/neogov_data.rb b/lib/importers/neogov_data.rb index be2b501..d488840 100644 --- a/lib/importers/neogov_data.rb +++ b/lib/importers/neogov_data.rb @@ -42,7 +42,7 @@ def import existing_external_ids = PositionOpening.get_external_ids_by_source(@source) expired_ids = existing_external_ids - updated_external_ids expired_openings = expired_ids.collect do |expired_id| - {type: 'position_opening', source: @source, external_id: expired_id, _ttl: '1s'} + {type: 'position_opening', source: @source, external_id: expired_id} end position_openings.push(*expired_openings) PositionOpening.import position_openings @@ -62,6 
+62,7 @@ def process_job(job_xml) now = DateTime.current.freeze is_continuous = end_date_str =~ /^continuous$/i + if is_continuous end_datetime_utc = now + 7 end_date = end_datetime_utc.to_date @@ -86,21 +87,17 @@ def process_job(job_xml) entry[:locations] = process_location_and_state(job_xml.xpath(XPATHS[:location]).inner_text, job_xml.xpath(XPATHS[:state]).inner_text) - if seconds_remaining.zero? || entry[:locations].blank? - entry[:_ttl] = '1s' - return entry + unless seconds_remaining.zero? || entry[:locations].blank? + entry[:timestamp] = pubdate.iso8601 + entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.squish + entry[:start_date] = start_date + entry[:end_date] = is_continuous ? nil : end_date + entry[:minimum] = process_salary(job_xml.xpath(XPATHS[:minimum]).inner_text) + entry[:maximum] = process_salary(job_xml.xpath(XPATHS[:maximum]).inner_text) + entry[:rate_interval_code] = process_salary_interval(job_xml.xpath(XPATHS[:salary_interval]).inner_text) + entry.merge!(process_job_type(job_xml.xpath(XPATHS[:job_type]).inner_text)) end - entry[:_timestamp] = pubdate.iso8601 - entry[:_ttl] = "#{seconds_remaining}s" - entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.squish - entry[:start_date] = start_date - entry[:end_date] = is_continuous ? 
nil : end_date - entry[:minimum] = process_salary(job_xml.xpath(XPATHS[:minimum]).inner_text) - entry[:maximum] = process_salary(job_xml.xpath(XPATHS[:maximum]).inner_text) - entry[:rate_interval_code] = process_salary_interval(job_xml.xpath(XPATHS[:salary_interval]).inner_text) - entry.merge!(process_job_type(job_xml.xpath(XPATHS[:job_type]).inner_text)) - entry end diff --git a/lib/importers/usajobs_data.rb b/lib/importers/usajobs_data.rb index 4fd86df..4f36fa1 100644 --- a/lib/importers/usajobs_data.rb +++ b/lib/importers/usajobs_data.rb @@ -38,8 +38,8 @@ def process_job(job_xml) entry[:external_id] = job_xml.xpath(XPATHS[:id]).inner_text.to_i entry[:locations] = process_locations(job_xml) entry[:locations] = [] if entry[:locations].size >= CATCHALL_THRESHOLD - entry[:_ttl] = (days_remaining.zero? || entry[:locations].empty?) ? '1s' : "#{days_remaining}d" - unless entry[:_ttl] == '1s' + # entry[:_ttl] = (days_remaining.zero? || entry[:locations].empty?) ? '1s' : "#{days_remaining}d" + unless entry[:locations].empty? || days_remaining.zero? entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.strip entry[:organization_id] = job_xml.xpath(XPATHS[:organization_id]).inner_text.strip.upcase entry[:organization_name] = job_xml.xpath(XPATHS[:organization_name]).inner_text.strip @@ -89,4 +89,4 @@ def abbreviate_state_name(location_str) end location_str end -end \ No newline at end of file +end diff --git a/lib/tasks/geonames.rake b/lib/tasks/geonames.rake index 8cfb1fb..c6336ac 100644 --- a/lib/tasks/geonames.rake +++ b/lib/tasks/geonames.rake @@ -11,7 +11,7 @@ namespace :geonames do desc 'Recreate geonames index' task recreate_index: :environment do - Geoname.delete_search_index if Geoname.search_index.exists? + Geoname.delete_search_index if Geoname.search_index_exists? 
Geoname.create_search_index end end \ No newline at end of file diff --git a/lib/tasks/position_openings.rake b/lib/tasks/position_openings.rake index 9b9aec7..5034bf8 100644 --- a/lib/tasks/position_openings.rake +++ b/lib/tasks/position_openings.rake @@ -41,7 +41,12 @@ namespace :jobs do desc 'Recreate position openings index' task recreate_index: :environment do - PositionOpening.delete_search_index if PositionOpening.search_index.exists? + PositionOpening.delete_search_index if PositionOpening.search_index_exists? PositionOpening.create_search_index end -end \ No newline at end of file + + desc 'Delete expired position openings' + task delete_expired_position_openings: :environment do + PositionOpening.delete_expired_docs + end +end diff --git a/spec/api/v2/position_openings_spec.rb b/spec/api/v2/position_openings_spec.rb index d26c0ca..d81c189 100644 --- a/spec/api/v2/position_openings_spec.rb +++ b/spec/api/v2/position_openings_spec.rb @@ -4,7 +4,7 @@ let(:v2_headers) { { 'Accept' => 'application/vnd.usagov.position_openings.v2' } } before do - PositionOpening.delete_search_index if PositionOpening.search_index.exists? + PositionOpening.delete_search_index if PositionOpening.search_index_exists? 
PositionOpening.create_search_index UsajobsData.new('doc/sample.xml').import @@ -33,7 +33,7 @@ 'organization_name'=>'Veterans Affairs, Veterans Health Administration', 'rate_interval_code'=>'PH', 'minimum'=>17, 'maximum'=>23, 'start_date'=>'2012-09-19', 'end_date'=>'2022-01-31', - 'locations'=>['Odessa, TX', 'Pentagon, Arlington, VA', 'San Angelo, TX', 'Abilene, TX'], + 'locations'=>['Odessa, TX', 'Pentagon, VA', 'San Angelo, TX', 'Abilene, TX'], 'url' => 'https://www.usajobs.gov/GetJob/ViewDetails/327358300'}) expect(results_array.last).to eq({'id'=>'ng:michigan:234175', 'position_title'=>'Registered Nurse Non-Career', diff --git a/spec/api/v3/position_openings_spec.rb b/spec/api/v3/position_openings_spec.rb index 5f2927d..04c373f 100644 --- a/spec/api/v3/position_openings_spec.rb +++ b/spec/api/v3/position_openings_spec.rb @@ -2,7 +2,7 @@ describe 'Position Openings API V3' do before do - PositionOpening.delete_search_index if PositionOpening.search_index.exists? + PositionOpening.delete_search_index if PositionOpening.search_index_exists? 
PositionOpening.create_search_index UsajobsData.new('doc/sample.xml').import @@ -31,7 +31,7 @@ 'organization_name'=>'Veterans Affairs, Veterans Health Administration', 'rate_interval_code'=>'PH', 'minimum'=>17, 'maximum'=>23, 'start_date'=>'2012-09-19', 'end_date'=>'2022-01-31', - 'locations'=>['Odessa, TX', 'Pentagon, Arlington, VA', 'San Angelo, TX', 'Abilene, TX'], + 'locations'=>['Odessa, TX', 'Pentagon, VA', 'San Angelo, TX', 'Abilene, TX'], 'url' => 'https://www.usajobs.gov/GetJob/ViewDetails/327358300'}) expect(results_array.last).to eq({'id'=>'ng:michigan:234175', 'position_title'=>'Registered Nurse Non-Career', diff --git a/spec/lib/importers/neogov_data_spec.rb b/spec/lib/importers/neogov_data_spec.rb index be2da60..da786e9 100644 --- a/spec/lib/importers/neogov_data_spec.rb +++ b/spec/lib/importers/neogov_data_spec.rb @@ -7,7 +7,7 @@ let!(:current_datetime) { DateTime.current.freeze } let!(:current) { current_datetime.to_date.freeze } let(:far_away) { Date.parse('2022-01-31') } - let(:continuous_ttl) { "#{(current_datetime + 7).to_i - DateTime.parse('2012-03-12 10:16:56.14').to_datetime.to_i}s" } + # let(:continuous_ttl) { "#{(current_datetime + 7).to_i - DateTime.parse('2012-03-12 10:16:56.14').to_datetime.to_i}s" } before { allow(DateTime).to receive(:current).and_return(current_datetime) } @@ -25,8 +25,8 @@ expect(position_openings[0]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789, - locations: [{city: 'Lansing', state: 'MI'}], _ttl: '277909586s', + timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789, + locations: [{city: 'Lansing', state: 'MI'}], position_title: 'Professional Development and Training Intern-DHS', start_date: Date.parse('2013-04-12'), end_date: far_away, minimum: nil, maximum: nil, rate_interval_code: 'PH', position_offering_type_code: 15328, 
position_schedule_type_code: nil} @@ -35,8 +35,8 @@ expect(position_openings[1]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517, - locations: [{city: 'Lansing', state: 'MI'}], _ttl: '278257419s', + timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517, + locations: [{city: 'Lansing', state: 'MI'}], position_title: 'MEDC Corporate - Business Attraction Manager', start_date: Date.parse('2013-04-08'), end_date: far_away, minimum: 59334.0, maximum: 77066.0, rate_interval_code: 'PA', position_offering_type_code: 15317, position_schedule_type_code: 1} @@ -45,9 +45,8 @@ expect(position_openings[2]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2012-03-12T10:16:56+00:00', external_id: 282662, - locations: [{city: 'Freeland', state: 'MI'}], - _ttl: continuous_ttl, position_title: 'Dentist-A', + timestamp: '2012-03-12T10:16:56+00:00', external_id: 282662, + locations: [{city: 'Freeland', state: 'MI'}], position_title: 'Dentist-A', start_date: Date.parse('2011-09-23'), end_date: nil, minimum: 37.33, maximum: 51.66, rate_interval_code: 'PH', position_offering_type_code: 15317, position_schedule_type_code: 2} ) @@ -55,9 +54,8 @@ expect(position_openings[3]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2010-08-10T16:07:30+00:00', external_id: 234175, - locations: [{city: 'Munising', state: 'MI'}], - _ttl: '362235090s', position_title: 'Registered Nurse Non-Career', + timestamp: '2010-08-10T16:07:30+00:00', external_id: 234175, + locations: [{city: 'Munising', state: 'MI'}], position_title: 'Registered Nurse Non-Career', start_date: Date.parse('2010-06-08'), end_date: far_away, minimum: 28.37, 
maximum: 38.87, rate_interval_code: 'PH', position_offering_type_code: nil, position_schedule_type_code: nil} ) @@ -81,8 +79,8 @@ expect(position_openings[0]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789, - locations: [{city: 'Lansing', state: 'MI'}], _ttl: '277909586s', + timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789, + locations: [{city: 'Lansing', state: 'MI'}], position_title: 'Professional Development and Training Intern-DHS', start_date: Date.parse('2013-04-12'), end_date: far_away, minimum: nil, maximum: nil, rate_interval_code: 'PH', position_offering_type_code: 15328, position_schedule_type_code: nil} @@ -91,42 +89,42 @@ expect(position_openings[1]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - _timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517, - locations: [{city: 'Lansing', state: 'MI'}], _ttl: '278257419s', + timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517, + locations: [{city: 'Lansing', state: 'MI'}], position_title: 'MEDC Corporate - Business Attraction Manager', start_date: Date.parse('2013-04-08'), end_date: far_away, minimum: 59334.0, maximum: 77066.0, rate_interval_code: 'PA', position_offering_type_code: 15317, position_schedule_type_code: 1} ) expect(position_openings[2]).to eq( - {type: 'position_opening', source: 'ng:michigan', external_id: 282662, _ttl: '1s'} + {type: 'position_opening', source: 'ng:michigan', external_id: 282662} ) end less_entries_importer.import end end - context 'when invalid/expired position openings are in the feed' do - let(:expired_importer) { NeogovData.new('michigan', 'state', 'USMI') } - - before do - allow(expired_importer).to receive(:fetch_jobs_rss).and_return File.open('spec/resources/neogov/expired.rss') - end - - it 
'should set their _ttl to 1s' do - expect(PositionOpening).to receive(:get_external_ids_by_source).with('ng:michigan').and_return([]) - expect(PositionOpening).to receive(:import) do |position_openings| - expect(position_openings.length).to eq(1) - - expect(position_openings[0]).to eq( - {type: 'position_opening', source: 'ng:michigan', - organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - external_id: 282662, locations: [{city: 'Freeland', state: 'MI'}], _ttl: '1s'} - ) - end - expired_importer.import - end - end + # context 'when invalid/expired position openings are in the feed' do + # let(:expired_importer) { NeogovData.new('michigan', 'state', 'USMI') } + # + # before do + # allow(expired_importer).to receive(:fetch_jobs_rss).and_return File.open('spec/resources/neogov/expired.rss') + # end + # + # it 'should set their ttl to 1s' do + # expect(PositionOpening).to receive(:get_external_ids_by_source).with('ng:michigan').and_return([]) + # expect(PositionOpening).to receive(:import) do |position_openings| + # expect(position_openings.length).to eq(1) + # + # expect(position_openings[0]).to eq( + # {type: 'position_opening', source: 'ng:michigan', + # organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), + # external_id: 282662, locations: [{city: 'Freeland', state: 'MI'}], ttl: '1s'} + # ) + # end + # expired_importer.import + # end + # end context 'when the city or state is invalid' do let(:bad_location_importer) { NeogovData.new('michigan', 'state', 'USMI') } @@ -143,7 +141,7 @@ expect(position_openings[0]).to eq( {type: 'position_opening', source: 'ng:michigan', organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state), - external_id: 386302, locations: [], _ttl: '1s'} + external_id: 386302, locations: []} ) end bad_location_importer.import diff --git a/spec/lib/importers/usajobs_data_spec.rb b/spec/lib/importers/usajobs_data_spec.rb index e50edf2..081cca4 100644 
--- a/spec/lib/importers/usajobs_data_spec.rb +++ b/spec/lib/importers/usajobs_data_spec.rb @@ -11,7 +11,7 @@ expect(PositionOpening).to receive(:import) do |position_openings| expect(position_openings.length).to eq(3) expect(position_openings[0]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 305972200, _ttl: ttl, + {type: 'position_opening', source: 'usajobs', external_id: 305972200, position_title: 'Medical Officer', tags: %w(federal), organization_id: 'AF09', organization_name: 'Air Force Personnel Center', locations: [{city: 'Dyess AFB', state: 'TX'}], @@ -19,7 +19,7 @@ minimum: 60274, maximum: 155500, rate_interval_code: 'PA', position_schedule_type_code: 1, position_offering_type_code: 15327} ) expect(position_openings[1]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 325054900, _ttl: ttl, + {type: 'position_opening', source: 'usajobs', external_id: 325054900, position_title: 'Physician (Surgical Critical Care)', tags: %w(federal), organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration', locations: [{city: 'Charleston', state: 'SC'}], @@ -27,7 +27,7 @@ minimum: 125000, maximum: 295000, rate_interval_code: 'PA', position_schedule_type_code: 2, position_offering_type_code: 15317} ) expect(position_openings[2]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: ttl, + {type: 'position_opening', source: 'usajobs', external_id: 327358300, position_title: 'Student Nurse Technicians', tags: %w(federal), organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration', locations: [{city: 'Odessa', state: 'TX'}, @@ -48,15 +48,15 @@ expect(PositionOpening).to receive(:import) do |position_openings| expect(position_openings.length).to eq(3) expect(position_openings[0]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 305972200, _ttl: '1s', + {type: 'position_opening', source: 'usajobs', external_id: 
305972200, tags: %w(federal), locations: [{:city => "Dyess AFB", :state => "TX"}]} ) expect(position_openings[1]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 325054900, _ttl: '1s', + {type: 'position_opening', source: 'usajobs', external_id: 325054900, tags: %w(federal), locations: [{:city => "Charleston", :state => "SC"}]} ) expect(position_openings[2]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: '1s', + {type: 'position_opening', source: 'usajobs', external_id: 327358300, tags: %w(federal), locations: [{:city => "Odessa", :state => "TX"}, {:city => "Pentagon, Arlington", :state => "VA"}, {:city => "San Angelo", :state => "TX"}, @@ -75,14 +75,14 @@ expect(PositionOpening).to receive(:import) do |position_openings| expect(position_openings.length).to eq(2) expect(position_openings[0]).to eq( - {type: "position_opening", source: 'usajobs', external_id: 305972200, _ttl: ttl, position_title: "Medical Officer", + {type: "position_opening", source: 'usajobs', external_id: 305972200, position_title: "Medical Officer", organization_id: "AF09", organization_name: "Air Force Personnel Center", tags: %w(federal), locations: [{:city => "Fulton", :state => "MD"}], start_date: Date.parse('28 Dec 2011'), end_date: far_away, minimum: 60274, maximum: 155500, rate_interval_code: "PA", position_schedule_type_code: 1, position_offering_type_code: 15327} ) expect(position_openings[1]).to eq( - {type: "position_opening", source: 'usajobs', external_id: 325054900, _ttl: "1s", locations: [], tags: %w(federal)} + {type: "position_opening", source: 'usajobs', external_id: 325054900, locations: [], tags: %w(federal)} ) end bad_location_importer.import @@ -96,7 +96,7 @@ expect(PositionOpening).to receive(:import) do |position_openings| expect(position_openings.length).to eq(1) expect(position_openings[0]).to eq( - {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: '1s', + {type: 'position_opening', 
source: 'usajobs', external_id: 327358300, tags: %w(federal), locations: []} ) end diff --git a/spec/models/geoname_spec.rb b/spec/models/geoname_spec.rb index 68aa2d7..2ca8878 100644 --- a/spec/models/geoname_spec.rb +++ b/spec/models/geoname_spec.rb @@ -2,7 +2,7 @@ describe Geoname do before do - Geoname.delete_search_index if Geoname.search_index.exists? + Geoname.delete_search_index if Geoname.search_index_exists? Geoname.create_search_index end @@ -14,11 +14,10 @@ end it 'should return the lat/lon hash of the place' do - expect(Geoname.geocode(location: "Someplace", state: 'XY')).to eq({lat: 12.34, lon: -123.45}) + expect(Geoname.geocode(location: "Someplace", state: 'XY').to_json).to eq({lat: 12.34, lon: -123.45}.to_json) end end - context 'when query terms contain a synonym match with terms in location field' do before do geonames, @first_synonyms = [], [] @@ -26,7 +25,7 @@ first_synonym, remainder = batch_str.strip.gsub(/ ?, ?/, ',').split(',', 2) @first_synonyms << first_synonym remainder.split(',').each do |synonym| - geonames << {type: 'geoname', location: "#{synonym} City", state: 'CA', geo: {lat: rand * 180, lon: rand * 180}} + geonames << {type: 'geoname', location: "#{synonym} City", state: 'CA', geo: {lat: rand * 90, lon: rand * 180}} end end Geoname.import geonames @@ -45,11 +44,11 @@ describe '.import(geonames)' do it 'should set the document ID' do Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 12.34, lon: -123.45}}] - Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 92.34, lon: 23.45}}] + Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 82.34, lon: 23.45}}] search = Geoname.search_for(location: 'Someplace', state: 'XY', size: 2) expect(search.results.total).to eq(1) expect(search.results.first.id).to eq('Someplace:XY') - expect(search.results.first.geo.lat).to eq(92.34) + expect(search.results.first.geo.lat).to eq(82.34) end end diff --git 
a/spec/models/position_opening_spec.rb b/spec/models/position_opening_spec.rb index b69d0a1..89c336d 100644 --- a/spec/models/position_opening_spec.rb +++ b/spec/models/position_opening_spec.rb @@ -2,10 +2,55 @@ describe PositionOpening do before do - PositionOpening.delete_search_index if PositionOpening.search_index.exists? + PositionOpening.delete_search_index if PositionOpening.search_index_exists? PositionOpening.create_search_index end + describe '.delete_expired_docs' do + before do + position_openings = [] + # deleted : end date is now + position_openings << { source: 'usajobs', external_id: 8801, type: 'position_opening', position_title: 'Deputy Special Assistant to the Chief Nurse Practitioner', + organization_id: 'AF09', organization_name: 'Air Force Personnel Center', + position_schedule_type_code: 1, position_offering_type_code: 15317, tags: %w(federal), + start_date: Date.current, end_date: Date.current, minimum: 80000, maximum: 100000, rate_interval_code: 'PA', + locations: [{ city: 'Andrews AFB', state: 'MD' }, + { city: 'Pentagon Arlington', state: 'VA' }, + { city: 'Air Force Academy', state: 'CO' }] } + # not deleted + position_openings << { source: 'usajobs', external_id: 8803, type: 'position_opening', position_title: 'Future Person', + organization_id: 'FUTU', organization_name: 'Future Administration', + position_schedule_type_code: 2, position_offering_type_code: 15327, tags: %w(federal), + start_date: Date.current + 1, end_date: Date.current + 8, minimum: 17, maximum: 23, rate_interval_code: 'PH', + locations: [{ city: 'San Francisco', state: 'CA' }] } + # deleted: end_date is less than start date + position_openings << { source: 'usajobs', external_id: 8804, type: 'position_opening', position_title: 'Making No Money', + organization_id: 'FUTU', organization_name: 'Future Administration', + position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal), + start_date: Date.current + 10, end_date: Date.current, minimum: 
0, maximum: 0, rate_interval_code: 'WC', + locations: [{ city: 'San Francisco', state: 'CA' }] } + position_openings << { source: 'usajobs', external_id: 8807, type: 'position_opening', position_title: 'Making No Money', + organization_id: 'FUTU', organization_name: 'Future Administration', + position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal), + start_date: nil, end_date: Date.current + 8, minimum: 0, maximum: 0, rate_interval_code: 'WC', + locations: [{ city: 'San Francisco', state: 'CA' }] } + # deleted: end_date is nil + position_openings << { source: 'usajobs', external_id: 8805, type: 'position_opening', position_title: 'Physician Assistant', + position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal), + organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration', + start_date: Date.current, end_date: nil, minimum: 17, maximum: 23, rate_interval_code: 'PH', + locations: [{ city: 'Fulton', state: 'MD' }] } + PositionOpening.import position_openings + end + + it 'should delete the position openings that are expired (less than today)' do + PositionOpening.delete_expired_docs + res = PositionOpening.search('*', index: 'test:jobs') + expect(res.size).to eq 1 + expect(res.results.first.id).to eq('usajobs:8803') + end + end + describe '.search_for(options)' do before do position_openings = [] @@ -31,17 +76,17 @@ position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal), start_date: Date.current, end_date: Date.current + 8, minimum: 0, maximum: 0, rate_interval_code: 'WC', locations: [{ city: 'San Francisco', state: 'CA' }] } - position_openings << { type: 'position_opening', source: 'ng:michigan', _timestamp: Date.current.weeks_ago(1).iso8601, external_id: 629140, + position_openings << { type: 'position_opening', source: 'ng:michigan', timestamp: Date.current.weeks_ago(1).iso8601, external_id: 629140, locations: [{ city: 'Lansing', state: 
'MI' }], tags: %w(state), rate_interval_code: 'PH', position_schedule_type_code: 1, position_offering_type_code: 15317, position_title: 'Supervisor (DOH #28425)', start_date: Date.current, end_date: Date.current.tomorrow, minimum: 20.7, maximum: 36.8 } - position_openings << { type: 'position_opening', source: 'ng:michigan', _timestamp: Date.current.yesterday.iso8601, external_id: 616313, + position_openings << { type: 'position_opening', source: 'ng:michigan', timestamp: Date.current.yesterday.iso8601, external_id: 616313, locations: [{ city: 'Detroit', state: 'MI' }], tags: %w(state), rate_interval_code: 'PH', position_schedule_type_code: 1, position_offering_type_code: 15322, position_title: 'Indoor Lifeguard', start_date: Date.current, end_date: Date.current + 8, minimum: 15.68, maximum: 27.11 } - position_openings << { type: 'position_opening', source: 'ng:bloomingtonmn', _timestamp: Date.current.iso8601, external_id: 632865, + position_openings << { type: 'position_opening', source: 'ng:bloomingtonmn', timestamp: Date.current.iso8601, external_id: 632865, locations: [{ city: 'Detroit', state: 'MI' }], tags: %w(city), rate_interval_code: 'PA', position_schedule_type_code: 1, position_offering_type_code: 15317, position_title: 'Computer Specialist', @@ -230,8 +275,8 @@ context 'when keywords present' do it 'should sort by relevance' do res = PositionOpening.search_for(query: 'physician nursing Practitioner') - expect(res.first[:position_title]).to eq('Deputy Special Assistant to the Chief Nurse Practitioner') - expect(res.last[:position_title]).to eq('Physician Assistant') + expect(res.first[:position_title]).to eq('Physician Assistant') + expect(res.last[:position_title]).to eq('Deputy Special Assistant to the Chief Nurse Practitioner') end end @@ -282,14 +327,12 @@ start_date: Date.current, end_date: Date.tomorrow, minimum: 17, maximum: 23, rate_interval_code: 'PH', locations: [{ city: 'Fulton', state: 'MD' }] }] PositionOpening.import position_openings - 
sleep(0.25) position_openings = [{ source: 'usajobs', external_id: 1001, type: 'position_opening', position_title: 'Physician Assistant Newer', position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal), organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration', start_date: Date.current, end_date: Date.tomorrow, minimum: 17, maximum: 23, rate_interval_code: 'PH', locations: [{ city: 'Fulton', state: 'MD' }] }] PositionOpening.import position_openings - sleep(0.25) position_openings = [{ source: 'usajobs', external_id: 1002, type: 'position_opening', position_title: 'Physician Assistant Newest', position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal), organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration', @@ -396,12 +439,10 @@ expect(Geoname).to receive(:geocode).with(location: 'Washington', state: 'DC').and_return({ lat: 23.45, lon: -12.34 }) expect(Geoname).to receive(:geocode).with(location: 'Maui Island', state: 'HI').and_return({ lat: 45.67, lon: -13.31 }) PositionOpening.import([position_opening]) - position_openings = Tire.search 'test:jobs' do - query { all } - end - expect(position_openings.results.first[:locations][0][:geo].to_hash).to eq({ lat: 12.34, lon: -23.45 }) - expect(position_openings.results.first[:locations][1][:geo].to_hash).to eq({ lat: 23.45, lon: -12.34 }) - expect(position_openings.results.first[:locations][2][:geo].to_hash).to eq({ lat: 45.67, lon: -13.31 }) + position_openings = PositionOpening.search('*', index: 'test:jobs') + expect(position_openings.results.first.locations[0][:geo].to_json).to eq({ lat: 12.34, lon: -23.45 }.to_json) + expect(position_openings.results.first.locations[1][:geo].to_json).to eq({ lat: 23.45, lon: -12.34 }.to_json) + expect(position_openings.results.first.locations[2][:geo].to_json).to eq({ lat: 45.67, lon: -13.31 }.to_json) end context 'when no location information is 
present for job' do @@ -414,9 +455,7 @@ it 'should leave locations empty' do PositionOpening.import([position_opening_no_locations]) - position_openings = Tire.search 'test:jobs' do - query { all } - end + position_openings = PositionOpening.search('*', index: 'test:jobs') expect(position_openings.results.first[:locations]).to be_nil end