diff --git a/.travis.yml b/.travis.yml
index d746a99..d780277 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,6 @@ language: ruby
rvm:
- 2.3.5
before_install:
- - curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.deb && sudo dpkg -i elasticsearch-1.4.4.deb && true
- - 'echo ''script.disable_dynamic: false'' | sudo tee --append /etc/elasticsearch/elasticsearch.yml'
- - 'echo ''index.number_of_shards: 1'' | sudo tee --append /etc/elasticsearch/elasticsearch.yml'
+ - curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.5.deb && sudo dpkg -i --force-confnew elasticsearch-5.6.5.deb && true
+ - 'printf "script:\n inline: true\n stored: true\n" | sudo tee --append /etc/elasticsearch/elasticsearch.yml'
- sudo service elasticsearch start
diff --git a/Gemfile b/Gemfile
index 64ebde4..c6e328e 100644
--- a/Gemfile
+++ b/Gemfile
@@ -8,8 +8,6 @@ end
gem 'rails', '5.1.4'
gem 'rails-controller-testing', '~> 1.0'
gem 'nokogiri', '~> 1.8.0'
-gem 'tire', '~> 0.6.2' #deprecated in 2013
-gem 'tire-contrib', '~> 0.1.2'
gem 'oj', '~> 3.1.3' # Unused?
gem 'faraday_middleware', '~> 0.12.2'
gem 'net-http-persistent', '~> 2.8'
@@ -20,6 +18,10 @@ gem 'rack-cors', '~> 1.0.2'
gem 'us_states', '~> 0.1.1', git: 'https://github.com/GSA/us_states.git'
gem 'newrelic_rpm', '~> 4.6.0'
gem 'rake', '~> 11.0'
+gem 'elasticsearch-model'
+gem 'elasticsearch-rails'
+gem 'elasticsearch-dsl'
+gem 'whenever'
group :development, :test do
gem 'puma', '~> 3.7'
diff --git a/Gemfile.lock b/Gemfile.lock
index d693232..49c3acc 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -47,7 +47,6 @@ GEM
airbrake (7.1.0)
airbrake-ruby (~> 2.5)
airbrake-ruby (2.6.2)
- ansi (1.5.0)
arel (8.0.0)
builder (3.2.3)
capistrano (2.15.4)
@@ -56,6 +55,7 @@ GEM
net-sftp (>= 2.0.0)
net-ssh (>= 2.0.14)
net-ssh-gateway (>= 1.1.0)
+ chronic (0.10.2)
concurrent-ruby (1.0.5)
coveralls (0.7.0)
multi_json (~> 1.3)
@@ -68,6 +68,20 @@ GEM
docile (1.1.3)
domain_name (0.5.20170404)
unf (>= 0.0.5, < 1.0.0)
+ elasticsearch (5.0.4)
+ elasticsearch-api (= 5.0.4)
+ elasticsearch-transport (= 5.0.4)
+ elasticsearch-api (5.0.4)
+ multi_json
+ elasticsearch-dsl (0.1.5)
+ elasticsearch-model (5.0.2)
+ activesupport (> 3)
+ elasticsearch (~> 5)
+ hashie
+ elasticsearch-rails (5.0.2)
+ elasticsearch-transport (5.0.4)
+ faraday
+ multi_json
erubi (1.7.0)
faraday (0.13.1)
multipart-post (>= 1.2, < 3)
@@ -76,7 +90,7 @@ GEM
ffi (1.9.18)
globalid (0.4.1)
activesupport (>= 4.2.0)
- hashr (0.0.22)
+ hashie (3.5.7)
highline (1.6.19)
http-cookie (1.0.3)
domain_name (~> 0.5)
@@ -204,16 +218,6 @@ GEM
thor (0.20.0)
thread_safe (0.3.6)
tins (0.13.2)
- tire (0.6.2)
- activemodel (>= 3.0)
- activesupport
- ansi
- hashr (~> 0.0.19)
- multi_json (~> 1.3)
- rake
- rest-client (~> 1.6)
- tire-contrib (0.1.2)
- tire
tzinfo (1.2.4)
thread_safe (~> 0.1)
unf (0.1.4)
@@ -222,6 +226,8 @@ GEM
websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.3)
+ whenever (0.10.0)
+ chronic (>= 0.6.3)
PLATFORMS
ruby
@@ -230,6 +236,9 @@ DEPENDENCIES
airbrake (~> 7.1)
capistrano (~> 2.15.4)
coveralls (~> 0.7.0)
+ elasticsearch-dsl
+ elasticsearch-model
+ elasticsearch-rails
faraday_middleware (~> 0.12.2)
jbuilder (~> 2.7.0)
listen (>= 3.0.5, < 3.2)
@@ -250,9 +259,8 @@ DEPENDENCIES
spring
spring-watcher-listen (~> 2.0.0)
test-unit (~> 3.0)
- tire (~> 0.6.2)
- tire-contrib (~> 0.1.2)
us_states (~> 0.1.1)!
+ whenever
BUNDLED WITH
1.16.0
diff --git a/README.md b/README.md
index 35fb9cb..8895827 100644
--- a/README.md
+++ b/README.md
@@ -21,9 +21,9 @@ We use bundler to manage gems. You can install bundler and other required gems l
### Elasticsearch
-We're using [Elasticsearch](http://www.elasticsearch.org/) (>= 1.4.0) for fulltext search. On a Mac, it's easy to install with [Homebrew](http://mxcl.github.com/homebrew/).
+We're using [Elasticsearch](http://www.elasticsearch.org/) (>= 5.6) for fulltext search. On a Mac, it's easy to install with [Homebrew](http://mxcl.github.com/homebrew/).
- $ brew install elasticsearch
+ $ brew install elasticsearch@5.6
Otherwise, follow the [instructions](http://www.elasticsearch.org/download/) to download and run it.
@@ -32,21 +32,21 @@ Otherwise, follow the [instructions](http://www.elasticsearch.org/download/) to
Install Docker if you haven't done so yet. Follow the instruction [here](https://www.docker.com/community-edition)
Once you have Docker installed on your machine, run the following command in your terminal
- $ docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:1.4.5
+ $ docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:5.6
-This will download an docker image containing elasticsearch=1.4.5 from docker hub, run it, and expose port 9200 & 9300 to your machine. You can verify your setup with the following command.
+This will download a docker image containing elasticsearch 5.6.5 from docker hub, run it, and expose ports 9200 & 9300 to your machine. You can verify your setup with the following command.
$ curl localhost:9200
{
- "status" : 200,
- "name" : "Aegis",
+ "name" : "u2bQgL2",
"cluster_name" : "elasticsearch",
+ "cluster_uuid" : "qZ-Xas_PR_2ARtHpY724Ug",
"version" : {
- "number" : "1.4.5",
- "build_hash" : "2aaf797f2a571dcb779a3b61180afe8390ab61f9",
- "build_timestamp" : "2015-04-27T08:06:06Z",
+ "number" : "5.6.5",
+ "build_hash" : "6a37571",
+ "build_date" : "2017-12-04T07:50:10.466Z",
"build_snapshot" : false,
- "lucene_version" : "4.10.4"
+ "lucene_version" : "6.6.1"
},
"tagline" : "You Know, for Search"
}
diff --git a/app/models/geoname.rb b/app/models/geoname.rb
index df1b409..b8e1e92 100644
--- a/app/models/geoname.rb
+++ b/app/models/geoname.rb
@@ -1,75 +1,118 @@
+require 'active_model'
+require 'elasticsearch/dsl'
+
class Geoname
- include Tire::Model::Search
+ include ActiveModel::Model
+ include Elasticsearch::Model
+ include Elasticsearch::DSL
+
+ INDEX_NAME = "#{Rails.env}:geonames".freeze
+
+ SYNONYMS = [
+ "afb, air force base",
+ "afs, air force station",
+ "ang, air national guard",
+ "cavecreek, cave creek",
+ "ft, fort",
+ "junc, junction",
+ "natl, nat, national",
+ "newcastle, new castle",
+ "pk, park",
+ "spgs, springs",
+ "st, saint"
+ ].freeze
- index_name("#{Rails.env}:geonames".freeze)
+ SETTINGS = {
+ analysis: {
+ filter: {
+ synonym: {
+ type: 'synonym',
+ synonyms: SYNONYMS
+ }
+ },
+ analyzer: {
+ custom_analyzer: {
+ type: 'custom',
+ tokenizer: 'whitespace',
+ filter: %w(standard lowercase synonym)
+ }
+ }
+ }
+ }
- SYNONYMS = ["ft, fort", "st, saint", "afb, air force base", "afs, air force station", "ang, air national guard", "junc, junction", "spgs, springs", "natl, nat, national", "pk, park", "newcastle, new castle", "cavecreek, cave creek"]
+ settings index: SETTINGS do
+ mappings dynamic: 'false' do
+ indexes :type, type: 'keyword'
+ indexes :location, type: 'text', analyzer: 'custom_analyzer'
+ indexes :state, type: 'text', analyzer: 'keyword'
+ indexes :geo, type: 'geo_point'
+ indexes :id, type: 'keyword', index: false
+ end
+ end
class << self
+ def client
+ @client ||= Geoname.__elasticsearch__.client
+ end
+
def create_search_index
- Tire.index index_name do
- create(
- settings: {
- index: {
- analysis: {
- analyzer: {custom_analyzer: {type: 'custom', tokenizer: 'whitespace', filter: %w(standard lowercase synonym)}},
- filter: {synonym: {type: 'synonym', synonyms: SYNONYMS}}
- }
- }
- },
- mappings: {
- geoname: {
- properties: {
- type: {type: 'string'},
- location: {type: 'string', analyzer: 'custom_analyzer'},
- state: {type: 'string', analyzer: 'keyword'},
- geo: {type: 'geo_point'},
- id: {type: 'string', index: :not_analyzed, include_in_all: false}
- }
- }
- }
- )
- end
+ client.indices.create(
+ index: INDEX_NAME,
+ body: { settings: settings.to_hash, mappings: mappings.to_hash }
+ )
end
def geocode(options = {})
- search_for(options.merge(size: 1)).results.first.geo.to_hash rescue nil
+ search_for(options.merge(size: 1)).results.first.geo rescue nil
end
def search_for(options)
- Tire.search index_name do
+ search_definition = Elasticsearch::DSL::Search.search do
query do
- boolean do
- must { match :location, options[:location], operator: 'AND' }
- must { term :state, options[:state] }
+ bool do
+ must do
+ match :location do
+ query options[:location]
+ operator 'and'
+ end
+ end
+
+ must { term state: options[:state] }
end
end
+
size options[:size]
- end
+ end.to_hash
+
+ Geoname.search(search_definition, index: INDEX_NAME)
end
def delete_search_index
- search_index.delete
+ client.indices.delete index: INDEX_NAME if search_index_exists?
end
- def search_index
- Tire.index(index_name)
+ def search_index_exists?
+ client.indices.exists? index: INDEX_NAME
end
def import(geonames)
- Tire.index index_name do
- import geonames do |docs|
- docs.each do |doc|
- doc[:id] = "#{doc[:location]}:#{doc[:state]}"
- end
- end
- refresh
+ geonames.each do |doc|
+ client.index(
+ index: INDEX_NAME,
+ type: 'geoname',
+ id: "#{doc[:location]}:#{doc[:state]}",
+ body: {
+ location: doc[:location],
+ geo: doc[:geo],
+ state: doc[:state]
+ }
+ )
end
- #Tire.index index_name
- Rails.logger.info "Imported #{geonames.size} Geonames to #{index_name}"
- end
+ __elasticsearch__.refresh_index! index: INDEX_NAME
+ Rails.logger.info "Imported #{geonames.size} Geonames to #{INDEX_NAME}"
+ end
end
-end
\ No newline at end of file
+end
diff --git a/app/models/position_opening.rb b/app/models/position_opening.rb
index cdb09f8..e95058c 100644
--- a/app/models/position_opening.rb
+++ b/app/models/position_opening.rb
@@ -1,134 +1,223 @@
+require 'active_model'
+require 'elasticsearch/dsl'
+
class PositionOpening
- include Tire::Model::Search
+ include ActiveModel::Model
+ include Elasticsearch::Model
+ include Elasticsearch::DSL
+
+ INDEX_NAME = "#{Elasticsearch::INDEX_NAME}".freeze
+
+ MAX_RETURNED_DOCUMENTS = 100.freeze
- index_name("#{Elasticsearch::INDEX_NAME}")
+ SYNONYMS = [
+ "architect, architecture",
+ "certified nursing assistant, cna",
+ "clerk, clerical",
+ "counselor, counseling, therapy, therapist",
+ "custodial, janitor, custodian",
+ "cypa, child and youth program assistant, childcare",
+ "cys, child youth services",
+ "electronic, electrical",
+ "forester, forestry",
+ "green, environment, environmental",
+ "information technology, it, tech, computer",
+ "linguist, language",
+ "legal, attorney",
+ "lpn, licensed practical nurse",
+ "lvn, licensed vocational nurse",
+ "pa, physician assistant",
+ "physician, doctor",
+ "rn, registered nurse",
+ "teacher, teaching",
+ "technical, technician",
+ "technology, technologist",
+ "tso, transportation security officer",
+ "tv, television"
+ ].freeze
+
+ SETTINGS = {
+ analysis: {
+ filter: {
+ synonym: {
+ type: 'synonym',
+ synonyms: SYNONYMS
+ }
+ },
+ analyzer: {
+ custom_analyzer: {
+ type: 'custom',
+ tokenizer: 'whitespace',
+ filter: %w(standard lowercase synonym snowball)
+ }
+ }
+ }
+ }
+
+ settings index: SETTINGS do
+ mappings dynamic: 'false' do
+ indexes :type, type: 'keyword'
+ indexes :source, type: 'keyword'
+ indexes :tags, type: 'text', analyzer: 'keyword'
+ indexes :external_id, type: 'integer', store: true
+ indexes :position_title, type: 'text', analyzer: 'custom_analyzer', term_vector: 'with_positions_offsets', store: true
+ indexes :organization_id, type: 'text', analyzer: 'keyword'
+ indexes :organization_name, type: 'keyword', index: false
+
+ indexes :locations, type: 'nested' do
+ indexes :city, type: 'text', analyzer: 'simple'
+ indexes :state, type: 'text', analyzer: 'keyword'
+ indexes :geo, type: 'geo_point'
+ end
- MAX_RETURNED_DOCUMENTS = 100
- SYNONYMS = ["information technology, it, tech, computer", "teacher, teaching", "certified nursing assistant, cna", "rn, registered nurse", "lpn, licensed practical nurse", "lvn, licensed vocational nurse", "pa, physician assistant", "custodial, janitor, custodian", "cys, child youth services", "clerk, clerical", "physician, doctor", "linguist, language", "tv, television", "legal, attorney", "counselor, counseling, therapy, therapist", "green, environment, environmental", "forester, forestry", "technical, technician", "technology, technologist", "electronic, electrical", "architect, architecture", "cypa, child and youth program assistant, childcare", "tso, transportation security officer"].freeze
+ indexes :start_date, type: 'date', format: 'YYYY-MM-dd'
+ indexes :end_date, type: 'date', format: 'YYYY-MM-dd'
+ indexes :minimum, type: 'float'
+ indexes :maximum, type: 'float'
+ indexes :position_offering_type_code, type: 'integer'
+ indexes :position_schedule_type_code, type: 'integer'
+ indexes :rate_interval_code, type: 'text', analyzer: 'keyword'
+ indexes :id, type: 'keyword', index: false
+ indexes :timestamp, type: 'date', null_value: 'NULL'
+ end
+ end
class << self
+ def client
+ @client ||= PositionOpening.__elasticsearch__.client
+ end
+
def create_search_index
- Tire.index index_name do
- create(
- settings: {
- index: {
- analysis: {
- analyzer: { custom_analyzer: { type: 'custom', tokenizer: 'whitespace', filter: %w(standard lowercase synonym snowball) } },
- filter: { synonym: { type: 'synonym', synonyms: SYNONYMS } }
- }
- }
- },
- mappings: {
- position_opening: {
- _timestamp: { enabled: true },
- _ttl: { enabled: true },
- properties: {
- type: { type: 'string' },
- source: { type: 'string', index: :not_analyzed },
- tags: { type: 'string', analyzer: 'keyword' },
- external_id: { type: 'integer' },
- position_title: { type: 'string', analyzer: 'custom_analyzer', term_vector: 'with_positions_offsets', store: true },
- organization_id: { type: 'string', analyzer: 'keyword' },
- organization_name: { type: 'string', index: :not_analyzed },
- locations: {
- type: 'nested',
- properties: {
- city: { type: 'string', analyzer: 'simple' },
- state: { type: 'string', analyzer: 'keyword' },
- geo: { type: 'geo_point' } } },
- start_date: { type: 'date', format: 'YYYY-MM-dd' },
- end_date: { type: 'date', format: 'YYYY-MM-dd' },
- minimum: { type: 'float' },
- maximum: { type: 'float' },
- position_offering_type_code: { type: 'integer' },
- position_schedule_type_code: { type: 'integer' },
- rate_interval_code: { type: 'string', analyzer: 'keyword' },
- id: { type: 'string', index: :not_analyzed, include_in_all: false }
- }
- }
- }
- )
- end
+ client.indices.create(
+ index: INDEX_NAME,
+ body: { settings: settings.to_hash, mappings: mappings.to_hash }
+ )
end
def search_for(options = {})
- options.reverse_merge!(size: 10, from: 0, sort_by: :_timestamp)
+ options.reverse_merge!(size: 10, from: 0)
document_limit = [options[:size].to_i, MAX_RETURNED_DOCUMENTS].min
source = options[:source]
+ sort_by = options[:sort_by] || :timestamp
tags = options[:tags].present? ? options[:tags].split(/[ ,]/) : nil
lat, lon = options[:lat_lon].split(',') rescue [nil, nil]
organization_ids = organization_ids_from_options(options)
query = Query.new(options[:query], organization_ids)
- search = Tire.search index_name do
+ definition = Elasticsearch::DSL::Search.search do
query do
- boolean(minimum_number_should_match: 1) do
- must { term :source, source } if source.present?
- must { terms :tags, tags } if tags
- must { match :position_offering_type_code, query.position_offering_type_code } if query.position_offering_type_code.present?
- must { match :position_schedule_type_code, query.position_schedule_type_code } if query.position_schedule_type_code.present?
- should { match :position_title, query.keywords, analyzer: 'custom_analyzer' } if query.keywords.present?
+ bool do
+ filter do
+ range :start_date do
+ lte Date.current
+ end
+ end
+
+ must { term source: source } if source.present?
+ must { terms tags: tags } if tags
+ must do
+ match :position_offering_type_code do
+ query query.position_offering_type_code
+ end
+ end if query.position_offering_type_code.present?
+
+ must do
+ match :position_schedule_type_code do
+ query query.position_schedule_type_code
+ end
+ end if query.position_schedule_type_code.present?
+
+ should do
+ match :position_title do
+ query query.keywords
+ analyzer 'custom_analyzer'
+ end
+ end if query.keywords.present?
should do
- nested path: 'locations' do
+ nested do
+ path 'locations'
query do
- match 'locations.city', query.keywords, operator: 'AND'
+ match 'locations.city' do
+ query query.keywords
+ operator 'and'
+ end
end
end
end if query.keywords.present? && query.location.nil?
- must { match :rate_interval_code, query.rate_interval_code } if query.rate_interval_code.present?
+
+ must do
+ match :rate_interval_code do
+ query query.rate_interval_code
+ end
+ end if query.rate_interval_code.present?
+
must do
- boolean do
- should { terms :organization_id, query.organization_terms } if query.organization_terms.present?
- query.organization_prefixes.each do |organization_prefix|
- should { prefix :organization_id, organization_prefix }
- end if query.organization_prefixes.present?
+ bool do
+ should { terms organization_id: query.organization_terms } if query.organization_terms.present?
+ if query.organization_prefixes.present?
+ query.organization_prefixes.each do |prefix|
+ should { prefix organization_id: prefix }
+ end
+ end
end
end if query.organization_ids.present?
+
must do
- nested path: 'locations' do
+ nested do
+ path 'locations'
query do
- boolean do
- must { term 'locations.state', query.location.state } if query.has_state?
- must { match 'locations.city', query.location.city, operator: 'AND' } if query.has_city?
+ bool do
+ must { term 'locations.state': query.location.state } if query.has_state?
+ must do
+ match 'locations.city' do
+ query query.location.city
+ operator 'and'
+ end
+ end if query.has_city?
end
end
end
end if query.location.present?
- end
- end if source.present? || tags || query.valid?
- filter :range, start_date: { lte: Date.current }
+ minimum_should_match '0<1'
+ end
+ end
- if query.keywords.blank?
- if lat.blank? || lon.blank?
- sort { by options[:sort_by], 'desc' }
- else
- options[:sort_by] = 'geo_distance'
- sort do
- by :_geo_distance, {
- 'locations.geo' => {
- lat: lat, lon: lon
- },
- :order => 'asc'
- }
+ sort do
+ if query.keywords.blank?
+ if lat.blank? || lon.blank?
+ by "#{sort_by}", order: 'desc'
+ else
+ by({
+ _geo_distance: {
+ 'locations.geo': { lat: lat.to_f, lon: lon.to_f },
+ order: 'asc',
+ nested_path: 'locations'
+ }
+ })
end
+ else
+ by "#{sort_by}", order: 'desc'
end
end
+
+ highlight { field :position_title, number_of_fragments: 0 }
size document_limit
from options[:from]
- highlight position_title: { number_of_fragments: 0 }
- end
+ end.to_hash
+
+ search_results = __elasticsearch__.search(definition, index: INDEX_NAME)
- Rails.logger.info("[Query] #{options.merge(result_count: search.results.total).to_json}")
+ Rails.logger.info("[Query] #{options.merge(result_count: search_results.results.total).to_json}")
- search.results.collect do |item|
+ search_results.results.collect do |item|
{
id: item.id,
source: item.source,
external_id: item.external_id,
- position_title: (options[:hl] == '1' && item.highlight.present?) ? item.highlight[:position_title][0] : item.position_title,
- organization_name: item.organization_name,
+ position_title: (options[:hl] == '1' && item.try(:highlight).present?) ? item.highlight[:position_title][0] : item.position_title,
+ organization_name: item.try(:organization_name),
rate_interval_code: item.rate_interval_code,
minimum: item.minimum,
maximum: item.maximum,
@@ -141,28 +230,40 @@ def search_for(options = {})
end
def delete_search_index
- search_index.delete
+ client.indices.delete index: INDEX_NAME rescue nil
end
- def search_index
- Tire.index(index_name)
+ def search_index_exists?
+ client.indices.exists? index: INDEX_NAME
end
def import(position_openings)
- Tire.index index_name do
- import position_openings do |docs|
- docs.each do |doc|
- doc[:id] = "#{doc[:source]}:#{doc[:external_id]}"
- doc[:locations].each do |loc|
- normalized_city = loc[:city].sub(' Metro Area', '').sub(/, .*$/, '')
- lat_lon_hash = Geoname.geocode(location: normalized_city, state: loc[:state])
- loc[:geo] = lat_lon_hash if lat_lon_hash.present?
- end if doc[:locations].present?
+ position_openings.each do |opening|
+ data = opening.each_with_object({}) do |(key, value), data|
+ if key == :locations
+ data[:locations] = value.map do |v|
+ {city: normalized_city(v[:city]),
+ state: v[:state],
+ geo: v[:geo] || find_geoname(v[:city], v[:state])}
+ end
+ else
+ data[key] = value
end
end
- refresh
+
+ client.index(
+ index: INDEX_NAME,
+ type: 'position_opening',
+ id: "#{opening[:source]}:#{opening[:external_id]}",
+ body: data.merge!({
+ timestamp: opening[:timestamp] || DateTime.current,
+ id: "#{opening[:source]}:#{opening[:external_id]}"
+ })
+ )
end
+ __elasticsearch__.refresh_index! index: INDEX_NAME
+
Rails.logger.info "Imported #{position_openings.size} position openings"
end
@@ -171,13 +272,17 @@ def get_external_ids_by_source(source)
total = 0
external_ids = []
begin
- search = Tire.search index_name do
- query { match :source, source }
- fields %w(external_id)
- sort { by :id }
- from from_index
- size MAX_RETURNED_DOCUMENTS
- end
+ search_definition = {
+ query: { match: { source: { query: source }}},
+ stored_fields: %w(external_id),
+ _source: true
+ }
+
+ search_definition[:size] = MAX_RETURNED_DOCUMENTS
+ search_definition[:from] = from_index
+ search_definition[:sort] = ['external_id']
+
+ search = __elasticsearch__.search(search_definition, index: INDEX_NAME)
external_ids.push(*search.results.map(&:external_id))
from_index += search.results.count
total = search.results.total
@@ -185,6 +290,43 @@ def get_external_ids_by_source(source)
external_ids.flatten
end
+ def delete_expired_docs
+ query = Elasticsearch::DSL::Search.search do
+ query do
+ bool do
+ filter do
+ bool do
+ should do
+ range :end_date do
+ lte Date.current
+ end
+ end
+
+ should do
+ bool do
+ must_not do
+ bool do
+ must do
+ exists { field 'end_date' }
+ end
+ must do
+ exists { field 'start_date' }
+ end
+ end
+ end
+ end
+ end
+
+ end
+ end
+ end
+ end
+ end
+
+ client.delete_by_query(body: query.to_hash, index: INDEX_NAME)
+ __elasticsearch__.refresh_index! index: INDEX_NAME
+ end
+
def url_for_position_opening(position_opening)
case position_opening.source
when 'usajobs'
@@ -204,5 +346,12 @@ def organization_ids_from_options(options)
organization_ids
end
+ def find_geoname(location, state)
+ Geoname.geocode(location: normalized_city(location), state: state)
+ end
+
+ def normalized_city(city)
+ city.sub(' Metro Area', '').sub(/, .*$/, '')
+ end
end
-end
\ No newline at end of file
+end
diff --git a/config/application.rb b/config/application.rb
index 921c263..89ce021 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -2,7 +2,6 @@
require 'rails'
require 'action_controller/railtie'
-# require 'tire/rails/logger' # This does not work with Rails 5
Bundler.require(*Rails.groups)
@@ -30,5 +29,6 @@ class Application < Rails::Application
end
config.airbrake = config_for(:airbrake)
+ config.elasticsearch = config_for(:elasticsearch)
end
end
diff --git a/config/elasticsearch.yml b/config/elasticsearch.yml
index 837de31..c2a617a 100644
--- a/config/elasticsearch.yml
+++ b/config/elasticsearch.yml
@@ -1,2 +1,9 @@
production:
- index_name:
\ No newline at end of file
+ index_name:
+ url:
+ username:
+ password:
+development:
+ url: 'localhost:9200'
+test:
+ url: 'localhost:9200'
diff --git a/config/initializers/elasticsearch.rb b/config/initializers/elasticsearch.rb
index afdacea..1c553d0 100644
--- a/config/initializers/elasticsearch.rb
+++ b/config/initializers/elasticsearch.rb
@@ -1,7 +1,16 @@
module Elasticsearch; end
-es_config = (YAML.load_file("#{Rails.root}/config/elasticsearch.yml") || {})[Rails.env]
+config = Rails.application.config.elasticsearch
-Tire::Configuration.url(es_config['url']) if es_config && es_config['url'].present?
+Elasticsearch::INDEX_NAME = config && config['index_name'].present? ? config['index_name'].freeze : "#{Rails.env}:jobs".freeze
-Elasticsearch::INDEX_NAME = es_config && es_config['index_name'].present? ? es_config['index_name'].freeze : "#{Rails.env}:jobs".freeze
+Rails.application.config.elasticsearch_client = Elasticsearch::Client.new(
+ url: config['url'],
+ user: config['username'],
+ password: config['password']
+)
+
+Elasticsearch::Model.client = Rails.application.config.elasticsearch_client
+
+PositionOpening.create_search_index unless PositionOpening.search_index_exists?
+Geoname.create_search_index unless Geoname.search_index_exists?
diff --git a/config/initializers/tire.rb b/config/initializers/tire.rb
deleted file mode 100644
index e623a70..0000000
--- a/config/initializers/tire.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-#Tire.configure { logger STDERR, level: 'debug' }
-PositionOpening.create_search_index unless PositionOpening.search_index.exists?
-Geoname.create_search_index unless Geoname.search_index.exists?
\ No newline at end of file
diff --git a/config/schedule.rb b/config/schedule.rb
new file mode 100644
index 0000000..079df8c
--- /dev/null
+++ b/config/schedule.rb
@@ -0,0 +1,14 @@
+require "active_support"
+require "active_support/time"
+
+Time.zone = "Eastern Time (US & Canada)"
+
+def zoned_time(time)
+ Time.zone.parse(time).localtime
+end
+
+set :output, { error: "log/cron_error.log", standard: "log/cron.log" }
+
+every 1.day, at: zoned_time("12:00 am") do
+ rake "position_openings:delete_expired_position_openings"
+end
diff --git a/lib/importers/neogov_data.rb b/lib/importers/neogov_data.rb
index be2b501..d488840 100644
--- a/lib/importers/neogov_data.rb
+++ b/lib/importers/neogov_data.rb
@@ -42,7 +42,7 @@ def import
existing_external_ids = PositionOpening.get_external_ids_by_source(@source)
expired_ids = existing_external_ids - updated_external_ids
expired_openings = expired_ids.collect do |expired_id|
- {type: 'position_opening', source: @source, external_id: expired_id, _ttl: '1s'}
+ {type: 'position_opening', source: @source, external_id: expired_id}
end
position_openings.push(*expired_openings)
PositionOpening.import position_openings
@@ -62,6 +62,7 @@ def process_job(job_xml)
now = DateTime.current.freeze
is_continuous = end_date_str =~ /^continuous$/i
+
if is_continuous
end_datetime_utc = now + 7
end_date = end_datetime_utc.to_date
@@ -86,21 +87,17 @@ def process_job(job_xml)
entry[:locations] = process_location_and_state(job_xml.xpath(XPATHS[:location]).inner_text,
job_xml.xpath(XPATHS[:state]).inner_text)
- if seconds_remaining.zero? || entry[:locations].blank?
- entry[:_ttl] = '1s'
- return entry
+ unless seconds_remaining.zero? || entry[:locations].blank?
+ entry[:timestamp] = pubdate.iso8601
+ entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.squish
+ entry[:start_date] = start_date
+ entry[:end_date] = is_continuous ? nil : end_date
+ entry[:minimum] = process_salary(job_xml.xpath(XPATHS[:minimum]).inner_text)
+ entry[:maximum] = process_salary(job_xml.xpath(XPATHS[:maximum]).inner_text)
+ entry[:rate_interval_code] = process_salary_interval(job_xml.xpath(XPATHS[:salary_interval]).inner_text)
+ entry.merge!(process_job_type(job_xml.xpath(XPATHS[:job_type]).inner_text))
end
- entry[:_timestamp] = pubdate.iso8601
- entry[:_ttl] = "#{seconds_remaining}s"
- entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.squish
- entry[:start_date] = start_date
- entry[:end_date] = is_continuous ? nil : end_date
- entry[:minimum] = process_salary(job_xml.xpath(XPATHS[:minimum]).inner_text)
- entry[:maximum] = process_salary(job_xml.xpath(XPATHS[:maximum]).inner_text)
- entry[:rate_interval_code] = process_salary_interval(job_xml.xpath(XPATHS[:salary_interval]).inner_text)
- entry.merge!(process_job_type(job_xml.xpath(XPATHS[:job_type]).inner_text))
-
entry
end
diff --git a/lib/importers/usajobs_data.rb b/lib/importers/usajobs_data.rb
index 4fd86df..4f36fa1 100644
--- a/lib/importers/usajobs_data.rb
+++ b/lib/importers/usajobs_data.rb
@@ -38,8 +38,8 @@ def process_job(job_xml)
entry[:external_id] = job_xml.xpath(XPATHS[:id]).inner_text.to_i
entry[:locations] = process_locations(job_xml)
entry[:locations] = [] if entry[:locations].size >= CATCHALL_THRESHOLD
- entry[:_ttl] = (days_remaining.zero? || entry[:locations].empty?) ? '1s' : "#{days_remaining}d"
- unless entry[:_ttl] == '1s'
+ # entry[:_ttl] = (days_remaining.zero? || entry[:locations].empty?) ? '1s' : "#{days_remaining}d"
+ unless entry[:locations].empty? || days_remaining.zero?
entry[:position_title] = job_xml.xpath(XPATHS[:position_title]).inner_text.strip
entry[:organization_id] = job_xml.xpath(XPATHS[:organization_id]).inner_text.strip.upcase
entry[:organization_name] = job_xml.xpath(XPATHS[:organization_name]).inner_text.strip
@@ -89,4 +89,4 @@ def abbreviate_state_name(location_str)
end
location_str
end
-end
\ No newline at end of file
+end
diff --git a/lib/tasks/geonames.rake b/lib/tasks/geonames.rake
index 8cfb1fb..c6336ac 100644
--- a/lib/tasks/geonames.rake
+++ b/lib/tasks/geonames.rake
@@ -11,7 +11,7 @@ namespace :geonames do
desc 'Recreate geonames index'
task recreate_index: :environment do
- Geoname.delete_search_index if Geoname.search_index.exists?
+ Geoname.delete_search_index if Geoname.search_index_exists?
Geoname.create_search_index
end
end
\ No newline at end of file
diff --git a/lib/tasks/position_openings.rake b/lib/tasks/position_openings.rake
index 9b9aec7..5034bf8 100644
--- a/lib/tasks/position_openings.rake
+++ b/lib/tasks/position_openings.rake
@@ -41,7 +41,12 @@ namespace :jobs do
desc 'Recreate position openings index'
task recreate_index: :environment do
- PositionOpening.delete_search_index if PositionOpening.search_index.exists?
+ PositionOpening.delete_search_index if PositionOpening.search_index_exists?
PositionOpening.create_search_index
end
-end
\ No newline at end of file
+
+ desc 'Delete expired position openings'
+ task delete_expired_position_openings: :environment do
+ PositionOpening.delete_expired_docs
+ end
+end
diff --git a/spec/api/v2/position_openings_spec.rb b/spec/api/v2/position_openings_spec.rb
index d26c0ca..d81c189 100644
--- a/spec/api/v2/position_openings_spec.rb
+++ b/spec/api/v2/position_openings_spec.rb
@@ -4,7 +4,7 @@
let(:v2_headers) { { 'Accept' => 'application/vnd.usagov.position_openings.v2' } }
before do
- PositionOpening.delete_search_index if PositionOpening.search_index.exists?
+ PositionOpening.delete_search_index if PositionOpening.search_index_exists?
PositionOpening.create_search_index
UsajobsData.new('doc/sample.xml').import
@@ -33,7 +33,7 @@
'organization_name'=>'Veterans Affairs, Veterans Health Administration',
'rate_interval_code'=>'PH', 'minimum'=>17, 'maximum'=>23,
'start_date'=>'2012-09-19', 'end_date'=>'2022-01-31',
- 'locations'=>['Odessa, TX', 'Pentagon, Arlington, VA', 'San Angelo, TX', 'Abilene, TX'],
+ 'locations'=>['Odessa, TX', 'Pentagon, VA', 'San Angelo, TX', 'Abilene, TX'],
'url' => 'https://www.usajobs.gov/GetJob/ViewDetails/327358300'})
expect(results_array.last).to eq({'id'=>'ng:michigan:234175', 'position_title'=>'Registered Nurse Non-Career',
diff --git a/spec/api/v3/position_openings_spec.rb b/spec/api/v3/position_openings_spec.rb
index 5f2927d..04c373f 100644
--- a/spec/api/v3/position_openings_spec.rb
+++ b/spec/api/v3/position_openings_spec.rb
@@ -2,7 +2,7 @@
describe 'Position Openings API V3' do
before do
- PositionOpening.delete_search_index if PositionOpening.search_index.exists?
+ PositionOpening.delete_search_index if PositionOpening.search_index_exists?
PositionOpening.create_search_index
UsajobsData.new('doc/sample.xml').import
@@ -31,7 +31,7 @@
'organization_name'=>'Veterans Affairs, Veterans Health Administration',
'rate_interval_code'=>'PH', 'minimum'=>17, 'maximum'=>23,
'start_date'=>'2012-09-19', 'end_date'=>'2022-01-31',
- 'locations'=>['Odessa, TX', 'Pentagon, Arlington, VA', 'San Angelo, TX', 'Abilene, TX'],
+ 'locations'=>['Odessa, TX', 'Pentagon, VA', 'San Angelo, TX', 'Abilene, TX'],
'url' => 'https://www.usajobs.gov/GetJob/ViewDetails/327358300'})
expect(results_array.last).to eq({'id'=>'ng:michigan:234175', 'position_title'=>'Registered Nurse Non-Career',
diff --git a/spec/lib/importers/neogov_data_spec.rb b/spec/lib/importers/neogov_data_spec.rb
index be2da60..da786e9 100644
--- a/spec/lib/importers/neogov_data_spec.rb
+++ b/spec/lib/importers/neogov_data_spec.rb
@@ -7,7 +7,7 @@
let!(:current_datetime) { DateTime.current.freeze }
let!(:current) { current_datetime.to_date.freeze }
let(:far_away) { Date.parse('2022-01-31') }
- let(:continuous_ttl) { "#{(current_datetime + 7).to_i - DateTime.parse('2012-03-12 10:16:56.14').to_datetime.to_i}s" }
+ # let(:continuous_ttl) { "#{(current_datetime + 7).to_i - DateTime.parse('2012-03-12 10:16:56.14').to_datetime.to_i}s" }
before { allow(DateTime).to receive(:current).and_return(current_datetime) }
@@ -25,8 +25,8 @@
expect(position_openings[0]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789,
- locations: [{city: 'Lansing', state: 'MI'}], _ttl: '277909586s',
+ timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789,
+ locations: [{city: 'Lansing', state: 'MI'}],
position_title: 'Professional Development and Training Intern-DHS',
start_date: Date.parse('2013-04-12'), end_date: far_away, minimum: nil, maximum: nil,
rate_interval_code: 'PH', position_offering_type_code: 15328, position_schedule_type_code: nil}
@@ -35,8 +35,8 @@
expect(position_openings[1]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517,
- locations: [{city: 'Lansing', state: 'MI'}], _ttl: '278257419s',
+ timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517,
+ locations: [{city: 'Lansing', state: 'MI'}],
position_title: 'MEDC Corporate - Business Attraction Manager',
start_date: Date.parse('2013-04-08'), end_date: far_away, minimum: 59334.0, maximum: 77066.0,
rate_interval_code: 'PA', position_offering_type_code: 15317, position_schedule_type_code: 1}
@@ -45,9 +45,8 @@
expect(position_openings[2]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2012-03-12T10:16:56+00:00', external_id: 282662,
- locations: [{city: 'Freeland', state: 'MI'}],
- _ttl: continuous_ttl, position_title: 'Dentist-A',
+ timestamp: '2012-03-12T10:16:56+00:00', external_id: 282662,
+ locations: [{city: 'Freeland', state: 'MI'}], position_title: 'Dentist-A',
start_date: Date.parse('2011-09-23'), end_date: nil, minimum: 37.33, maximum: 51.66,
rate_interval_code: 'PH', position_offering_type_code: 15317, position_schedule_type_code: 2}
)
@@ -55,9 +54,8 @@
expect(position_openings[3]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2010-08-10T16:07:30+00:00', external_id: 234175,
- locations: [{city: 'Munising', state: 'MI'}],
- _ttl: '362235090s', position_title: 'Registered Nurse Non-Career',
+ timestamp: '2010-08-10T16:07:30+00:00', external_id: 234175,
+ locations: [{city: 'Munising', state: 'MI'}], position_title: 'Registered Nurse Non-Career',
start_date: Date.parse('2010-06-08'), end_date: far_away, minimum: 28.37, maximum: 38.87,
rate_interval_code: 'PH', position_offering_type_code: nil, position_schedule_type_code: nil}
)
@@ -81,8 +79,8 @@
expect(position_openings[0]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789,
- locations: [{city: 'Lansing', state: 'MI'}], _ttl: '277909586s',
+ timestamp: '2013-04-12T15:52:34+00:00', external_id: 634789,
+ locations: [{city: 'Lansing', state: 'MI'}],
position_title: 'Professional Development and Training Intern-DHS',
start_date: Date.parse('2013-04-12'), end_date: far_away, minimum: nil, maximum: nil,
rate_interval_code: 'PH', position_offering_type_code: 15328, position_schedule_type_code: nil}
@@ -91,42 +89,42 @@
expect(position_openings[1]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- _timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517,
- locations: [{city: 'Lansing', state: 'MI'}], _ttl: '278257419s',
+ timestamp: '2013-04-08T15:15:21+00:00', external_id: 631517,
+ locations: [{city: 'Lansing', state: 'MI'}],
position_title: 'MEDC Corporate - Business Attraction Manager',
start_date: Date.parse('2013-04-08'), end_date: far_away, minimum: 59334.0, maximum: 77066.0,
rate_interval_code: 'PA', position_offering_type_code: 15317, position_schedule_type_code: 1}
)
expect(position_openings[2]).to eq(
- {type: 'position_opening', source: 'ng:michigan', external_id: 282662, _ttl: '1s'}
+ {type: 'position_opening', source: 'ng:michigan', external_id: 282662}
)
end
less_entries_importer.import
end
end
- context 'when invalid/expired position openings are in the feed' do
- let(:expired_importer) { NeogovData.new('michigan', 'state', 'USMI') }
-
- before do
- allow(expired_importer).to receive(:fetch_jobs_rss).and_return File.open('spec/resources/neogov/expired.rss')
- end
-
- it 'should set their _ttl to 1s' do
- expect(PositionOpening).to receive(:get_external_ids_by_source).with('ng:michigan').and_return([])
- expect(PositionOpening).to receive(:import) do |position_openings|
- expect(position_openings.length).to eq(1)
-
- expect(position_openings[0]).to eq(
- {type: 'position_opening', source: 'ng:michigan',
- organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- external_id: 282662, locations: [{city: 'Freeland', state: 'MI'}], _ttl: '1s'}
- )
- end
- expired_importer.import
- end
- end
+ # context 'when invalid/expired position openings are in the feed' do
+ # let(:expired_importer) { NeogovData.new('michigan', 'state', 'USMI') }
+ #
+ # before do
+ # allow(expired_importer).to receive(:fetch_jobs_rss).and_return File.open('spec/resources/neogov/expired.rss')
+ # end
+ #
+ # it 'should set their ttl to 1s' do
+ # expect(PositionOpening).to receive(:get_external_ids_by_source).with('ng:michigan').and_return([])
+ # expect(PositionOpening).to receive(:import) do |position_openings|
+ # expect(position_openings.length).to eq(1)
+ #
+ # expect(position_openings[0]).to eq(
+ # {type: 'position_opening', source: 'ng:michigan',
+ # organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
+ # external_id: 282662, locations: [{city: 'Freeland', state: 'MI'}], ttl: '1s'}
+ # )
+ # end
+ # expired_importer.import
+ # end
+ # end
context 'when the city or state is invalid' do
let(:bad_location_importer) { NeogovData.new('michigan', 'state', 'USMI') }
@@ -143,7 +141,7 @@
expect(position_openings[0]).to eq(
{type: 'position_opening', source: 'ng:michigan',
organization_id: 'USMI', organization_name: 'State of Michigan, MI', tags: %w(state),
- external_id: 386302, locations: [], _ttl: '1s'}
+ external_id: 386302, locations: []}
)
end
bad_location_importer.import
diff --git a/spec/lib/importers/usajobs_data_spec.rb b/spec/lib/importers/usajobs_data_spec.rb
index e50edf2..081cca4 100644
--- a/spec/lib/importers/usajobs_data_spec.rb
+++ b/spec/lib/importers/usajobs_data_spec.rb
@@ -11,7 +11,7 @@
expect(PositionOpening).to receive(:import) do |position_openings|
expect(position_openings.length).to eq(3)
expect(position_openings[0]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 305972200, _ttl: ttl,
+ {type: 'position_opening', source: 'usajobs', external_id: 305972200,
position_title: 'Medical Officer', tags: %w(federal),
organization_id: 'AF09', organization_name: 'Air Force Personnel Center',
locations: [{city: 'Dyess AFB', state: 'TX'}],
@@ -19,7 +19,7 @@
minimum: 60274, maximum: 155500, rate_interval_code: 'PA', position_schedule_type_code: 1, position_offering_type_code: 15327}
)
expect(position_openings[1]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 325054900, _ttl: ttl,
+ {type: 'position_opening', source: 'usajobs', external_id: 325054900,
position_title: 'Physician (Surgical Critical Care)', tags: %w(federal),
organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration',
locations: [{city: 'Charleston', state: 'SC'}],
@@ -27,7 +27,7 @@
minimum: 125000, maximum: 295000, rate_interval_code: 'PA', position_schedule_type_code: 2, position_offering_type_code: 15317}
)
expect(position_openings[2]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: ttl,
+ {type: 'position_opening', source: 'usajobs', external_id: 327358300,
position_title: 'Student Nurse Technicians', tags: %w(federal),
organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration',
locations: [{city: 'Odessa', state: 'TX'},
@@ -48,15 +48,15 @@
expect(PositionOpening).to receive(:import) do |position_openings|
expect(position_openings.length).to eq(3)
expect(position_openings[0]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 305972200, _ttl: '1s',
+ {type: 'position_opening', source: 'usajobs', external_id: 305972200,
tags: %w(federal), locations: [{:city => "Dyess AFB", :state => "TX"}]}
)
expect(position_openings[1]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 325054900, _ttl: '1s',
+ {type: 'position_opening', source: 'usajobs', external_id: 325054900,
tags: %w(federal), locations: [{:city => "Charleston", :state => "SC"}]}
)
expect(position_openings[2]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: '1s',
+ {type: 'position_opening', source: 'usajobs', external_id: 327358300,
tags: %w(federal), locations: [{:city => "Odessa", :state => "TX"},
{:city => "Pentagon, Arlington", :state => "VA"},
{:city => "San Angelo", :state => "TX"},
@@ -75,14 +75,14 @@
expect(PositionOpening).to receive(:import) do |position_openings|
expect(position_openings.length).to eq(2)
expect(position_openings[0]).to eq(
- {type: "position_opening", source: 'usajobs', external_id: 305972200, _ttl: ttl, position_title: "Medical Officer",
+ {type: "position_opening", source: 'usajobs', external_id: 305972200, position_title: "Medical Officer",
organization_id: "AF09", organization_name: "Air Force Personnel Center", tags: %w(federal),
locations: [{:city => "Fulton", :state => "MD"}],
start_date: Date.parse('28 Dec 2011'), end_date: far_away,
minimum: 60274, maximum: 155500, rate_interval_code: "PA", position_schedule_type_code: 1, position_offering_type_code: 15327}
)
expect(position_openings[1]).to eq(
- {type: "position_opening", source: 'usajobs', external_id: 325054900, _ttl: "1s", locations: [], tags: %w(federal)}
+ {type: "position_opening", source: 'usajobs', external_id: 325054900, locations: [], tags: %w(federal)}
)
end
bad_location_importer.import
@@ -96,7 +96,7 @@
expect(PositionOpening).to receive(:import) do |position_openings|
expect(position_openings.length).to eq(1)
expect(position_openings[0]).to eq(
- {type: 'position_opening', source: 'usajobs', external_id: 327358300, _ttl: '1s',
+ {type: 'position_opening', source: 'usajobs', external_id: 327358300,
tags: %w(federal), locations: []}
)
end
diff --git a/spec/models/geoname_spec.rb b/spec/models/geoname_spec.rb
index 68aa2d7..2ca8878 100644
--- a/spec/models/geoname_spec.rb
+++ b/spec/models/geoname_spec.rb
@@ -2,7 +2,7 @@
describe Geoname do
before do
- Geoname.delete_search_index if Geoname.search_index.exists?
+ Geoname.delete_search_index if Geoname.search_index_exists?
Geoname.create_search_index
end
@@ -14,11 +14,10 @@
end
it 'should return the lat/lon hash of the place' do
- expect(Geoname.geocode(location: "Someplace", state: 'XY')).to eq({lat: 12.34, lon: -123.45})
+ expect(Geoname.geocode(location: "Someplace", state: 'XY').to_json).to eq({lat: 12.34, lon: -123.45}.to_json)
end
end
-
context 'when query terms contain a synonym match with terms in location field' do
before do
geonames, @first_synonyms = [], []
@@ -26,7 +25,7 @@
first_synonym, remainder = batch_str.strip.gsub(/ ?, ?/, ',').split(',', 2)
@first_synonyms << first_synonym
remainder.split(',').each do |synonym|
- geonames << {type: 'geoname', location: "#{synonym} City", state: 'CA', geo: {lat: rand * 180, lon: rand * 180}}
+ geonames << {type: 'geoname', location: "#{synonym} City", state: 'CA', geo: {lat: rand * 90, lon: rand * 180}}
end
end
Geoname.import geonames
@@ -45,11 +44,11 @@
describe '.import(geonames)' do
it 'should set the document ID' do
Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 12.34, lon: -123.45}}]
- Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 92.34, lon: 23.45}}]
+ Geoname.import [{type: 'geoname', location: "Someplace", state: 'XY', geo: {lat: 82.34, lon: 23.45}}]
search = Geoname.search_for(location: 'Someplace', state: 'XY', size: 2)
expect(search.results.total).to eq(1)
expect(search.results.first.id).to eq('Someplace:XY')
- expect(search.results.first.geo.lat).to eq(92.34)
+ expect(search.results.first.geo.lat).to eq(82.34)
end
end
diff --git a/spec/models/position_opening_spec.rb b/spec/models/position_opening_spec.rb
index b69d0a1..89c336d 100644
--- a/spec/models/position_opening_spec.rb
+++ b/spec/models/position_opening_spec.rb
@@ -2,10 +2,55 @@
describe PositionOpening do
before do
- PositionOpening.delete_search_index if PositionOpening.search_index.exists?
+ PositionOpening.delete_search_index if PositionOpening.search_index_exists?
PositionOpening.create_search_index
end
+ describe '.delete_expired_docs' do
+ before do
+ position_openings = []
+ # deleted : end date is now
+ position_openings << { source: 'usajobs', external_id: 8801, type: 'position_opening', position_title: 'Deputy Special Assistant to the Chief Nurse Practitioner',
+ organization_id: 'AF09', organization_name: 'Air Force Personnel Center',
+ position_schedule_type_code: 1, position_offering_type_code: 15317, tags: %w(federal),
+ start_date: Date.current, end_date: Date.current, minimum: 80000, maximum: 100000, rate_interval_code: 'PA',
+ locations: [{ city: 'Andrews AFB', state: 'MD' },
+ { city: 'Pentagon Arlington', state: 'VA' },
+ { city: 'Air Force Academy', state: 'CO' }] }
+ # not deleted
+ position_openings << { source: 'usajobs', external_id: 8803, type: 'position_opening', position_title: 'Future Person',
+ organization_id: 'FUTU', organization_name: 'Future Administration',
+ position_schedule_type_code: 2, position_offering_type_code: 15327, tags: %w(federal),
+ start_date: Date.current + 1, end_date: Date.current + 8, minimum: 17, maximum: 23, rate_interval_code: 'PH',
+ locations: [{ city: 'San Francisco', state: 'CA' }] }
+ # deleted: end_date is less than start date
+ position_openings << { source: 'usajobs', external_id: 8804, type: 'position_opening', position_title: 'Making No Money',
+ organization_id: 'FUTU', organization_name: 'Future Administration',
+ position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal),
+ start_date: Date.current + 10, end_date: Date.current, minimum: 0, maximum: 0, rate_interval_code: 'WC',
+ locations: [{ city: 'San Francisco', state: 'CA' }] }
+ position_openings << { source: 'usajobs', external_id: 8807, type: 'position_opening', position_title: 'Making No Money',
+ organization_id: 'FUTU', organization_name: 'Future Administration',
+ position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal),
+ start_date: nil, end_date: Date.current + 8, minimum: 0, maximum: 0, rate_interval_code: 'WC',
+ locations: [{ city: 'San Francisco', state: 'CA' }] }
+ # deleted: end_date is nil
+ position_openings << { source: 'usajobs', external_id: 8805, type: 'position_opening', position_title: 'Physician Assistant',
+ position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal),
+ organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration',
+ start_date: Date.current, end_date: nil, minimum: 17, maximum: 23, rate_interval_code: 'PH',
+ locations: [{ city: 'Fulton', state: 'MD' }] }
+ PositionOpening.import position_openings
+ end
+
+ it 'should delete the position openings that are expired (less than today)' do
+ PositionOpening.delete_expired_docs
+ res = PositionOpening.search('*', index: 'test:jobs')
+ expect(res.size).to eq 1
+ expect(res.results.first.id).to eq('usajobs:8803')
+ end
+ end
+
describe '.search_for(options)' do
before do
position_openings = []
@@ -31,17 +76,17 @@
position_schedule_type_code: 1, position_offering_type_code: 15328, tags: %w(federal),
start_date: Date.current, end_date: Date.current + 8, minimum: 0, maximum: 0, rate_interval_code: 'WC',
locations: [{ city: 'San Francisco', state: 'CA' }] }
- position_openings << { type: 'position_opening', source: 'ng:michigan', _timestamp: Date.current.weeks_ago(1).iso8601, external_id: 629140,
+ position_openings << { type: 'position_opening', source: 'ng:michigan', timestamp: Date.current.weeks_ago(1).iso8601, external_id: 629140,
locations: [{ city: 'Lansing', state: 'MI' }], tags: %w(state),
rate_interval_code: 'PH', position_schedule_type_code: 1, position_offering_type_code: 15317,
position_title: 'Supervisor (DOH #28425)',
start_date: Date.current, end_date: Date.current.tomorrow, minimum: 20.7, maximum: 36.8 }
- position_openings << { type: 'position_opening', source: 'ng:michigan', _timestamp: Date.current.yesterday.iso8601, external_id: 616313,
+ position_openings << { type: 'position_opening', source: 'ng:michigan', timestamp: Date.current.yesterday.iso8601, external_id: 616313,
locations: [{ city: 'Detroit', state: 'MI' }], tags: %w(state),
rate_interval_code: 'PH', position_schedule_type_code: 1, position_offering_type_code: 15322,
position_title: 'Indoor Lifeguard',
start_date: Date.current, end_date: Date.current + 8, minimum: 15.68, maximum: 27.11 }
- position_openings << { type: 'position_opening', source: 'ng:bloomingtonmn', _timestamp: Date.current.iso8601, external_id: 632865,
+ position_openings << { type: 'position_opening', source: 'ng:bloomingtonmn', timestamp: Date.current.iso8601, external_id: 632865,
locations: [{ city: 'Detroit', state: 'MI' }], tags: %w(city),
rate_interval_code: 'PA', position_schedule_type_code: 1, position_offering_type_code: 15317,
position_title: 'Computer Specialist',
@@ -230,8 +275,8 @@
context 'when keywords present' do
it 'should sort by relevance' do
res = PositionOpening.search_for(query: 'physician nursing Practitioner')
- expect(res.first[:position_title]).to eq('Deputy Special Assistant to the Chief Nurse Practitioner')
- expect(res.last[:position_title]).to eq('Physician Assistant')
+ expect(res.first[:position_title]).to eq('Physician Assistant')
+ expect(res.last[:position_title]).to eq('Deputy Special Assistant to the Chief Nurse Practitioner')
end
end
@@ -282,14 +327,12 @@
start_date: Date.current, end_date: Date.tomorrow, minimum: 17, maximum: 23, rate_interval_code: 'PH',
locations: [{ city: 'Fulton', state: 'MD' }] }]
PositionOpening.import position_openings
- sleep(0.25)
position_openings = [{ source: 'usajobs', external_id: 1001, type: 'position_opening', position_title: 'Physician Assistant Newer',
position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal),
organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration',
start_date: Date.current, end_date: Date.tomorrow, minimum: 17, maximum: 23, rate_interval_code: 'PH',
locations: [{ city: 'Fulton', state: 'MD' }] }]
PositionOpening.import position_openings
- sleep(0.25)
position_openings = [{ source: 'usajobs', external_id: 1002, type: 'position_opening', position_title: 'Physician Assistant Newest',
position_schedule_type_code: 2, position_offering_type_code: 15318, tags: %w(federal),
organization_id: 'VATA', organization_name: 'Veterans Affairs, Veterans Health Administration',
@@ -396,12 +439,10 @@
expect(Geoname).to receive(:geocode).with(location: 'Washington', state: 'DC').and_return({ lat: 23.45, lon: -12.34 })
expect(Geoname).to receive(:geocode).with(location: 'Maui Island', state: 'HI').and_return({ lat: 45.67, lon: -13.31 })
PositionOpening.import([position_opening])
- position_openings = Tire.search 'test:jobs' do
- query { all }
- end
- expect(position_openings.results.first[:locations][0][:geo].to_hash).to eq({ lat: 12.34, lon: -23.45 })
- expect(position_openings.results.first[:locations][1][:geo].to_hash).to eq({ lat: 23.45, lon: -12.34 })
- expect(position_openings.results.first[:locations][2][:geo].to_hash).to eq({ lat: 45.67, lon: -13.31 })
+ position_openings = PositionOpening.search('*', index: 'test:jobs')
+ expect(position_openings.results.first.locations[0][:geo].to_json).to eq({ lat: 12.34, lon: -23.45 }.to_json)
+ expect(position_openings.results.first.locations[1][:geo].to_json).to eq({ lat: 23.45, lon: -12.34 }.to_json)
+ expect(position_openings.results.first.locations[2][:geo].to_json).to eq({ lat: 45.67, lon: -13.31 }.to_json)
end
context 'when no location information is present for job' do
@@ -414,9 +455,7 @@
it 'should leave locations empty' do
PositionOpening.import([position_opening_no_locations])
- position_openings = Tire.search 'test:jobs' do
- query { all }
- end
+ position_openings = PositionOpening.search('*', index: 'test:jobs')
expect(position_openings.results.first[:locations]).to be_nil
end