Skip to content

Commit

Permalink
[Completes #76012166] searchers should surface pics with tag "apollo11" when searching on "apollo 11"
Browse files Browse the repository at this point in the history
  • Loading branch information
loren committed Aug 11, 2014
1 parent 0ab10cd commit 9a29de8
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 8 deletions.
2 changes: 1 addition & 1 deletion app/models/flickr_photo.rb
Expand Up @@ -13,7 +13,7 @@ class FlickrPhoto
attribute :title, String, mapping: { type: 'string', analyzer: 'en_analyzer', copy_to: 'bigram' }
attribute :description, String, mapping: { type: 'string', analyzer: 'en_analyzer', copy_to: 'bigram' }
attribute :taken_at, Date
# Tags are declared once, with the TAG mapping (tag_analyzer); the earlier
# duplicate declaration using the KEYWORD mapping has been dropped.
attribute :tags, String, mapping: ElasticSettings::TAG
attribute :url, String, mapping: ElasticSettings::KEYWORD
attribute :thumbnail_url, String, mapping: ElasticSettings::KEYWORD
attribute :popularity, Integer, default: 0, mapping: { type: 'integer', index: :not_analyzed }
Expand Down
28 changes: 23 additions & 5 deletions app/models/image_search.rb
Expand Up @@ -5,7 +5,7 @@ class ImageSearch
# Opening marker returned by #pre_tags; DEFAULT_POST_TAG is presumably its
# closing counterpart for highlighted fragments (usage not visible here).
DEFAULT_PRE_TAG = '<strong>'
DEFAULT_POST_TAG = '</strong>'
# Hits-shaped hash describing an empty result set (total 0, no hits).
NO_HITS = { 'hits' => { 'total' => 0, 'offset' => 0, 'hits' => [] } }
# Fields handed to the simple_query_string clause. Tags are deliberately not
# listed: they are matched by the dedicated match clause in #match_tags, so the
# constant is assigned exactly once, without the old `tags^2` entry.
TEXT_FIELDS = %w(title description caption)

def initialize(query, options)
@query = (query || '').squish
Expand Down Expand Up @@ -140,15 +140,33 @@ def owner_terms(json, profiles)

# Emits the `query` section: a bool query whose `should` array holds two
# clauses, a match on tags (#match_tags) and a simple_query_string over the
# text fields (#simple_query_string).
#
# The stale `json.simple_query_string do` / `json.fields` opener has been
# removed; it left the do/end pairs unbalanced and wrapped the bool query
# inside a simple_query_string clause.
#
# json - builder object responding to set!/child! and arbitrary key methods
#        (presumably a Jbuilder instance; confirm against the caller).
def filtered_query_query(json)
  json.query do
    json.bool do
      json.set! :should do
        json.child! { match_tags(json) }
        json.child! { simple_query_string(json) }
      end
    end
  end
end

# Emits a `match` clause against the `tags` field, analyzing the user's query
# with "tag_analyzer" (the analyzer the TAG mapping indexes tags with).
#
# Only `query` and `analyzer` are emitted. The previous body also set
# `analyzer "en_analyzer"` (immediately overwritten by the later analyzer
# line) and `default_operator "AND"`, which is a simple_query_string option
# rather than a match-query option.
#
# json - builder object the clause is written to (presumably Jbuilder).
def match_tags(json)
  json.match do
    json.tags do
      json.query @query
      json.analyzer "tag_analyzer"
    end
  end
end

# Writes a `simple_query_string` clause to the builder: the user's query is
# searched across TEXT_FIELDS using "en_analyzer", with AND as the default
# operator between terms.
#
# json - builder object the clause is written to (presumably Jbuilder).
def simple_query_string(json)
  query_text = @query
  json.simple_query_string do
    json.fields(TEXT_FIELDS)
    json.query(query_text)
    json.analyzer('en_analyzer')
    json.default_operator('AND')
  end
end

# Returns a one-element array holding the default opening highlight marker.
def pre_tags
  opening_marker = DEFAULT_PRE_TAG
  [opening_marker]
end
Expand Down
2 changes: 1 addition & 1 deletion app/models/instagram_photo.rb
Expand Up @@ -9,7 +9,7 @@ class InstagramPhoto
end

attribute :username, String, mapping: ElasticSettings::KEYWORD
# Tags are declared once, with the TAG mapping (tag_analyzer); the earlier
# duplicate declaration using the KEYWORD mapping has been dropped.
attribute :tags, String, mapping: ElasticSettings::TAG
attribute :url, String, mapping: ElasticSettings::KEYWORD
attribute :thumbnail_url, String, mapping: ElasticSettings::KEYWORD
attribute :caption, String, mapping: { type: 'string', analyzer: 'en_analyzer', copy_to: 'bigram' }
Expand Down
10 changes: 9 additions & 1 deletion lib/elastic_settings.rb
@@ -1,12 +1,14 @@
module ElasticSettings
KEYWORD = { type: 'string', analyzer: 'case_insensitive_keyword_analyzer' }
TAG = { type: 'string', analyzer: 'tag_analyzer' }
ENGLISH_STOPWORDS = %w(a an and are as at be but by for if in into is no not of on or s such t that the their then there these they this to was with)

COMMON = {
index: {
analysis: {
char_filter: {
ignore_chars: { type: "mapping", mappings: ["'=>", "’=>", "`=>"] }
ignore_chars: { type: "mapping", mappings: ["'=>", "’=>", "`=>"] },
strip_whitespace: { type: "mapping", mappings: ["\\u0020=>"] }
},
filter: {
bigram_filter: { type: 'shingle' },
Expand All @@ -26,6 +28,12 @@ module ElasticSettings
char_filter: %w(ignore_chars),
filter: %w(standard asciifolding lowercase bigram_filter)
},
tag_analyzer: {
type: "custom",
tokenizer: "standard",
char_filter: %w(strip_whitespace),
filter: %w(standard asciifolding lowercase)
},
case_insensitive_keyword_analyzer: {
tokenizer: 'keyword',
char_filter: %w(ignore_chars),
Expand Down
15 changes: 15 additions & 0 deletions spec/models/image_search_spec.rb
Expand Up @@ -17,6 +17,21 @@
end
end

# Both fixtures carry the tag "apollo11" while the user searches for
# "apollo 11" (with a space); neither title, description nor caption contains
# the query, so any hit has to come from the tag clause.
context 'when smooshed user query matches tag in either Instagram or Flickr indexes' do
  before do
    FlickrPhoto.create(
      id: "photo1",
      owner: "owner1",
      profile_type: 'user',
      tags: %w(apollo11 space),
      title: "title1 earth",
      description: "desc 1",
      taken_at: Date.current,
      popularity: 100,
      url: "http://photo1",
      thumbnail_url: "http://photo_thumbnail1"
    )
    FlickrPhoto.refresh_index!

    InstagramPhoto.create(
      id: "123456",
      username: 'user1',
      tags: %w(earth apollo11),
      caption: 'first photo of earth',
      taken_at: Date.current,
      popularity: 101,
      url: "http://photo2",
      thumbnail_url: "http://photo_thumbnail2"
    )
    InstagramPhoto.refresh_index!
  end

  it 'should return results from both indexes' do
    hit_types = ImageSearch.new("apollo 11", {}).search.results.map(&:type).uniq
    expect(hit_types).to match_array(["InstagramPhoto", "FlickrPhoto"])
  end
end

context 'when search term yields no results but a similar spelling does have results' do
before do
FlickrPhoto.create(id: "photo1", owner: "owner1", profile_type: 'user', tags: [], title: "title1 earth", description: "desc 1", taken_at: Date.current, popularity: 100, url: "http://photo1", thumbnail_url: "http://photo_thumbnail1")
Expand Down

0 comments on commit 9a29de8

Please sign in to comment.