forked from olivere/elastic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
search_queries_more_like_this.go
412 lines (362 loc) · 11.8 KB
/
search_queries_more_like_this.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
// Copyright 2012-present Oliver Eilhard. All rights reserved.
// Use of this source code is governed by a MIT-license.
// See http://olivere.mit-license.org/license.txt for details.
package elastic
import "errors"
// MoreLikeThisQuery (MLT Query) finds documents that are "like" a given
// set of documents. In order to do so, MLT selects a set of representative
// terms of these input documents, forms a query using these terms, executes
// the query and returns the results. The user controls the input documents,
// how the terms should be selected and how the query is formed.
//
// Pointer-typed settings are nil until explicitly set; Source omits every
// setting that is nil, empty or zero-length.
//
// For more details, see
// https://www.elastic.co/guide/en/elasticsearch/reference/6.2/query-dsl-mlt-query.html
type MoreLikeThisQuery struct {
	// fields is serialized as "fields".
	fields []string
	// docs holds the items to be "liked"; serialized as "like".
	docs []*MoreLikeThisQueryItem
	// unlikeDocs holds the items to be "unliked"; serialized as "unlike".
	unlikeDocs []*MoreLikeThisQueryItem
	// include is serialized as "include".
	include *bool
	// minimumShouldMatch is serialized as "minimum_should_match".
	minimumShouldMatch string
	// minTermFreq is serialized as "min_term_freq".
	minTermFreq *int
	// maxQueryTerms is serialized as "max_query_terms".
	maxQueryTerms *int
	// stopWords is serialized as "stop_words".
	stopWords []string
	// minDocFreq is serialized as "min_doc_freq".
	minDocFreq *int
	// maxDocFreq is serialized as "max_doc_freq".
	maxDocFreq *int
	// minWordLength is serialized as "min_word_length".
	minWordLength *int
	// maxWordLength is serialized as "max_word_length".
	maxWordLength *int
	// boostTerms is serialized as "boost_terms".
	boostTerms *float64
	// boost is serialized as "boost".
	boost *float64
	// analyzer is serialized as "analyzer".
	analyzer string
	// failOnUnsupportedField is serialized as "fail_on_unsupported_field".
	failOnUnsupportedField *bool
	// queryName is serialized as "_name".
	queryName string
}
// NewMoreLikeThisQuery returns a MoreLikeThisQuery whose slice-valued
// settings (fields, stop words, liked and unliked items) start out as
// empty, non-nil slices. All other settings are left at their zero value.
func NewMoreLikeThisQuery() *MoreLikeThisQuery {
	q := new(MoreLikeThisQuery)
	q.fields = []string{}
	q.stopWords = []string{}
	q.docs = []*MoreLikeThisQueryItem{}
	q.unlikeDocs = []*MoreLikeThisQueryItem{}
	return q
}
// Field appends the given field names to the query's field list and
// returns the query for chaining.
func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery {
	if len(fields) > 0 {
		q.fields = append(q.fields, fields...)
	}
	return q
}
// StopWord appends the given words to the query's stop word list and
// returns the query for chaining. Any word in this set is considered
// "uninteresting" and ignored. Even if your Analyzer allows stopwords,
// you might want to tell the MoreLikeThis code to ignore them, as for
// the purposes of document similarity it seems reasonable to assume that
// "a stop word is never interesting".
func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery {
	if len(stopWords) == 0 {
		return q
	}
	q.stopWords = append(q.stopWords, stopWords...)
	return q
}
// LikeText adds one text item per argument to the set of items that
// results should be "like", and returns the query for chaining.
func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery {
	for i := range likeTexts {
		q.docs = append(q.docs, NewMoreLikeThisQueryItem().LikeText(likeTexts[i]))
	}
	return q
}
// LikeItems appends the given items to the set of items that results
// should be "like", and returns the query for chaining.
func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
	if len(docs) == 0 {
		return q
	}
	q.docs = append(q.docs, docs...)
	return q
}
// IgnoreLikeText adds one text item per argument to the set of "unlike"
// items, i.e. the texts from which terms should not be selected. It returns
// the query for chaining.
func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery {
	for i := range ignoreLikeText {
		q.unlikeDocs = append(q.unlikeDocs, NewMoreLikeThisQueryItem().LikeText(ignoreLikeText[i]))
	}
	return q
}
// IgnoreLikeItems appends the given items to the set of "unlike" items,
// i.e. the documents from which terms should not be selected. It returns
// the query for chaining.
func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
	if len(ignoreDocs) > 0 {
		q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...)
	}
	return q
}
// Ids adds one item per document id to the set of items that results
// should be "like", and returns the query for chaining.
func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery {
	for i := range ids {
		q.docs = append(q.docs, NewMoreLikeThisQueryItem().Id(ids[i]))
	}
	return q
}
// Include controls whether the input documents should themselves be part
// of the returned results. Defaults to false. It returns the query for
// chaining.
func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery {
	q.include = new(bool)
	*q.include = include
	return q
}
// MinimumShouldMatch sets the number of terms that must match the generated
// query, expressed in the common syntax for minimum should match.
// The default value is "30%". An empty string leaves the setting out of
// the generated source.
//
// This used to be "PercentTermsToMatch" in Elasticsearch versions before 2.0.
//
// It returns the query for chaining.
func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery {
	q.minimumShouldMatch = minimumShouldMatch
	return q
}
// MinTermFreq sets the frequency below which terms in the source doc are
// ignored. The default frequency is 2. It returns the query for chaining.
func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery {
	q.minTermFreq = new(int)
	*q.minTermFreq = minTermFreq
	return q
}
// MaxQueryTerms sets the maximum number of query terms included in any
// generated query. It defaults to 25. It returns the query for chaining.
func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery {
	q.maxQueryTerms = new(int)
	*q.maxQueryTerms = maxQueryTerms
	return q
}
// MinDocFreq sets the document frequency threshold: words that do not
// occur in at least this many docs are ignored. The default is 5.
// It returns the query for chaining.
func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery {
	q.minDocFreq = new(int)
	*q.minDocFreq = minDocFreq
	return q
}
// MaxDocFreq sets the maximum document frequency for which words may still
// appear: words occurring in more than this many docs are ignored.
// It defaults to unbounded. It returns the query for chaining.
func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery {
	q.maxDocFreq = new(int)
	*q.maxDocFreq = maxDocFreq
	return q
}
// MinWordLength sets the word length below which words are ignored.
// It defaults to 0. It returns the query for chaining.
func (q *MoreLikeThisQuery) MinWordLength(minWordLength int) *MoreLikeThisQuery {
	q.minWordLength = new(int)
	*q.minWordLength = minWordLength
	return q
}
// MaxWordLength sets the word length above which words are ignored.
// Defaults to unbounded (0). It returns the query for chaining.
func (q *MoreLikeThisQuery) MaxWordLength(maxWordLength int) *MoreLikeThisQuery {
	q.maxWordLength = new(int)
	*q.maxWordLength = maxWordLength
	return q
}
// BoostTerms sets the boost factor used when boosting terms.
// It defaults to 1. It returns the query for chaining.
func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery {
	q.boostTerms = new(float64)
	*q.boostTerms = boostTerms
	return q
}
// Analyzer specifies the analyzer that will be used to analyze the text.
// Defaults to the analyzer associated with the field. An empty string
// leaves the setting out of the generated source.
// It returns the query for chaining.
func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery {
	q.analyzer = analyzer
	return q
}
// Boost sets the boost for this query and returns the query for chaining.
func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery {
	q.boost = new(float64)
	*q.boost = boost
	return q
}
// FailOnUnsupportedField indicates whether the query should fail or return
// no result when it is run against a field which is not supported, such as
// a binary/numeric field. It returns the query for chaining.
func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery {
	q.failOnUnsupportedField = new(bool)
	*q.failOnUnsupportedField = fail
	return q
}
// QueryName sets the query name for the filter that can be used when
// searching for matched_filters per hit. An empty name leaves the setting
// out of the generated source; otherwise it is emitted under "_name".
// It returns the query for chaining.
func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery {
	q.queryName = queryName
	return q
}
// Source creates the JSON-serializable source for the MLT query.
//
// It returns an error if no documents to be "liked" were specified, if any
// liked or unliked item is nil, or if serializing any item fails. Optional
// settings are only emitted when they have been set explicitly (non-nil
// pointer, non-empty string, non-empty slice).
func (q *MoreLikeThisQuery) Source() (interface{}, error) {
	// {
	//   "more_like_this" : {
	//     "fields" : [ ... ],
	//     "like" : [ ... ],
	//     "unlike" : [ ... ],
	//     ...
	//   }
	// }
	if len(q.docs) == 0 {
		return nil, errors.New(`more_like_this requires some documents to be "liked"`)
	}

	source := make(map[string]interface{})
	params := make(map[string]interface{})
	source["more_like_this"] = params

	if len(q.fields) > 0 {
		params["fields"] = q.fields
	}

	likes, err := moreLikeThisItemSources(q.docs)
	if err != nil {
		return nil, err
	}
	params["like"] = likes

	if len(q.unlikeDocs) > 0 {
		dontLikes, err := moreLikeThisItemSources(q.unlikeDocs)
		if err != nil {
			return nil, err
		}
		params["unlike"] = dontLikes
	}

	if q.minimumShouldMatch != "" {
		params["minimum_should_match"] = q.minimumShouldMatch
	}
	if q.minTermFreq != nil {
		params["min_term_freq"] = *q.minTermFreq
	}
	if q.maxQueryTerms != nil {
		params["max_query_terms"] = *q.maxQueryTerms
	}
	if len(q.stopWords) > 0 {
		params["stop_words"] = q.stopWords
	}
	if q.minDocFreq != nil {
		params["min_doc_freq"] = *q.minDocFreq
	}
	if q.maxDocFreq != nil {
		params["max_doc_freq"] = *q.maxDocFreq
	}
	if q.minWordLength != nil {
		params["min_word_length"] = *q.minWordLength
	}
	if q.maxWordLength != nil {
		params["max_word_length"] = *q.maxWordLength
	}
	if q.boostTerms != nil {
		params["boost_terms"] = *q.boostTerms
	}
	if q.boost != nil {
		params["boost"] = *q.boost
	}
	if q.analyzer != "" {
		params["analyzer"] = q.analyzer
	}
	if q.failOnUnsupportedField != nil {
		params["fail_on_unsupported_field"] = *q.failOnUnsupportedField
	}
	if q.queryName != "" {
		params["_name"] = q.queryName
	}
	if q.include != nil {
		params["include"] = *q.include
	}

	return source, nil
}

// moreLikeThisItemSources serializes each item via its Source method, in
// order. It returns an error for a nil item (instead of panicking on the
// nil dereference) and propagates the first serialization error.
func moreLikeThisItemSources(items []*MoreLikeThisQueryItem) ([]interface{}, error) {
	out := make([]interface{}, 0, len(items))
	for _, item := range items {
		if item == nil {
			return nil, errors.New("more_like_this does not accept nil items")
		}
		src, err := item.Source()
		if err != nil {
			return nil, err
		}
		out = append(out, src)
	}
	return out, nil
}
// -- MoreLikeThisQueryItem --
// MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery
// to be "liked" or "unliked".
//
// An item with a non-empty likeText is serialized by Source as that bare
// string; otherwise it is serialized as an object built from the remaining
// fields, each of which is omitted when unset.
type MoreLikeThisQueryItem struct {
	// likeText is free text; when non-empty it takes precedence in Source.
	likeText string
	// index is serialized as "_index".
	index string
	// typ is serialized as "_type".
	typ string
	// id is serialized as "_id".
	id string
	// doc is serialized as "doc".
	doc interface{}
	// fields is serialized as "fields".
	fields []string
	// routing is serialized as "_routing".
	routing string
	// fsc is serialized (via its own Source method) as "_source".
	fsc *FetchSourceContext
	// version is serialized as "_version" only when >= 0.
	version int64
	// versionType is serialized as "_version_type".
	versionType string
}
// NewMoreLikeThisQueryItem creates a MoreLikeThisQueryItem with its version
// initialized to -1, so that no version is serialized unless one is set
// explicitly. All other settings start at their zero value.
func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem {
	item := new(MoreLikeThisQueryItem)
	item.version = -1
	return item
}
// LikeText sets a text to be "liked". When the text is non-empty, Source
// serializes the item as this bare string and disregards all other
// settings of the item. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem {
	item.likeText = likeText
	return item
}
// Index sets the index of the item, emitted by Source as "_index" when
// non-empty. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem {
	item.index = index
	return item
}
// Type sets the document type of the item, emitted by Source as "_type"
// when non-empty. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem {
	item.typ = typ
	return item
}
// Id sets the document id of the item, emitted by Source as "_id" when
// non-empty. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem {
	item.id = id
	return item
}
// Doc sets a raw document template for the item, emitted by Source as
// "doc" when non-nil. The value is stored as given, without copying.
// It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem {
	item.doc = doc
	return item
}
// Fields appends the given names to the item's list of fields and returns
// the item for chaining.
func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem {
	if len(fields) > 0 {
		item.fields = append(item.fields, fields...)
	}
	return item
}
// Routing sets the routing associated with the item, emitted by Source as
// "_routing" when non-empty. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem {
	item.routing = routing
	return item
}
// FetchSourceContext sets the fetch source of the item, which controls
// if and how _source should be returned. When non-nil, Source emits its
// serialized form as "_source". It returns the item for chaining.
func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem {
	item.fsc = fsc
	return item
}
// Version specifies the version of the item. Source emits it as "_version"
// only when it is non-negative. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem {
	item.version = version
	return item
}
// VersionType sets the version type of the item, emitted by Source as
// "_version_type" when non-empty. It returns the item for chaining.
func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem {
	item.versionType = versionType
	return item
}
// Source returns the JSON-serializable fragment of the item.
//
// An item with a non-empty like text is returned as that plain string.
// Any other item is returned as a map holding only the settings that were
// set. An error is returned only if serializing the fetch source context
// fails.
func (item *MoreLikeThisQueryItem) Source() (interface{}, error) {
	// Free text wins over every other setting.
	if item.likeText != "" {
		return item.likeText, nil
	}

	out := make(map[string]interface{})

	// String-valued settings are emitted only when non-empty.
	stringSettings := []struct{ key, value string }{
		{"_index", item.index},
		{"_type", item.typ},
		{"_id", item.id},
		{"_routing", item.routing},
		{"_version_type", item.versionType},
	}
	for _, s := range stringSettings {
		if s.value != "" {
			out[s.key] = s.value
		}
	}

	if item.doc != nil {
		out["doc"] = item.doc
	}
	if len(item.fields) > 0 {
		out["fields"] = item.fields
	}
	// Negative versions mean "not set".
	if item.version >= 0 {
		out["_version"] = item.version
	}
	if item.fsc != nil {
		fetchSrc, err := item.fsc.Source()
		if err != nil {
			return nil, err
		}
		out["_source"] = fetchSrc
	}

	return out, nil
}