/
tsearch.rb
210 lines (170 loc) · 6.26 KB
/
tsearch.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# frozen_string_literal: true
require "active_support/core_ext/module/delegation"
require "active_support/deprecation"
module PgSearch
module Features
class TSearch < Feature
# Option keys this feature accepts: everything the parent implementation
# returns (via super), followed by the tsearch-specific keys listed here.
def self.valid_options
  tsearch_only = %i[
    dictionary prefix negation any_word normalization tsvector_column highlight
  ]
  super + tsearch_only
end
# Arel node for the match condition: a grouping around
# (<tsdocument>) @@ (<tsquery>), with both operands wrapped by arel_wrap.
def conditions
  document_node = arel_wrap(tsdocument)
  query_node = arel_wrap(tsquery)
  match = Arel::Nodes::InfixOperation.new("@@", document_node, query_node)
  Arel::Nodes::Grouping.new(match)
end
# Ranking expression: the SQL produced by tsearch_rank, wrapped by arel_wrap.
def rank
  rank_sql = tsearch_rank
  arel_wrap(rank_sql)
end
# Highlight expression: the SQL produced by ts_headline, wrapped by arel_wrap.
def highlight
  headline_sql = ts_headline
  arel_wrap(headline_sql)
end
private
# SQL string for a ts_headline(...) call. Arguments, in order: the quoted
# dictionary, the wrapped document expression, the wrapped tsquery, and the
# quoted option string built by ts_headline_options.
def ts_headline
  arguments = [
    dictionary,
    arel_wrap(document),
    arel_wrap(tsquery),
    Arel::Nodes.build_quoted(ts_headline_options)
  ]
  headline_call = Arel::Nodes::NamedFunction.new("ts_headline", arguments)
  headline_call.to_sql
end
# Option string handed to ts_headline, e.g. `StartSel = "<b>", MaxWords = 10`.
# Returns "" unless the :highlight option is a Hash. Deprecated option names
# override the current ones on key collision; entries whose value is nil are
# left out.
def ts_headline_options
  return "" unless options[:highlight].is_a?(Hash)

  merged = headline_options.merge(deprecated_headline_options)
  assignments = merged.compact.map { |name, setting| "#{name} = #{setting}" }
  assignments.join(", ")
end
# Hash of every supported ts_headline option name (StartSel, StopSel, ...) to
# its converted value, read from options[:highlight] with indifferent access.
# Every name is present in the result; unset ones map to whatever
# ts_headline_option_value returns for nil.
def headline_options
  highlight_settings = options.with_indifferent_access[:highlight]
  option_names = %w[
    StartSel StopSel MaxFragments MaxWords MinWords ShortWord FragmentDelimiter HighlightAll
  ]

  option_names.to_h do |name|
    [name, ts_headline_option_value(highlight_settings[name])]
  end
end
# Collects the legacy snake_case highlight options (start_sel, stop_sel, ...)
# into a hash keyed by the camelized option name, warning once for each legacy
# key that is set. Legacy keys whose value is nil are omitted from the result.
def deprecated_headline_options
indifferent_options = options.with_indifferent_access
%w[
start_sel stop_sel max_fragments max_words min_words short_word fragment_delimiter highlight_all
].reduce({}) do |hash, deprecated_key|
# tap yields and then returns the accumulator, so reduce always gets the hash back.
hash.tap do
value = indifferent_options[:highlight][deprecated_key]
# Only options that were actually supplied are reported and copied over.
unless value.nil?
# String#camelize is not defined in this file (presumably ActiveSupport's
# inflector, e.g. "start_sel" -> "StartSel" — confirm it is loaded).
key = deprecated_key.camelize
# NOTE(review): with uplevel: 1 the reported location depends on the
# block nesting around this call; re-check it before restructuring.
warn(
"pg_search 3.0 will no longer accept :#{deprecated_key} as an argument to :ts_headline, " \
"use :#{key} instead.",
category: :deprecated,
uplevel: 1
)
hash[key] = ts_headline_option_value(value)
end
end
end
end
# Converts one highlight option value to the text used in the ts_headline
# option string: strings are wrapped in double quotes with embedded double
# quotes doubled, true/false become "TRUE"/"FALSE", and anything else
# (numbers, nil, ...) is returned unchanged.
def ts_headline_option_value(value)
  if value.is_a?(String)
    escaped = value.gsub('"', '""')
    %("#{escaped}")
  elsif true.equal?(value)
    "TRUE"
  elsif false.equal?(value)
    "FALSE"
  else
    value
  end
end
# Characters that are replaced by a space before a term is quoted into a tsquery.
DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’ʻʼ]/

# SQL for a single search term: to_tsquery(<dictionary>, <expression>).
#
# When the :negation option is set and the term starts with "!", that leading
# "!" is dropped and the term is flagged as negated. Disallowed characters are
# then replaced by spaces, the result is quoted by the connection, passed
# through normalize, and assembled by tsquery_expression (which also receives
# the :prefix option).
#
# The argument is never modified, so frozen strings are accepted and the
# caller's string keeps its leading "!".
def tsquery_for_term(unsanitized_term)
  negated = options[:negation] && unsanitized_term.start_with?("!")
  # delete_prefix returns a new string; only one leading "!" is removed.
  term = negated ? unsanitized_term.delete_prefix("!") : unsanitized_term

  sanitized_term = term.gsub(DISALLOWED_TSQUERY_CHARACTERS, " ")
  term_sql = Arel.sql(normalize(connection.quote(sanitized_term)))
  tsquery = tsquery_expression(term_sql, negated: negated, prefix: options[:prefix])

  Arel::Nodes::NamedFunction.new("to_tsquery", [dictionary, tsquery]).to_sql
end
# Builds the SQL expression that concatenates (with ||) the pieces of one
# tsquery term, so that it evaluates to the term wrapped in single quotes.
# With negated: a "!" piece comes first. With prefix: a ":*" piece comes last.
# The pieces are folded left to right into nested "||" infix nodes.
def tsquery_expression(term_sql, negated:, prefix:)
  pieces = []
  pieces << Arel::Nodes.build_quoted("!") if negated
  pieces.push(Arel::Nodes.build_quoted("' "), term_sql, Arel::Nodes.build_quoted(" '"))
  pieces << Arel::Nodes.build_quoted(":*") if prefix

  pieces.compact.reduce do |concatenated, piece|
    Arel::Nodes::InfixOperation.new("||", concatenated, Arel::Nodes.build_quoted(piece))
  end
end
# SQL for the whole query: one tsquery_for_term fragment per whitespace-
# separated word, joined with " || " when :any_word is set and " && "
# otherwise. A blank query yields the SQL empty-string literal ''.
def tsquery
  return "''" if query.blank?

  per_term_sql = query.split.map { |word| tsquery_for_term(word) }
  operator = options[:any_word] ? " || " : " && "
  per_term_sql.join(operator)
end
# SQL for the searchable document: the tsvector expression of every column in
# columns_to_use, followed by each configured :tsvector_column qualified with
# the quoted table name, all concatenated with " || ".
#
# The tsvector column references are appended as individual terms (concat)
# instead of as one nested array. The join therefore no longer depends on
# Array#join flattening nested arrays, and an empty :tsvector_column list adds
# nothing rather than leaving a dangling " || " at the end.
def tsdocument
  tsdocument_terms = (columns_to_use || []).map do |search_column|
    column_to_tsvector(search_column)
  end

  if options[:tsvector_column]
    tsvector_references = Array.wrap(options[:tsvector_column]).map do |tsvector_column|
      column_name = connection.quote_column_name(tsvector_column)
      "#{quoted_table_name}.#{column_name}"
    end
    tsdocument_terms.concat(tsvector_references)
  end

  tsdocument_terms.join(" || ")
end
# Rank normalization flags, as listed in the PostgreSQL manual
# ("Controlling Text Search", originally quoted from
# http://www.postgresql.org/docs/8.3/static/textsearch-controls.html):
#    0  (the default) ignores the document length
#    1  divides the rank by 1 + the logarithm of the document length
#    2  divides the rank by the document length
#    4  divides the rank by the mean harmonic distance between extents
#       (implemented only by ts_rank_cd)
#    8  divides the rank by the number of unique words in the document
#   16  divides the rank by 1 + the logarithm of the number of unique words
#       in the document
#   32  divides the rank by itself + 1
# The value is a bit mask, so several behaviors can be combined in one integer.
#
# Returns the :normalization option, or 0 when it is not set.
def normalization
  bitmask = options[:normalization]
  return 0 unless bitmask

  bitmask
end
# SQL string for ts_rank(<document>, <query>, <normalization>), with the
# document and query expressions wrapped by arel_wrap.
def tsearch_rank
  rank_arguments = [arel_wrap(tsdocument), arel_wrap(tsquery), normalization]
  Arel::Nodes::NamedFunction.new("ts_rank", rank_arguments).to_sql
end
# Quoted Arel node for the text search configuration: the :dictionary option
# when set, otherwise :simple.
def dictionary
  configured = options[:dictionary]
  Arel::Nodes.build_quoted(configured || :simple)
end
# Turns a raw SQL string into an Arel SQL literal inside a Grouping node.
def arel_wrap(sql_string)
  literal = Arel.sql(sql_string)
  Arel::Nodes::Grouping.new(literal)
end
# Columns that get converted with to_tsvector. Without a :tsvector_column
# option this is every column; with one, only the ForeignColumn instances are
# kept.
def columns_to_use
  return columns unless options[:tsvector_column]

  columns.select { |column| column.is_a?(PgSearch::Configuration::ForeignColumn) }
end
# SQL for one column's tsvector: to_tsvector(<dictionary>, <normalized column SQL>).
# When the column has a weight, the call is wrapped in
# setweight(..., <quoted weight>).
def column_to_tsvector(search_column)
  arguments = [dictionary, Arel.sql(normalize(search_column.to_sql))]
  tsvector = Arel::Nodes::NamedFunction.new("to_tsvector", arguments).to_sql

  weight = search_column.weight
  return tsvector if weight.nil?

  "setweight(#{tsvector}, #{connection.quote(weight)})"
end
end
end
end