|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
1 |
# sphinx config |
| |
2 |
source pages |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
3 |
{ |
| |
4 |
type = mysql |
| |
5 |
# whether to strip HTML |
| |
6 |
# values can be 0 (don't strip) or 1 (do strip) |
| |
7 |
# WARNING, only works with mysql source for now |
| |
8 |
# WARNING, should work ok for PERFECTLY formed XHTML for now |
| |
9 |
# WARNING, MAY MISBEHAVE on malformed everyday HTML |
| |
10 |
# optional, default is 0 |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
11 |
strip_html = 1 |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
12 |
|
| |
13 |
# what HTML attributes to index if stripping HTML |
| |
14 |
# format is as follows: |
| |
15 |
# tag=attr[,attr...]; tag=attr[,attr...]; |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
16 |
index_html_attrs = img=alt,title; a=title; |
| |
17 |
|
| |
18 |
sql_host = <%= sphinx_db_host %> |
| |
19 |
sql_user = <%= sphinx_db_user %> |
| |
20 |
sql_pass = <%= sphinx_db_pass %> |
| |
21 |
sql_db = <%= sphinx_db_name %> |
| |
22 |
sql_port = <%= sphinx_db_port %> # optional, default is 3306 |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
23 |
|
| |
24 |
sql_query_pre = SET NAMES UTF8 |
| |
25 |
sql_query_pre = SET SESSION query_cache_type=OFF |
|
d7572bd5
»
|
gabriel |
2008-04-03 |
hostname is a bad idea sinc... |
26 |
sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages', USER()) \ |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
27 |
ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing' |
| |
28 |
|
| |
29 |
sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title FROM pages WHERE id>=$start AND id<=$end |
| |
30 |
sql_query_range = SELECT MIN(id),MAX(id) FROM pages where type='Article' |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
31 |
sql_range_step = 1000 |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
32 |
|
|
d7572bd5
»
|
gabriel |
2008-04-03 |
hostname is a bad idea sinc... |
33 |
sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages' and hostname = USER() |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
34 |
|
| |
35 |
sql_attr_uint = user_id |
| |
36 |
sql_attr_timestamp = created_at |
| |
37 |
sql_attr_timestamp = updated_at |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
38 |
} |
| |
39 |
|
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
40 |
source pages_delta : pages |
| |
41 |
{ |
| |
42 |
# Clear and reset sql_query_pre |
| |
43 |
sql_query_pre = |
| |
44 |
sql_query_pre = SET NAMES UTF8 |
| |
45 |
sql_query_pre = SET SESSION query_cache_type=OFF |
|
d7572bd5
»
|
gabriel |
2008-04-03 |
hostname is a bad idea sinc... |
46 |
sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages_delta', USER()) \ |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
47 |
ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing' |
| |
48 |
|
| |
49 |
sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title \ |
| |
50 |
FROM pages \ |
|
d7572bd5
»
|
gabriel |
2008-04-03 |
hostname is a bad idea sinc... |
51 |
WHERE updated_at >= (SELECT updated_at FROM indexer_status WHERE index_name = 'pages_delta' and hostname = USER()) |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
52 |
|
| |
53 |
sql_query_post = |
|
d7572bd5
»
|
gabriel |
2008-04-03 |
hostname is a bad idea sinc... |
54 |
sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages_delta' and hostname = USER() |
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
55 |
sql_query_range = |
| |
56 |
sql_range_step = |
| |
57 |
} |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
58 |
|
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
59 |
index pages |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
60 |
{ |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
61 |
source = pages |
| |
62 |
path = <%= sphinx_index_root %>/pages |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
63 |
docinfo = extern |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
64 |
morphology = stem_en |
| |
65 |
stopwords = <%= sphinx_conf_path %>/stopwords.txt |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
66 |
min_word_len = 1 |
| |
67 |
charset_type = utf-8 |
| |
68 |
min_prefix_len = 0 |
| |
69 |
min_infix_len = 0 |
| |
70 |
} |
| |
71 |
|
|
1d49908a
»
|
gabriel |
2008-04-03 |
updating example conf file |
72 |
index pages_delta : pages |
| |
73 |
{ |
| |
74 |
source = pages_delta |
| |
75 |
path = <%= sphinx_index_root %>/pages_delta |
| |
76 |
|
| |
77 |
} |
| |
78 |
|
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
79 |
############################################################################# |
| |
80 |
## indexer settings |
| |
81 |
############################################################################# |
| |
82 |
|
| |
83 |
indexer |
| |
84 |
{ |
| |
85 |
# memory limit |
| |
86 |
# |
| |
87 |
# may be specified in bytes (no postfix), kilobytes (mem_limit=1000K) |
| |
88 |
# or megabytes (mem_limit=10M) |
| |
89 |
# |
| |
90 |
# will grow if set unacceptably low |
| |
91 |
# will warn if set too low and potentially hurting the performance |
| |
92 |
# |
| |
93 |
# optional, default is 32M |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
94 |
mem_limit = 64M |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
95 |
} |
| |
96 |
|
| |
97 |
############################################################################# |
| |
98 |
## searchd settings |
| |
99 |
############################################################################# |
| |
100 |
|
| |
101 |
searchd |
| |
102 |
{ |
| |
103 |
# IP address on which search daemon will bind and accept |
| |
104 |
# incoming network requests |
| |
105 |
# |
| |
106 |
# optional, default is to listen on all addresses, |
| |
107 |
# ie. address = 0.0.0.0 |
| |
108 |
# |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
109 |
address = <%= sphinx_host %> |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
110 |
# address = 192.168.0.1 |
| |
111 |
|
| |
112 |
|
| |
113 |
# port on which search daemon will listen |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
114 |
port = <%= sphinx_port %> |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
115 |
|
| |
116 |
|
| |
117 |
# log file |
| |
118 |
# searchd run info is logged here |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
119 |
log = <%= sphinx_log_root %>/searchd.log |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
120 |
|
| |
121 |
|
| |
122 |
# query log file |
| |
123 |
# all the search queries are logged here |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
124 |
query_log = <%= sphinx_log_root %>/query.log |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
125 |
|
| |
126 |
|
| |
127 |
# client read timeout, seconds |
| |
128 |
read_timeout = 5 |
| |
129 |
|
| |
130 |
|
| |
131 |
# maximum amount of children to fork |
| |
132 |
# useful to control server load |
| |
133 |
max_children = 30 |
| |
134 |
|
| |
135 |
|
| |
136 |
# a file which will contain searchd process ID |
| |
137 |
# used for different external automation scripts |
| |
138 |
# MUST be present |
|
f2df06ba
»
|
gabriel |
2008-02-24 |
fixes for sphinx |
139 |
pid_file = <%= sphinx_pid_path %> |
|
70c46b55
»
|
gabriel |
2008-02-06 |
moving stuff around |
140 |
|
| |
141 |
|
| |
142 |
# maximum amount of matches this daemon would ever retrieve |
| |
143 |
# from each index and serve to client |
| |
144 |
# |
| |
145 |
# this parameter affects per-client memory and CPU usage |
| |
146 |
# (16+ bytes per match) in match sorting phase; so blindly raising |
| |
147 |
# it to 1 million is definitely NOT recommended |
| |
148 |
# |
| |
149 |
# starting from 0.9.7, it can be decreased on the fly through |
| |
150 |
# the corresponding API call; increasing is prohibited to protect |
| |
151 |
# against malicious and/or malformed requests |
| |
152 |
# |
| |
153 |
# default is 1000 (just like with Google) |
| |
154 |
max_matches = 1000 |
| |
155 |
} |
| |
156 |
|
| |
157 |
# --eof-- |