gabriel / capitate

Capistrano recipes, plugins and templates.

This URL has Read+Write access

capitate / lib / templates / sphinx / sphinx.conf.erb
f2df06ba » gabriel 2008-02-24 fixes for sphinx 1 # sphinx config
2 source pages
70c46b55 » gabriel 2008-02-06 moving stuff around 3 {
4 type = mysql
5 # whether to strip HTML
6 # values can be 0 (don't strip) or 1 (do strip)
7 # WARNING, only works with mysql source for now
8 # WARNING, should work ok for PERFECTLY formed XHTML for now
9 # WARNING, POSSIBLE TO BUG on malformed everyday HTML
10 # optional, default is 0
f2df06ba » gabriel 2008-02-24 fixes for sphinx 11 strip_html = 1
70c46b55 » gabriel 2008-02-06 moving stuff around 12
13 # what HTML attributes to index if stripping HTML
14 # format is as follows:
15 #
f2df06ba » gabriel 2008-02-24 fixes for sphinx 16 index_html_attrs = img=alt,title; a=title;
17
18 sql_host = <%= sphinx_db_host %>
19 sql_user = <%= sphinx_db_user %>
20 sql_pass = <%= sphinx_db_pass %>
21 sql_db = <%= sphinx_db_name %>
22 sql_port = <%= sphinx_db_port %> # optional, default is 3306
1d49908a » gabriel 2008-04-03 updating example conf file 23
24 sql_query_pre = SET NAMES UTF8
25 sql_query_pre = SET SESSION query_cache_type=OFF
d7572bd5 » gabriel 2008-04-03 hostname is a bad idea sinc... 26 sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages', USER()) \
1d49908a » gabriel 2008-04-03 updating example conf file 27 ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'
28
29 sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title FROM pages WHERE id>=$start AND id<=$end
30 sql_query_range = SELECT MIN(id),MAX(id) FROM pages where type='Article'
f2df06ba » gabriel 2008-02-24 fixes for sphinx 31 sql_range_step = 1000
1d49908a » gabriel 2008-04-03 updating example conf file 32
d7572bd5 » gabriel 2008-04-03 hostname is a bad idea sinc... 33 sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages' and hostname = USER()
1d49908a » gabriel 2008-04-03 updating example conf file 34
35 sql_attr_uint = user_id
36 sql_attr_timestamp = created_at
37 sql_attr_timestamp = updated_at
70c46b55 » gabriel 2008-02-06 moving stuff around 38 }
39
1d49908a » gabriel 2008-04-03 updating example conf file 40 source pages_delta : pages
41 {
42 # Delta source derived from pages: selects rows whose updated_at is at or after the updated_at recorded in indexer_status for 'pages_delta'. Clear and reset sql_query_pre
43 sql_query_pre =
44 sql_query_pre = SET NAMES UTF8
45 sql_query_pre = SET SESSION query_cache_type=OFF
d7572bd5 » gabriel 2008-04-03 hostname is a bad idea sinc... 46 sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages_delta', USER()) \
1d49908a » gabriel 2008-04-03 updating example conf file 47 ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'
48
49 sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title \
50 FROM pages \
d7572bd5 » gabriel 2008-04-03 hostname is a bad idea sinc... 51 WHERE updated_at >= (SELECT updated_at FROM indexer_status WHERE index_name = 'pages_delta' and hostname = USER())
1d49908a » gabriel 2008-04-03 updating example conf file 52
53 sql_query_post =
d7572bd5 » gabriel 2008-04-03 hostname is a bad idea sinc... 54 sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages_delta' and hostname = USER()
1d49908a » gabriel 2008-04-03 updating example conf file 55 sql_query_range =
56 sql_range_step =
57 }
70c46b55 » gabriel 2008-02-06 moving stuff around 58
f2df06ba » gabriel 2008-02-24 fixes for sphinx 59 index pages
70c46b55 » gabriel 2008-02-06 moving stuff around 60 {
f2df06ba » gabriel 2008-02-24 fixes for sphinx 61 source = pages
62 path = <%= sphinx_index_root %>/pages
70c46b55 » gabriel 2008-02-06 moving stuff around 63 docinfo = extern
f2df06ba » gabriel 2008-02-24 fixes for sphinx 64 morphology = stem_en
65 stopwords = <%= sphinx_conf_path %>/stopwords.txt
70c46b55 » gabriel 2008-02-06 moving stuff around 66 min_word_len = 1
67 charset_type = utf-8
68 min_prefix_len = 0
69 min_infix_len = 0
70 }
71
1d49908a » gabriel 2008-04-03 updating example conf file 72 index pages_delta : pages
73 {
74 source = pages_delta
75 path = <%= sphinx_index_root %>/pages_delta
76
77 }
78
70c46b55 » gabriel 2008-02-06 moving stuff around 79 #############################################################################
80 ## indexer settings
81 #############################################################################
82
83 indexer
84 {
85 # memory limit
86 #
87 # may be specified in bytes (no postfix), kilobytes (mem_limit=1000K)
88 # or megabytes (mem_limit=10M)
89 #
90 # will grow if set unacceptably low
91 # will warn if set too low and potentially hurting the performance
92 #
93 # optional, default is 32M
f2df06ba » gabriel 2008-02-24 fixes for sphinx 94 mem_limit = 64M
70c46b55 » gabriel 2008-02-06 moving stuff around 95 }
96
97 #############################################################################
98 ## searchd settings
99 #############################################################################
100
101 searchd
102 {
103 # IP address on which search daemon will bind and accept
104 # incoming network requests
105 #
106 # optional, default is to listen on all addresses,
107 # i.e. address = 0.0.0.0
108 #
f2df06ba » gabriel 2008-02-24 fixes for sphinx 109 address = <%= sphinx_host %>
70c46b55 » gabriel 2008-02-06 moving stuff around 110 # address = 192.168.0.1
111
112
113 # port on which search daemon will listen
f2df06ba » gabriel 2008-02-24 fixes for sphinx 114 port = <%= sphinx_port %>
70c46b55 » gabriel 2008-02-06 moving stuff around 115
116
117 # log file
118 # searchd run info is logged here
f2df06ba » gabriel 2008-02-24 fixes for sphinx 119 log = <%= sphinx_log_root %>/searchd.log
70c46b55 » gabriel 2008-02-06 moving stuff around 120
121
122 # query log file
123 # all the search queries are logged here
f2df06ba » gabriel 2008-02-24 fixes for sphinx 124 query_log = <%= sphinx_log_root %>/query.log
70c46b55 » gabriel 2008-02-06 moving stuff around 125
126
127 # client read timeout, seconds
128 read_timeout = 5
129
130
131 # maximum amount of children to fork
132 # useful to control server load
133 max_children = 30
134
135
136 # a file which will contain searchd process ID
137 # used for different external automation scripts
138 # MUST be present
f2df06ba » gabriel 2008-02-24 fixes for sphinx 139 pid_file = <%= sphinx_pid_path %>
70c46b55 » gabriel 2008-02-06 moving stuff around 140
141
142 # maximum amount of matches this daemon would ever retrieve
143 # from each index and serve to client
144 #
145 # this parameter affects per-client memory and CPU usage
146 # (16+ bytes per match) in match sorting phase; so blindly raising
147 # it to 1 million is definitely NOT recommended
148 #
149 # starting from 0.9.7, it can be decreased on the fly through
150 # the corresponding API call; increasing is prohibited to protect
151 # against malicious and/or malformed requests
152 #
153 # default is 1000 (just like with Google)
154 max_matches = 1000
155 }
156
157 # --eof--