public
Description: Capistrano recipes, plugins and templates.
Homepage: http://capitate.rubyforge.org
Clone URL: git://github.com/gabriel/capitate.git
capitate / lib / templates / sphinx / sphinx.conf.erb
100644 158 lines (126 sloc) 4.729 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# sphinx config
source pages
{
  type = mysql

  # HTML stripping switch: 0 keeps the markup, 1 strips it.
  # WARNING: only works with the mysql source for now.
  # WARNING: should work OK for PERFECTLY formed XHTML for now.
  # WARNING: may misbehave on malformed everyday HTML.
  # Optional, default is 0.
  strip_html = 1

  # HTML attributes to index when stripping is enabled.
  # Format, as used below: "tag=attr[,attr...];" entries, one per tag.
  index_html_attrs = img=alt,title; a=title;

  # Database connection, filled in from the deployment variables.
  sql_host = <%= sphinx_db_host %>
  sql_user = <%= sphinx_db_user %>
  sql_pass = <%= sphinx_db_pass %>
  sql_db = <%= sphinx_db_name %>
  # optional, default is 3306
  sql_port = <%= sphinx_db_port %>

  # Statements run before the main query, in this order. The last one
  # records in indexer_status that indexing of 'pages' has started.
  sql_query_pre = SET NAMES UTF8
  sql_query_pre = SET SESSION query_cache_type=OFF
  sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages', USER()) \
    ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'

  # Main fetch query, run once per [$start, $end] id window of
  # sql_range_step ids taken from sql_query_range.
  # NOTE(review): sql_query_range only looks at rows with type='Article',
  # while sql_query has no type filter -- confirm which one is intended.
  sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title FROM pages WHERE id>=$start AND id<=$end
  sql_query_range = SELECT MIN(id),MAX(id) FROM pages where type='Article'
  sql_range_step = 1000

  # Run after fetching: mark the 'pages' row in indexer_status as updated.
  sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages' and hostname = USER()

  # Columns from sql_query stored as attributes.
  sql_attr_uint = user_id
  sql_attr_timestamp = created_at
  sql_attr_timestamp = updated_at
}
 
# Delta source: inherits every setting from `pages` and overrides the
# pre/post queries and the main query so that only recently updated rows
# are fetched.
source pages_delta : pages
{
  # Clear the inherited sql_query_pre list, then rebuild it.
  sql_query_pre =
  sql_query_pre = SET NAMES UTF8
  sql_query_pre = SET SESSION query_cache_type=OFF
  # The column list must match the four supplied values (same shape as the
  # statement in the parent `pages` source); listing an extra `id` column
  # makes MySQL reject the INSERT with a column-count mismatch.
  sql_query_pre = INSERT INTO indexer_status (started_at, status, index_name, hostname) VALUES (NOW(), 'indexing', 'pages_delta', USER()) \
    ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'

  # Fetch only rows changed since the timestamp stored for 'pages_delta'.
  # NOTE(review): the cutoff is read from the pages_delta row itself, which
  # sql_query_post below advances to NOW() on every run, so each delta pass
  # covers only changes since the previous delta pass -- confirm this is
  # intended rather than "since the last full 'pages' index".
  sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title \
   FROM pages \
   WHERE updated_at >= (SELECT updated_at FROM indexer_status WHERE index_name = 'pages_delta' and hostname = USER())

  # Clear the inherited sql_query_post, then record completion for this index.
  sql_query_post =
  sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages_delta' and hostname = USER()

  # Blank out the range settings inherited from `pages`; the delta query
  # above does not use $start/$end.
  sql_query_range =
  sql_range_step =
}
 
# Main index, built from the `pages` source.
index pages
{
  source = pages

  # Index files and the stopword list live under deployment-provided paths.
  path = <%= sphinx_index_root %>/pages
  stopwords = <%= sphinx_conf_path %>/stopwords.txt

  docinfo = extern
  morphology = stem_en
  charset_type = utf-8

  min_word_len = 1
  min_prefix_len = 0
  min_infix_len = 0
}
 
# Delta index: inherits the settings of `pages`, overriding only the
# source and the on-disk path.
index pages_delta : pages
{
  source = pages_delta
  path = <%= sphinx_index_root %>/pages_delta
}
 
#############################################################################
## indexer settings
#############################################################################
 
indexer
{
  # Memory limit for the indexer.
  #
  # Accepted forms: plain bytes (no postfix), kilobytes (mem_limit=1000K)
  # or megabytes (mem_limit=10M).
  #
  # If set unacceptably low it will grow; if set too low and potentially
  # hurting performance, a warning is issued.
  #
  # Optional, default is 32M.
  mem_limit = 64M
}
 
#############################################################################
## searchd settings
#############################################################################
 
searchd
{
  # IP address the search daemon binds to and accepts incoming network
  # requests on.
  #
  # Optional; the default is to listen on all addresses,
  # i.e. address = 0.0.0.0
  #
  # Example of a fixed address:
  # address = 192.168.0.1
  address = <%= sphinx_host %>

  # Port the search daemon listens on.
  port = <%= sphinx_port %>

  # Log file: searchd run information is written here.
  log = <%= sphinx_log_root %>/searchd.log

  # Query log file: every search query is written here.
  query_log = <%= sphinx_log_root %>/query.log

  # Client read timeout, in seconds.
  read_timeout = 5

  # Maximum number of children to fork; useful to control server load.
  max_children = 30

  # File that will hold the searchd process ID, used by external
  # automation scripts. MUST be present.
  pid_file = <%= sphinx_pid_path %>

  # Maximum number of matches this daemon will ever retrieve from each
  # index and serve to a client.
  #
  # This setting affects per-client memory and CPU usage (16+ bytes per
  # match) in the match sorting phase, so blindly raising it to 1 million
  # is definitely NOT recommended.
  #
  # Starting from 0.9.7 it can be decreased on the fly through the
  # corresponding API call; increasing is prohibited to protect against
  # malicious and/or malformed requests.
  #
  # Default is 1000 (just like with Google).
  max_matches = 1000
}
 
# --eof--