/
lyndex.ini
47 lines (38 loc) · 1.42 KB
/
lyndex.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Lyndex version 1.0b, October 2005, written by Feherke
# creates a word index from the Lynx traversal crawl files
# settings file
# base URL and starting point of the crawl
# should be the highest-level directory of the structure
starturl="http://example.com/"
# minimum word length
# words with fewer than this many characters will be excluded
minword=3
# maximum ASCII-art length
# words containing the same character more than this many times will be excluded
maxaart=3
# maximum percentage of pages that may contain a relevant word
# if more than this percentage of the pages contain it, the word is not relevant
# a word can be irrelevant only if all pages contain it the same number of times
maxrele=90
# decrement the score of almost-irrelevant words
# applies to words above the relevance limit, but with a different count per page
# the count could be
# NOTE(review): the sentence above ends mid-phrase in the original; confirm the intended wording
decirel="yes"
# size of the page sample, in characters
# the sample will be displayed in the page list
sample=250
# directory to exclude from crawling
# more than one directory can be specified as an array:
# nocrawl=( "http://example.com/sample/" "http://example.com/temp/" )
nocrawl="http://example.com/private/"
# checksum calculation tool
# used to detect multiple instances of the same page
# some possible values:
# cksum | md5sum | sha1sum | sum
# du -b | wc -c
checksum="md5sum"
# spelling verification tool
# if specified, it will be used to check the word list
# some possible values:
# ispell -a | aspell -a
# NOTE(review): the key is named "shellcheck" although it configures a spell checker;
# keep the name as is unless the consuming script is confirmed to read a different one
shellcheck="ispell -a"