mfisk / filemap

File-Based Map-Reduce

filemap / filemap.conf
100644 76 lines (62 sloc) 2.115 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
[global]

#
# Number of distributed copies of the source data to store.
#
replication = 1
 
#
# Build the work queue by hostname rather than by [Nodename].
# Use this if you have multiple "nodes" (e.g. disks) on one hostname
# and sshd is rejecting connections because of the connection rate.
# (Raising MaxStartups in sshd_config is the better fix.)
#
#queuebyhost = True
 
##
## Global defaults: (can be overridden per node)
##
 
#
# Directory used for synchronization files.
# The path is interpreted on the Nodes, not on this client machine: it must
# be accessible from every Node and coherent across them, but it need not
# be accessible from this client machine, nor be large or fast.
# If the directory is mounted at different locations on different Nodes,
# override this setting in the individual node definitions.
#
syncdir = /tmp/locks
 
#
# FM command (specify alternate path)
# Note: Many versions of SSH will not execute your .login equivalent,
# so every command must either be given with an explicit path or be
# found on the PATH set in your ~/.ssh/environment file (see ssh(1)).
# Default: fm = %(ROOTDIR)/sys/fm
 
#
# PYTHON command (specify alternate path)
# The fm script will be executed under the interpreter specified here.
# Default: python = python
#
#python = python2.4
 
#
# SSH command (specify alternate path or options)
# Note: for reduce operations to function, credential or agent forwarding must work
# Note: older versions of rsync don't support quoted arguments in the ssh command
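# For improved performance, use -S (a shared control socket, together with
# ControlMaster) so that connections are reused, as in the example below.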
# Default: ssh = ssh -o 'GSSAPIDelegateCredentials yes' -Ax
#
#ssh = ssh -o 'GSSAPIDelegateCredentials yes' -A -o 'ControlMaster auto' -S ~/.ssh/%l-%r@%h:%p
 
#
# RSYNC command (specify alternate path or options)
# Default: rsync = rsync
#
#rsync = /opt/local/bin/rsync
 
##
## List your nodes here.
## The [Nodename] should be unique, but is not used.
## If a 'hostname' is specified, the node is a remote host and
## the given hostname must be reachable from all nodes.
## Most of the global settings can be overridden with per-node values.
##
 
# Example nodes. Section names only need to be unique; they do not have to
# be sequential.
# 'rootdir' is the node's root directory (the default fm command path is
# given relative to it).
# NOTE(review): presumably a node without 'hostname' is treated as local
# to this machine - confirm.
[Node1]
rootdir = /tmp/foo

[Node4]
rootdir = /tmp/bar

# Uncomment 'hostname' to declare this node as a remote host; the hostname
# must be accessible from all nodes.
[Node2]
#hostname = localhost
rootdir = /var/tmp/baz