diff --git a/docs/monit.md b/docs/monit.md new file mode 100644 index 000000000..fff639c91 --- /dev/null +++ b/docs/monit.md @@ -0,0 +1,66 @@ +Monitoring TorQ +=============== + +**Monit** is a small open source utility for monitoring and managing UNIX systems. Monit's ease of use makes it the perfect tool for tracking the status of TorQ processes. + +Installation +------------ +Monit is included in most Unix distributions but can also be downloaded from [here](https://mmonit.com/monit/#download). This monit addition to TorQ allows the monit config files to be easily generated, based on the contents of the process.csv file. + +The basic monit directory which has been added to TorQ can be seen below: +``` +${TORQHOME} +|---monit + |---bin + | |---monit.sh + |---templates + |---monitalert.cfg + |---monitrc + |---monittemplate.txt +``` + +It is important to mention that AquaQ will not offer support for **monitalert.cfg** and **monitrc**. Those two files have been added as examples of how **monit** can be configured to monitor your system and to offer an out-of-the-box configuration that you can use to test that **monit** works. If the monit installation contains an updated version of monitrc, this should be used instead. + +Features +-------- +Monit is only available for UNIX and it comes with a bash script that you can use to generate the configuration and start the processes. More details on how you use this script can be found below. + +We have also included a standard **monitrc** which will: ++ Set the check interval to 30 seconds ++ Set the location of the **monit.log** file ++ Set the location of the **monit.state** file ++ Define the **mail alert** basic configuration ++ Define the **e-mail format** ++ Set the **interface port** (11000) **user** and **password** ++ Set the location of the ***.cfg** files + +The **monitalert.cfg** is only an example of how you can configure your own alerts for monitoring your UNIX system. 
There are no TorQ specific examples in this file. + +The only file which will be updated with future TorQ releases is the **monittemplate.txt** which generates the **monitconfig.cfg**. An example is included below: + +``` +check process tickerplant1 + matching "15000 -proctype tickerplant -procname tickerplant1" + start program = "/bin/bash -c '/home/USER/torqprodsupp/torqdev/deploy/torq.sh start tickerplant1'" + with timeout 10 seconds + stop program = "/bin/bash -c '/home/USER/torqprodsupp/torqdev/deploy/torq.sh stop tickerplant1'" + every "* * * * *" + mode active +``` + +Usage Guide +----------- +If you want to use **monit** to monitor your UNIX system and TorQ processes you must first generate the configuration files and then start **monit**. We will assume that you start with a fresh copy of TorQ. +1. Install TorQ and any optional customisations (e.g. the TorQ Finance Starter Pack) +2. Navigate to **${TORQHOME}/monit/bin/** +3. Execute: + * bash monit.sh generate all - to generate all the config files + * bash monit.sh generate monitalert - to generate the alert configuration file + * bash monit.sh generate monitconfig - to generate the monitconfig.cfg + * bash monit.sh generate monitrc - to generate the monitrc file + +However, you can also use your own configuration files by either creating a new directory in monit called **config** and moving all the *.cfg files and the **monitrc** file in there or by modifying the last line in the monitrc to point to the folder where the *.cfg files can be found. + +4. Start monit by executing bash monit.sh start + +The start function also takes a parameter **("string")** which can specify the location of the **monitrc**. 
diff --git a/mkdocs.yml b/mkdocs.yml index c971330ac..ef8718088 100755 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,9 +9,10 @@ pages: - Connection Management: conn.md - Processes: Processes.md - Analytics Library: analyticslib.md + - Monitoring: monit.md - Visualisation: visualisation.md - TorQ Blog Posts: blog.md -copyright: 'Copyright © 2016 AquaQ Analytics Limited. Kx ® and kdb+ are registered trademarks of Kx Systems Inc.' +copyright: 'Copyright © 2018 AquaQ Analytics Limited. Kx ® and kdb+ are registered trademarks of Kx Systems Inc.' theme: 'material' repo_name: 'GitHub'
diff --git a/monit/bin/monit.sh b/monit/bin/monit.sh
new file mode 100755
index 000000000..66530fc8c
--- /dev/null
+++ b/monit/bin/monit.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+# monit.sh - generate the Monit configuration for a TorQ stack and start Monit.
+#
+# Usage:
+#   monit.sh generate all         [process.csv] [start/stop script] [monitrc template]
+#   monit.sh generate monitconfig [process.csv] [start/stop script]
+#   monit.sh generate monitrc     [monitrc template]
+#   monit.sh generate monitalert
+#   monit.sh start                [monitrc]
+#   monit.sh usage
+#
+# Templates are read from ${BASEDIR}/monit/templates. Generated files go to
+# ${BASEDIR}/monit/config and are left alone once they exist.
+
+# Derive the TorQ root from the location of this script (monit/bin/monit.sh)
+# rather than from the caller's working directory, so it can run from anywhere.
+BASEDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || exit 1
+export BASEDIR
+mkdir -p "${BASEDIR}/monit/logs" # the monitrc template keeps monit.log and monit.state here
+
+#######################################
+# Report whether a path is in the expected state.
+# Arguments: $1 - path to test
+#            $2 - message printed after the OK/FAILED tag
+#            $3 - expected state: "exist" or "nexist"
+# Outputs:   a coloured "[ OK ]" or "[ FAILED ]" line on stdout
+# Returns:   0 if the expectation holds, 1 if not, 2 for an unknown state
+#######################################
+checkst() {
+  local path=$1 msg=$2 expect=$3
+  local rc=1
+  case "$expect" in
+    exist)
+      [[ -e "$path" ]] && rc=0
+      ;;
+    nexist)
+      [[ -e "$path" ]] || rc=0
+      ;;
+    *)
+      echo "Not yet implemented"
+      return 2
+      ;;
+  esac
+  if (( rc == 0 )); then
+    printf '[ \033[01;32mOK\033[0m ] %s\n' "$msg"
+  else
+    printf '[ \033[01;31mFAILED\033[0m ] %s\n' "$msg"
+  fi
+  return "$rc"
+}
+
+#######################################
+# Run a generator only when its output file does not exist yet.
+# Arguments: $1 - output file; remaining arguments - generator and its arguments
+# Returns:   0 when the file already exists, else the generator's status
+#######################################
+createifmissing() {
+  local target=$1
+  shift
+  if [[ -f "$target" ]]; then
+    echo "${target} already exists, leaving it untouched"
+    return 0
+  fi
+  "$@"
+}
+
+#######################################
+# Build monitconfig.cfg: one stanza from monittemplate.txt per TorQ process.
+# Globals:   BASEDIR, templates, configs (read)
+# Arguments: $1 - process csv (default ${BASEDIR}/appconfig/process.csv)
+#            $2 - start/stop script (default ${BASEDIR}/torq.sh)
+# Returns:   0 if monitconfig.cfg exists afterwards, non-zero otherwise
+#######################################
+createmonconfig() {
+  local procs=${1:-${BASEDIR}/appconfig/process.csv}
+  local startstopsc=${2:-${BASEDIR}/torq.sh}
+  local output="${configs}/monitconfig.cfg"
+  local monittemplate proctype procname
+
+  if [[ ! -r "$procs" ]]; then
+    echo "Cannot read process file: ${procs}" >&2
+    return 1
+  fi
+  monittemplate="$(cat "${templates}/monittemplate.txt")" || return 1
+
+  # Skip the csv header, then take columns 3 and 4 (proctype and procname).
+  while read -r proctype procname _; do
+    # Rows without a procname would yield a nameless stanza; killtick is
+    # deliberately left unmonitored.
+    if [[ -z "$procname" || "$procname" == "killtick" ]]; then
+      continue
+    fi
+    # The template is shell text referring to $procname, $proctype,
+    # $startstopsc and $KDBBASEPORT, hence the eval. Only use templates you
+    # trust: anything in them is executed by this shell.
+    eval "echo \"${monittemplate}\"" >> "$output"
+    echo "" >> "$output"
+  done < <(tail -n +2 "$procs" | awk -F '"*,"*' '{print $3 " " $4}')
+
+  checkst "$output" "Output file created..." "exist"
+}
+
+#######################################
+# Copy the example alert rules into the config folder, replacing an old copy.
+# Globals:   templates, configs (read)
+# Returns:   0 if monitalert.cfg exists afterwards, non-zero otherwise
+#######################################
+createmonalert() {
+  local target="${configs}/monitalert.cfg"
+  if [[ -f "$target" ]]; then
+    rm -- "$target"
+    checkst "$target" "Deleting monitalert.cfg..." "nexist"
+  fi
+  cp -- "${templates}/monitalert.cfg" "$configs"
+  checkst "$target" "Copying monitalert from ${templates}..." "exist"
+}
+
+#######################################
+# Expand a monitrc template into the main Monit control file.
+# Globals:   templates, monit_control (read)
+# Arguments: $1 - template to expand (default ${templates}/monitrc)
+# Returns:   0 if the control file exists afterwards, non-zero otherwise
+#######################################
+createmonrc() {
+  local template=${1:-${templates}/monitrc}
+  local controltemplate
+  if [[ ! -r "$template" ]]; then
+    echo "Cannot read monitrc template: ${template}" >&2
+    return 1
+  fi
+  controltemplate="$(cat "$template")"
+  # eval expands ${BASEDIR} and friends inside the template; trusted input only.
+  eval "echo \"${controltemplate}\"" > "$monit_control"
+  chmod 700 "$monit_control"
+  checkst "$monit_control" "Creating monitrc" "exist"
+}
+
+#######################################
+# Generate one or all of the Monit configuration files.
+# Globals:   BASEDIR (read); templates, configs, monit_control (written)
+# Arguments: $1 - all | monitalert (alias: alert) | monitconfig | monitrc
+#            $2.. - passed on to the generators, see the usage text
+# Returns:   0 on success, non-zero if a file could not be generated
+#######################################
+generate() {
+  local target=$1
+  local prevdir=$PWD
+  local rc=0
+
+  # Source setenv.sh from the TorQ root for the environment variables that the
+  # templates expand (KDBBASEPORT is expected to come from there).
+  cd "$BASEDIR" || return 1
+  if [[ -f setenv.sh ]]; then
+    . ./setenv.sh
+  else
+    echo "Warning: ${BASEDIR}/setenv.sh not found, environment not set" >&2
+  fi
+  cd "$prevdir" || return 1
+
+  templates="${BASEDIR}/monit/templates"
+  configs="${BASEDIR}/monit/config"
+  monit_control="${configs}/monitrc"
+  mkdir -p "$configs" || return 1
+
+  case "$target" in
+    monitalert | alert)
+      createifmissing "${configs}/monitalert.cfg" createmonalert
+      ;;
+    monitconfig)
+      createifmissing "${configs}/monitconfig.cfg" createmonconfig "$2" "$3"
+      ;;
+    monitrc)
+      createifmissing "$monit_control" createmonrc "$2"
+      ;;
+    all)
+      createifmissing "${configs}/monitalert.cfg" createmonalert || rc=1
+      createifmissing "${configs}/monitconfig.cfg" createmonconfig "$2" "$3" || rc=1
+      createifmissing "$monit_control" createmonrc "$4" || rc=1
+      return "$rc"
+      ;;
+    *)
+      echo "Unknown generate option '${target}': expected all, monitalert, monitconfig or monitrc" >&2
+      return 1
+      ;;
+  esac
+}
+
+#######################################
+# Start Monit with the given control file.
+# Arguments: $1 - monitrc to use (default ${BASEDIR}/monit/config/monitrc)
+# Returns:   the exit status of monit, or 127 if monit is not installed
+#######################################
+start() {
+  local control=${1:-}
+  if ! command -v monit > /dev/null; then
+    echo "monit was not found on the PATH" >&2
+    return 127
+  fi
+  if [[ -z "$control" ]]; then
+    control="${BASEDIR}/monit/config/monitrc"
+    echo "Argument not provided monit will default to the following monitrc file: ${control}"
+  fi
+  monit -c "$control"
+}
+
+#######################################
+# Print the default file locations and the available functions.
+#######################################
+usage() {
+  local thin wide
+  thin=$(printf '%86s' '' | tr ' ' '-')
+  wide=$(printf '%149s' '' | tr ' ' '-')
+
+  echo ""
+  echo "NOTE: if any of the arguments are missing the default locations will be used"
+  echo ""
+  echo "$thin"
+  printf "%-20s | %-30s | %-30s\n" "FILE" "DEFAULT TEMPLATE PATH" "DEFAULT CONFIG PATH"
+  echo "$thin"
+  printf "%-20s | %-30s | %-30s\n" "monitconfig.cfg" "deploy/monit/templates" "deploy/monit/config"
+  printf "%-20s | %-30s | %-30s\n" "monitalert.cfg" "deploy/monit/templates" "deploy/monit/config"
+  printf "%-20s | %-30s | %-30s\n" "monitrc" "deploy/monit/templates" "deploy/monit/config"
+  printf "%-20s | %-30s | %-30s\n" "monit.log" "NA" "deploy/monit/logs"
+  printf "%-20s | %-30s | %-30s\n" "monit.state" "NA" "deploy/monit/logs"
+  echo "$thin"
+  echo ""
+  echo ""
+  echo "$wide"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "FUNCTION" "OPTION" "COMMENTS" "ARGUMENTS"
+  echo "$wide"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "generate" "monitalert" "generates the monitalert.cfg" "no arguments"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "generate" "monitconfig" "generates the monitconfig.cfg" "[process.csv] [start/stop script]"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "generate" "monitrc" "generates the monitrc" "[monitrc template]"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "generate" "all" "generates all *.cfg files & monitrc" "[process.csv] [start/stop script] [monitrc template]"
+  printf "%-10s | %-15s | %-40s | %-75s\n" "start" "NA" "starts monit" "[monitrc]"
+  echo "$wide"
+  echo ""
+}
+
+# Dispatch: the first argument names one of the functions above and the rest
+# are passed to it. No argument, or an unknown name, prints the usage.
+if (( $# == 0 )); then
+  usage
+elif declare -F -- "$1" > /dev/null; then
+  "$@"
+else
+  echo "Unknown function: $1" >&2
+  usage >&2
+  exit 1
+fi
diff --git a/monit/templates/monitalert.cfg b/monit/templates/monitalert.cfg new file mode 100644 index 000000000..62800e6a3 --- /dev/null +++ b/monit/templates/monitalert.cfg @@ -0,0 +1,12 @@ +check system examplehost + if loadavg (5min) > 3 for 4 cycles then alert + if loadavg (15min) > 1 for 4 cycles then alert + if memory usage > 80% for 4 cycles then alert + if swap usage > 20% for 4 cycles then alert + if cpu usage (system) > 20% for 4 cycles then alert + +check host examplehost.com with address gwadawdawdwad.com + if failed url http://gawdawdawdaw.com + then alert + + diff --git a/monit/templates/monitrc b/monit/templates/monitrc new file mode 100755 index 000000000..f7aa167f2 --- /dev/null +++ b/monit/templates/monitrc @@ -0,0 +1,321 @@ +############################################################################### +## Monit control file +############################################################################### +## +## Comments begin with a '#' and extend through the end of the line. Keywords +## are case insensitive. All path's MUST BE FULLY QUALIFIED, starting with '/'. +## +## Below you will find examples of some frequently used statements. For +## information about the control file and a complete list of statements and +## options, please have a look in the Monit manual. 
+## +## +############################################################################### +## Global section +############################################################################### +## +## Start Monit in the background (run as a daemon): +# + set daemon 30 # check services at 30 seconds intervals + #with start delay 240 # optional: delay the first check by 4-minutes (by +# # default Monit check immediately after Monit start) +# +# +## Set syslog logging. If you want to log to a standalone log file instead, +## specify the full path to the log file +# + set logfile ${BASEDIR}/monit/logs/monit.log + +# +# +## Set the location of the Monit lock file which stores the process id of the +## running Monit instance. By default this file is stored in $HOME/.monit.pid +# +#set pidfile $TORQMONIT/monit.pid +# +## Set the location of the Monit id file which stores the unique id for the +## Monit instance. The id is generated and stored on first Monit start. By +## default the file is placed in $HOME/.monit.id. +# +# set idfile /var/.monit.id + #set idfile $TORQMONIT/.monit.id +# +## Set the location of the Monit state file which saves monitoring states +## on each cycle. By default the file is placed in $HOME/.monit.state. If +## the state file is stored on a persistent filesystem, Monit will recover +## the monitoring state across reboots. If it is on temporary filesystem, the +## state will be lost on reboot which may be convenient in some situations. +# + set statefile ${BASEDIR}/monit/logs/monit.state + #set statefile /var/lib/monit/state +# +# + +## Set limits for various tests. 
The following example shows the default values: +## +# set limits { +# programOutput: 512 B, # check program's output truncate limit +# sendExpectBuffer: 256 B, # limit for send/expect protocol test +# fileContentBuffer: 512 B, # limit for file content test +# httpContentBuffer: 1 MB, # limit for HTTP content test +# networkTimeout: 5 seconds # timeout for network I/O +# programTimeout: 300 seconds # timeout for check program +# stopTimeout: 30 seconds # timeout for service stop +# startTimeout: 30 seconds # timeout for service start +# restartTimeout: 30 seconds # timeout for service restart +# } + +## Set global SSL options (just most common options showed, see manual for +## full list). +# +# set ssl { +# verify : enable, # verify SSL certificates (disabled by default but STRONGLY RECOMMENDED) +# selfsigned : allow # allow self signed SSL certificates (reject by default) +# } +# +# +## Set the list of mail servers for alert delivery. Multiple servers may be +## specified using a comma separator. If the first mail server fails, Monit +# will use the second mail server in the list and so on. By default Monit uses +# port 25 - it is possible to override this with the PORT option. +# +# set mailserver mail.bar.baz, # primary mailserver +# backup.bar.baz port 10025, # backup mailserver on port 10025 +# localhost # fallback relay + set mailserver smtp.gmail.com port 587 + username \"${MONIT_SMTP_USER:-monit@example.com}\" password \"${MONIT_SMTP_PASSWORD:-changeme}\" + using tlsv1 + with timeout 30 seconds +## No credentials are stored in this template. Export MONIT_SMTP_USER, MONIT_SMTP_PASSWORD +## and MONIT_ALERT_EMAIL before generating the monitrc, otherwise the placeholders are used. + set alert ${MONIT_ALERT_EMAIL:-root@localhost} +# +# +## By default Monit will drop alert events if no mail servers are available. +## If you want to keep the alerts for later delivery retry, you can use the +## EVENTQUEUE statement. The base directory where undelivered alerts will be +## stored is specified by the BASEDIR option. You can limit the queue size +## by using the SLOTS option (if omitted, the queue is limited by space +## available in the back end filesystem). 
+# + set eventqueue +# basedir /var/lib/monit/events # set the base directory where events will be stored + basedir ${BASEDIR}/monit/events + slots 100 # optionally limit the queue size +# +# +## Send status and events to M/Monit (for more informations about M/Monit +## see https://mmonit.com/). By default Monit registers credentials with +## M/Monit so M/Monit can smoothly communicate back to Monit and you don't +## have to register Monit credentials manually in M/Monit. It is possible to +## disable credential registration using the commented out option below. +## Though, if safety is a concern we recommend instead using https when +## communicating with M/Monit and send credentials encrypted. The password +## should be URL encoded if it contains URL-significant characters like +## \":\", \"?\", \"@\". +# +# set mmonit http://monit:monit@104.46.37.155:2810/collector +# # and register without credentials # Don't register credentials +# +# +## Monit by default uses the following format for alerts if the the mail-format +## statement is missing:: +## --8<-- +## set mail-format { +## from: Monit +## subject: monit alert -- $EVENT $SERVICE +## message: $EVENT Service $SERVICE +## Date: $DATE +## Action: $ACTION +## Host: $HOST +## Description: $DESCRIPTION +## +## Your faithful employee, +## Monit +## } +## --8<-- + set mail-format { + from: torqmonit@gmail.com + subject: [\$SERVICE] monit alert -- \$EVENT at \$DATE + message: Monit Report: +ACTION: \$ACTION +SERVICE: \$SERVICE +DATE: \$DATE +HOST: \$HOST +DESCRIPTION: \$DESCRIPTION + +Powered by Monit + +This message has been generated automatically! + } +## +## You can override this message format or parts of it, such as subject +## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc. +## are expanded at runtime. 
For example, to override the sender, use: +# +# set mail-format { from: monit@foo.bar } +# +# +## You can set alert recipients whom will receive alerts if/when a +## service defined in this file has errors. Alerts may be restricted on +## events by using a filter as in the second example below. +# +# set alert sysadm@foo.bar # receive all alerts +# +## Do not alert when Monit starts, stops or performs a user initiated action. +## This filter is recommended to avoid getting alerts for trivial cases. +# +# set alert your-name@your.domain not on { instance, action } +# +# +## Monit has an embedded HTTP interface which can be used to view status of +## services monitored and manage services from a web interface. The HTTP +## interface is also required if you want to issue Monit commands from the +## command line, such as 'monit status' or 'monit restart service' The reason +## for this is that the Monit client uses the HTTP interface to send these +## commands to a running Monit daemon. See the Monit Wiki if you want to +## enable SSL for the HTTP interface. +# + set httpd port 11000 and +# use address localhost # only accept connection from localhost +# allow localhost # allow localhost to connect to the server and + allow admin:monit # require user 'admin' with password 'monit' + +############################################################################### +## Services +############################################################################### +## +## Check general system resources such as load average, cpu and memory +## usage. Each test specifies a resource, conditions and the action to be +## performed should a test fail. +# +# check system $HOST +# if loadavg (1min) > 4 then alert +# if loadavg (5min) > 2 then alert +# if cpu usage > 95% for 10 cycles then alert +# if memory usage > 75% then alert +# if swap usage > 25% then alert +# +# +## Check if a file exists, checksum, permissions, uid and gid. 
In addition +## to alert recipients in the global section, customized alert can be sent to +## additional recipients by specifying a local alert handler. The service may +## be grouped using the GROUP option. More than one group can be specified by +## repeating the 'group name' statement. +# +# check file apache_bin with path /usr/local/apache/bin/httpd +# if failed checksum and +# expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor +# if failed permission 755 then unmonitor +# if failed uid root then unmonitor +# if failed gid root then unmonitor +# alert security@foo.bar on { +# checksum, permission, uid, gid, unmonitor +# } with the mail-format { subject: Alarm! } +# group server +# +# +## Check that a process is running, in this case Apache, and that it respond +## to HTTP and HTTPS requests. Check its resource usage such as cpu and memory, +## and number of children. If the process is not running, Monit will restart +## it by default. In case the service is restarted very often and the +## problem remains, it is possible to disable monitoring using the TIMEOUT +## statement. This service depends on another service (apache_bin) which +## is defined above. +# +# check process apache with pidfile /usr/local/apache/logs/httpd.pid +# start program = \"/etc/init.d/httpd start\" with timeout 60 seconds +# stop program = \"/etc/init.d/httpd stop\" +# if cpu > 60% for 2 cycles then alert +# if cpu > 80% for 5 cycles then restart +# if totalmem > 200.0 MB for 5 cycles then restart +# if children > 250 then restart +# if loadavg(5min) greater than 10 for 8 cycles then stop +# if failed host www.tildeslash.com port 80 protocol http +# and request \"/somefile.html\" +# then restart +# if failed port 443 protocol https with timeout 15 seconds then restart +# if 3 restarts within 5 cycles then unmonitor +# depends on apache_bin +# group server +# +# +## Check filesystem permissions, uid, gid, space and inode usage. 
Other services, +## such as databases, may depend on this resource and an automatically graceful +## stop may be cascaded to them before the filesystem will become full and data +## lost. +# +# check filesystem datafs with path /dev/sdb1 +# start program = \"/bin/mount /data\" +# stop program = \"/bin/umount /data\" +# if failed permission 660 then unmonitor +# if failed uid root then unmonitor +# if failed gid disk then unmonitor +# if space usage > 80% for 5 times within 15 cycles then alert +# if space usage > 99% then stop +# if inode usage > 30000 then alert +# if inode usage > 99% then stop +# group server +# +# +## Check a file's timestamp. In this example, we test if a file is older +## than 15 minutes and assume something is wrong if its not updated. Also, +## if the file size exceed a given limit, execute a script +# +# check file database with path /data/mydatabase.db +# if failed permission 700 then alert +# if failed uid data then alert +# if failed gid data then alert +# if timestamp > 15 minutes then alert +# if size > 100 MB then exec \"/my/cleanup/script\" as uid dba and gid dba +# +# +## Check directory permission, uid and gid. An event is triggered if the +## directory does not belong to the user with uid 0 and gid 0. In addition, +## the permissions have to match the octal description of 755 (see chmod(1)). +# +# check directory bin with path /bin +# if failed permission 755 then unmonitor +# if failed uid 0 then unmonitor +# if failed gid 0 then unmonitor +# +# +## Check a remote host availability by issuing a ping test and check the +## content of a response from a web server. Up to three pings are sent and +## connection to a port and an application level network check is performed. 
+# +# check host myserver with address 192.168.1.1 +# if failed ping then alert +# if failed port 3306 protocol mysql with timeout 15 seconds then alert +# if failed port 80 protocol http +# and request /some/path with content = \"a string\" +# then alert +# +# +## Check a network link status (up/down), link capacity changes, saturation +## and bandwidth usage. +# +# check network public with interface eth0 +# if failed link then alert +# if changed link then alert +# if saturation > 90% then alert +# if download > 10 MB/s then alert +# if total uploaded > 1 GB in last hour then alert +# +# +## Check custom program status output. +# +# check program myscript with path /usr/local/bin/myscript.sh +# if status != 0 then alert +# +# +############################################################################### +## Includes +############################################################################### +## +## It is possible to include additional configuration parts from other files or +## directories. 
+# +# include /etc/monit.d/* +# + include ${BASEDIR}/monit/config/*.cfg diff --git a/monit/templates/monittemplate.txt b/monit/templates/monittemplate.txt new file mode 100644 index 000000000..a0fe13022 --- /dev/null +++ b/monit/templates/monittemplate.txt @@ -0,0 +1,7 @@ +check process $procname + matching \"$KDBBASEPORT -proctype $proctype -procname $procname\" + start program = \"/bin/bash -c '$startstopsc start $procname'\" + with timeout 10 seconds + stop program = \"/bin/bash -c '$startstopsc stop $procname'\" + every \"* * * * *\" + mode active diff --git a/setenv.sh b/setenv.sh index dd7f972a0..c8d29f523 100644 --- a/setenv.sh +++ b/setenv.sh @@ -1,6 +1,12 @@ -export TORQHOME=${PWD} # if running the kdb+tick example, change these to full paths -export TORQDATA=${PWD} # some of the kdb+tick processes will change directory, and these will no longer be valid -export TORQAPPHOME=${PWD} +# Resolve the directory that holds this file. BASH_SOURCE is used rather than $0 because +# setenv.sh is sourced, so $0 names the calling shell or script (bash, -bash, monit.sh, ...) +# and not this file. +dirpath="${BASH_SOURCE[0]}" +torqroot="$(cd "$(dirname "$dirpath")" && pwd)" + +export TORQHOME="${torqroot}" # absolute path, so it stays valid when a kdb+tick process changes directory +export TORQDATA="${torqroot}" # absolute for the same reason +export TORQAPPHOME="${torqroot}" export KDBLOG=${TORQDATA}/logs export KDBHTML=${TORQHOME}/html @@ -17,3 +23,5 @@ export KDBSTACKID="-stackid ${KDBBASEPORT}" export TORQPROCESSES=${KDBAPPCONFIG}/process.csv # set TORQPROCESSES to the default process csv export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$KDBLIB/l32 + +export TORQMONIT=${TORQHOME}/logs/monit # set the folder for monit outputs diff --git a/torq.sh b/torq.sh index e3d619b55..665517bc3 100755 --- a/torq.sh +++ b/torq.sh @@ -214,8 +214,12 @@ usage() { exit 1 } -if [[ -z $SETENV ]]; then - SETENV=${PWD}/setenv.sh; # set the environment if not predefined +# BASH_SOURCE names this script whether it is executed or sourced; the previous test on an +# unquoted $0 only recognised a login shell called -bash. +dirpath="${BASH_SOURCE[0]}" + +if [[ -z "$SETENV" ]]; then + SETENV="$(cd "$(dirname "$dirpath")" && pwd)/setenv.sh"; # set the 
environment if not predefined fi if [ -f $SETENV ]; then # check script exists