From ffbd4d759c090e708484face8b725dea69456c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Peccatte?= Date: Tue, 17 Nov 2015 11:06:25 +0100 Subject: [PATCH] Fixes #7409: Support cfengine enterprise in check-rudder-agent --- rudder-agent/SOURCES/check-rudder-agent | 181 ++++++++++++++---------- rudder-agent/SOURCES/rudder-agent.init | 3 +- 2 files changed, 105 insertions(+), 79 deletions(-) diff --git a/rudder-agent/SOURCES/check-rudder-agent b/rudder-agent/SOURCES/check-rudder-agent index 000ba4762..b228f71c4 100755 --- a/rudder-agent/SOURCES/check-rudder-agent +++ b/rudder-agent/SOURCES/check-rudder-agent @@ -49,14 +49,12 @@ echo_n() { } # Default variables for CFEngine binaries and disable files -CFE_DIR=/var/rudder/cfengine-community -CFE_BIN_DIR=${CFE_DIR}/bin +RUDDER_CFE_DIR=/var/rudder/cfengine-community +ENTERPRISE_CFE_DIR=/var/cfengine CFE_DISABLE_FILE=/opt/rudder/etc/disable-agent - -LAST_UPDATE_FILE=${CFE_DIR}/last_successful_inputs_update - UUID_FILE=/opt/rudder/etc/uuid.hive + clean_cf_lock_files() { rm -f ${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT} rm -f ${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}.lock @@ -83,7 +81,7 @@ check_and_fix_cfengine_processes() { # A standard kill won't kill them, so the -9 is necessary to make sure they are stopped # They will be restarted by the next check, if the disable file is not set # List the cf-execd processes running (without the path, they can be run manually) - CF_EXECD_RUNNING=`${PS_COMMAND} | grep "cf-execd" | sed -e '/grep/d' | cat` + CF_EXECD_RUNNING=`${PS_COMMAND} | grep "${CFE_BIN_DIR}/cf-execd" | sed -e '/grep/d' | cat` NB_CF_EXECD_RUNNING=`echo "${CF_EXECD_RUNNING}" | sed -e '/^$/d' | wc -l` if [ ${NB_CF_EXECD_RUNNING} -gt 1 ]; then echo_n "WARNING: Too many instance of CFEngine cf-execd processes running. Killing them..." @@ -91,24 +89,23 @@ check_and_fix_cfengine_processes() { echo " Done" fi - # List the CFEngine processes running - CF_PROCESS_RUNNING=`${PS_COMMAND} | egrep "${CFE_BIN_DIR}/(cf-execd|cf-agent)" | sed -e '/grep/d' | cat` + CF_PROCESS_RUNNING=`${PS_COMMAND} | sed -ne "\\%${CFE_BIN_DIR}/\\(cf-execd\\|cf-agent\\)%p" | cat` # Count the number of processes running, filtering empty lines NB_CF_PROCESS_RUNNING=`echo "${CF_PROCESS_RUNNING}" | sed -e '/^$/d' | wc -l` # If no disable file AND no process of CFEngine from Rudder, then relaunch cf-agent with a failsafe first # But this is applied only on servers or nodes already initialized (policy server set) - if [ ! -f ${CFE_DISABLE_FILE} ] && [ ${NB_CF_PROCESS_RUNNING} -eq 0 ] && [ -f ${CFE_DIR}/policy_server.dat ]; then - echo_n "WARNING: No disable file detected and no CFEngine process neither. Relaunching CFEngine processes..." + if [ ! -f "${CFE_DISABLE_FILE}" ] && [ "${NB_CF_PROCESS_RUNNING}" -eq 0 ] && [ -f "${CFE_DIR}/policy_server.dat" ]; then + echo_n "WARNING: No disable file detected and no CFEngine process either. Relaunching CFEngine processes..." ${CFE_BIN_DIR}/cf-agent -f failsafe.cf >/dev/null 2>&1 ${CFE_BIN_DIR}/cf-agent >/dev/null 2>&1 echo " Done" fi # Get the value of rudder-agent run interval from file /var/rudder/cfengine-community/inputs/run_interval - if [ -f /var/rudder/cfengine-community/inputs/run_interval ]; then - RUN_INTERVAL=`cat /var/rudder/cfengine-community/inputs/run_interval` + if [ -f "${CFE_DIR}/inputs/run_interval" ]; then + RUN_INTERVAL=`cat "${CFE_DIR}/inputs/run_interval"` else # File does not exists, use default value 5 RUN_INTERVAL=5 @@ -119,7 +116,7 @@ check_and_fix_cfengine_processes() { # Check for the age of the last update file # if it is older than CHECK_INTERVAL minutes, and the disable file is not defined, we # need to kill the lock database - if [ ! -f ${LAST_UPDATE_FILE} ] || [ -f ${CFE_DISABLE_FILE} ]; then + if [ ! -f "${LAST_UPDATE_FILE}" ] || [ -f "${CFE_DISABLE_FILE}" ]; then # Either the file ${LAST_UPDATE_FILE} is not yet present, and this node is # probably not accepted yet, either the file ${CFE_DISABLE_FILE} is present, so # the agent won't update the ${LAST_UPDATE_FILE}. @@ -134,10 +131,10 @@ check_and_fix_cfengine_processes() { # Check for anomalous number of CFEngine processes # If there are more than 8 agent/executor processes, we should kill them, and purge the lock database - if [ ${NB_CF_PROCESS_RUNNING} -gt 8 ]; then + if [ "${NB_CF_PROCESS_RUNNING}" -gt 8 ]; then echo_n "WARNING: Too many instance of CFEngine processes running. Killing them, and purging the CFEngine lock database..." echo "${CF_PROCESS_RUNNING}" | awk 'BEGIN { OFS=" "} {print $2 }' | xargs kill -9 || true - if [ "z${OS_FAMILY}" != "zAIX" ]; then + if [ -e /etc/init.d/rudder-agent ] && [ "${OS_FAMILY}" != "AIX" ] || [ "${OS_FAMILY}" = "SunOS" ]; then /etc/init.d/rudder-agent forcestop || true fi clean_cf_lock_files @@ -150,13 +147,13 @@ check_and_fix_cf_lock() { MAX_CF_LOCK_SIZE=10485760 if [ -f "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}" ]; then - if [ "z${OS_FAMILY}" = "zAIX" ]; then + if [ "${OS_FAMILY}" = "AIX" ]; then CF_LOCK_SIZE=`istat "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}" | grep Length | sed "s%^.*Length \([0-9]*\) bytes.*$%\1%"` else CF_LOCK_SIZE=`stat -c%s "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}"` fi - if [ ${CF_LOCK_SIZE} -ge ${MAX_CF_LOCK_SIZE} ]; then + if [ "${CF_LOCK_SIZE}" -ge "${MAX_CF_LOCK_SIZE}" ]; then echo_n "WARNING: The file ${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT} is too big (${CF_LOCK_SIZE} bytes), purging it..." clean_cf_lock_files echo " Done" @@ -166,88 +163,118 @@ check_and_fix_cf_lock() { check_and_fix_rudder_uuid() { -# Default variable about UUID backup -LATEST_BACKUPED_UUID="" - -# Generate a UUID if we don't have one yet -if [ ! -f ${UUID_FILE} ]; then - if [ -d ${BACKUP_DIR} ]; then - LATEST_BACKUPED_UUID=`ls -v1 ${BACKUP_DIR}uuid-*.hive | tail -n1` - fi - if [ "z${LATEST_BACKUPED_UUID}" != "z" ]; then - echo_n "WARNING: The UUID of the node does not exist. The lastest backup (${LATEST_BACKUPED_UUID}) will be recovered..." - ${CP_A} ${LATEST_BACKUPED_UUID} ${UUID_FILE} >/dev/null 2>&1 - echo " Done" + # Default variable about UUID backup + LATEST_BACKUPED_UUID="" + + # Generate a UUID if we don't have one yet + if [ ! -f "${UUID_FILE}" ]; then + if [ -d "${BACKUP_DIR}" ]; then + LATEST_BACKUPED_UUID=`ls -v1 ${BACKUP_DIR}uuid-*.hive | tail -n1` + fi + if [ "${LATEST_BACKUPED_UUID}" != "" ]; then + echo_n "WARNING: The UUID of the node does not exist. The lastest backup (${LATEST_BACKUPED_UUID}) will be recovered..." + ${CP_A} ${LATEST_BACKUPED_UUID} ${UUID_FILE} >/dev/null 2>&1 + echo " Done" + else + echo_n "WARNING: The UUID of the node does not exist and no backup exist. A new one will be generated..." + uuidgen > ${UUID_FILE} + echo " Done" + fi else - echo_n "WARNING: The UUID of the node does not exist and no backup exist. A new one will be generated..." - uuidgen > ${UUID_FILE} - echo " Done" - fi -else - # UUID is valid only if it has been generetaed by uuidgen or if it is set to 'root' for policy server - REGEX=`x="[a-f0-9][a-f0-9][a-f0-9][a-f0-9]" && echo "$x$x-$x-$x-$x-$x$x$x"` - CHECK_UUID=`cat /opt/rudder/etc/uuid.hive | egrep "^$REGEX|^root" | wc -l` - # If the UUID is not valid, regenerate it - if [ ${CHECK_UUID} -ne 1 ]; then - echo_n "INFO: Creating a new UUID for Rudder as the existing one is invalid..." - # Keep a backup of UUID even if it is not valid - mkdir -p /var/backups/rudder - cp -f /opt/rudder/etc/uuid.hive /var/backups/rudder/uuid-`date +%Y%m%d`.hive - # Generate a new one - uuidgen > ${UUID_FILE} - echo " Done." + # UUID is valid only if it has been generetaed by uuidgen or if it is set to 'root' for policy server + REGEX=`x="[a-f0-9][a-f0-9][a-f0-9][a-f0-9]" && echo "$x$x-$x-$x-$x-$x$x$x"` + CHECK_UUID=`cat /opt/rudder/etc/uuid.hive | egrep "^$REGEX|^root" | wc -l` + # If the UUID is not valid, regenerate it + if [ ${CHECK_UUID} -ne 1 ]; then + echo_n "INFO: Creating a new UUID for Rudder as the existing one is invalid..." + # Keep a backup of UUID even if it is not valid + mkdir -p "${BACKUP_DIR}" + cp -f /opt/rudder/etc/uuid.hive ${BACKUP_DIR}/uuid-`date +%Y%m%d`.hive + # Generate a new one + uuidgen > ${UUID_FILE} + echo " Done." + fi fi -fi } check_and_fix_specific_rudder_agent_file() { + FILE_TO_RESTORE=$1 + FILE_TYPE=$2 + LATEST_BACKUPED_FILES="" + + if [ ! -f ${FILE_TO_RESTORE} ]; then + if [ -d ${BACKUP_DIR} ]; then + LATEST_BACKUPED_FILES=`ls -v1 ${BACKUP_DIR}rudder-agent.${FILE_TYPE}-* | tail -n1` + fi + if [ "${LATEST_BACKUPED_FILES}" != "" ]; then + echo_n "WARNING: The file ${FILE_TO_RESTORE} does not exist. The lastest backup (${LATEST_BACKUPED_FILES}) will be recovered..." + ${CP_A} ${LATEST_BACKUPED_FILES} ${FILE_TO_RESTORE} >/dev/null 2>&1 + echo " Done" + else + echo "ERROR: The file ${FILE_TO_RESTORE} does not exist and no backup exist. Please reinstall the rudder-agent package" + fi + fi +} -FILE_TO_RESTORE=$1 -FILE_TYPE=$2 -LATEST_BACKUPED_FILES="" +empty() { + [ ! -f "$1" ] || [ $(du "$1" | awk '{print $1}') = "0" ] +} -if [ ! -f ${FILE_TO_RESTORE} ]; then - if [ -d ${BACKUP_DIR} ]; then - LATEST_BACKUPED_FILES=`ls -v1 ${BACKUP_DIR}rudder-agent.${FILE_TYPE}-* | tail -n1` - fi - if [ "z${LATEST_BACKUPED_FILES}" != "z" ]; then - echo_n "WARNING: The file ${FILE_TO_RESTORE} does not exist. The lastest backup (${LATEST_BACKUPED_FILES}) will be recovered..." - ${CP_A} ${LATEST_BACKUPED_FILES} ${FILE_TO_RESTORE} >/dev/null 2>&1 +check_and_fix_inputs() { + # if file is absent or empty there have been a problem with update + if empty "${CFE_DIR}/inputs/common/1.0/update.cf" || empty "${CFE_DIR}/inputs/failsafe.cf" || empty "${CFE_DIR}/inputs/promises.cf" + then + echo_n "ERROR: There was an error during promises update. Reseting to initial promises and updating..." + rm -rf ${CFE_DIR}/inputs/* + if [ -d "/opt/rudder/share/initial-promises" ] # rudder package has been properly installed + then + cp -r /opt/rudder/share/initial-promises/* ${CFE_DIR}/inputs/ + ${CFE_BIN_DIR}/cf-agent -K -f failsafe.cf && ${CFE_BIN_DIR}/cf-agent + else # this is an enterprise only agent + if [ -e "${CFE_DIR}/policy_server.dat" ] + then + ${CFE_BIN_DIR}/cf-agent --bootstrap `cat "${CFE_DIR}/policy_server.dat"` + fi + fi echo " Done" - else - echo "ERROR: The file ${FILE_TO_RESTORE} does not exist and no backup exist. Please reinstall the rudder-agent package" fi -fi } - # Ensure script is executed by root MYUID=`id | cut -d\( -f2 | cut -d\) -f1` if [ "${MYUID}" != 'root' ];then echo "You must be root"; exit; fi # Launch each check with a certain order check_and_fix_rudder_uuid -check_and_fix_cfengine_processes -check_and_fix_cf_lock + +# Once for rudder's cfengine +if [ -e "${RUDDER_CFE_DIR}/bin/cf-agent" ] +then + CFE_DIR="${RUDDER_CFE_DIR}" + CFE_BIN_DIR="${CFE_DIR}/bin" + LAST_UPDATE_FILE="${CFE_DIR}/last_successful_inputs_update" + NEED_INIT_FILES="true" + check_and_fix_cfengine_processes + check_and_fix_cf_lock + check_and_fix_inputs +fi + +# Once for enterprise cfengine +if [ -e "${ENTERPRISE_CFE_DIR}/bin/cf-agent" ] +then + CFE_DIR="${ENTERPRISE_CFE_DIR}" + CFE_BIN_DIR="${CFE_DIR}/bin" + LAST_UPDATE_FILE="${CFE_DIR}/last_successful_inputs_update" + check_and_fix_cfengine_processes + check_and_fix_cf_lock + check_and_fix_inputs +fi # The following files are not present on AIX systems -if [ "z${OS_FAMILY}" != "zAIX" ]; then +if [ "${NEED_INIT_FILES}" = "true" ] && [ "${OS_FAMILY}" != "AIX" ] || [ "${OS_FAMILY}" = "SunOS" ]; then check_and_fix_specific_rudder_agent_file /etc/init.d/rudder-agent init check_and_fix_specific_rudder_agent_file /etc/default/rudder-agent default check_and_fix_specific_rudder_agent_file /etc/cron.d/rudder-agent cron fi -# if file is absent or empty there have been a problem with update -base="/var/rudder/cfengine-community/inputs" -empty() { - [ ! -f "$1" ] || [ $(du "$1" | awk '{print $1}') = "0" ] -} -if empty "${base}/common/1.0/update.cf" || empty "${base}/failsafe.cf" || empty "${base}/promises.cf" -then - echo_n "ERROR: There was an error during promises update. Reseting to initial promises and updating..." - rm -rf /var/rudder/cfengine-community/inputs/* - cp -r /opt/rudder/share/initial-promises/* /var/rudder/cfengine-community/inputs - /var/rudder/cfengine-community/bin/cf-agent -K -f failsafe.cf && /var/rudder/cfengine-community/bin/cf-agent - echo " Done" -fi diff --git a/rudder-agent/SOURCES/rudder-agent.init b/rudder-agent/SOURCES/rudder-agent.init index e0dfb6ce8..088e5e2a4 100755 --- a/rudder-agent/SOURCES/rudder-agent.init +++ b/rudder-agent/SOURCES/rudder-agent.init @@ -83,7 +83,6 @@ fi # CFEngine Community directory and files -CFENGINE_COMMUNITY_PATH="/opt/rudder" CFENGINE_COMMUNITY_VAR_PATH="/var/rudder/cfengine-community" CFENGINE_COMMUNITY_RUN_0="1" CFENGINE_COMMUNITY_RUN_1="1" @@ -196,7 +195,7 @@ start_daemons() { if [ $i -eq $TIMEOUT -o $i -eq `expr $TIMEOUT / 2` ] then eval message "alert" "\"[ALERT] no PID file for \${CFENGINE_COMMUNITY_NAME_$daemon} after $i seconds. Trying again...\"" - eval "\${CFENGINE_COMMUNITY_BIN_\$daemon}" "\${CFENGINE_COMMUNITY_PARAMS_$daemon}" + eval "\${CFENGINE_COMMUNITY_BIN_$daemon}" "\${CFENGINE_COMMUNITY_PARAMS_$daemon}" fi if [ $i -eq `expr $TIMEOUT + 10` ]