Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #5198: Do not use stat on AIX in check-rudder-agent, but istat ins... #428

Merged
merged 1 commit into from Jul 2, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 23 additions & 13 deletions rudder-agent/SOURCES/check-rudder-agent
Expand Up @@ -32,7 +32,11 @@ export PATH=/opt/rudder/bin/:$PATH
BACKUP_DIR=/var/backups/rudder/
OS_FAMILY=$(uname -s)

# If we are on AIX, use alternative commands and options
## Which database system is used by CFEngine?
## TokyoCabinet (tcdb) or Lightning MDB (lmdb)
CFENGINE_DB_EXT=tcdb

## If we are on AIX, use alternative commands and options
if [ "z${OS_FAMILY}" = "zAIX" ]; then
CP_A="cp -hpPr"
PS_OPTIONS="-ef"
Expand All @@ -59,8 +63,8 @@ LAST_UPDATE_FILE=${CFE_DIR}/last_successful_inputs_update
UUID_FILE=/opt/rudder/etc/uuid.hive

# Remove the CFEngine lock database and its companion ".lock" file from
# ${CFE_DIR}/state.
#
# Globals:   CFE_DIR (read), CFENGINE_DB_EXT (read)
# Arguments: none
# Returns:   exit status of the last rm (rm -f does not fail on missing files)
#
# Both the hard-coded "tcdb" names and the names built from
# ${CFENGINE_DB_EXT} are removed; when CFENGINE_DB_EXT is "tcdb" the second
# pair is simply a no-op thanks to -f.
# Every path is quoted so that a state directory containing whitespace or
# glob characters is passed to rm as a single argument.
clean_cf_lock_files() {
  rm -f "${CFE_DIR}/state/cf_lock.tcdb"
  rm -f "${CFE_DIR}/state/cf_lock.tcdb.lock"
  rm -f "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}"
  rm -f "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}.lock"
}

check_and_fix_cfengine_processes() {
Expand Down Expand Up @@ -120,24 +124,24 @@ check_and_fix_cfengine_processes() {

# Check for the age of the last update file
# if it is older than CHECK_INTERVAL minutes, and the disable file is not defined, we
# need to kill the lock file on tcdb database
# need to kill the lock database
if [ ! -e ${LAST_UPDATE_FILE} -o -e ${CFE_DISABLE_FILE} ]; then
# Either the file ${LAST_UPDATE_FILE} is not yet present, and this node is
# probably not accepted yet, or the file ${CFE_DISABLE_FILE} is present, so
# the agent won't update the ${LAST_UPDATE_FILE}.
# In both cases, do nothing
:
elif test `find ${LAST_UPDATE_FILE} -mmin +${CHECK_INTERVAL}`; then
echo_n "WARNING: The file ${LAST_UPDATE_FILE} is older than ${CHECK_INTERVAL} minutes, the agent is probably stuck. Purging the TokyoCabinet lock file..."
echo_n "WARNING: The file ${LAST_UPDATE_FILE} is older than ${CHECK_INTERVAL} minutes, the agent is probably stuck. Purging the CFEngine lock database..."
clean_cf_lock_files
echo " Done";
fi


# Check for anomalous number of CFEngine processes
# If there are more than 8 agent/executor processes, we should kill them, and purge the tcdb database
# If there are more than 8 agent/executor processes, we should kill them, and purge the lock database
if [ ${NB_CF_PROCESS_RUNNING} -gt 8 ]; then
echo_n "WARNING: Too many instance of CFEngine processes running. Killing them, and purging the TokyoCabinet database..."
echo_n "WARNING: Too many instance of CFEngine processes running. Killing them, and purging the CFEngine lock database..."
echo "${CF_PROCESS_RUNNING}" | awk 'BEGIN { OFS=" "} {print $2 }' | xargs kill -9 || true
if [ "z${OS_FAMILY}" != "zAIX" ]; then
/etc/init.d/rudder-agent forcestop || true
Expand All @@ -147,13 +151,19 @@ check_and_fix_cfengine_processes() {
fi
}

# Check the size of the cf_lock.tcdb file
# Check the size of the cf_lock file
check_and_fix_cf_lock() {
MAX_CF_LOCK_SIZE=10485760
if [ -e "${CFE_DIR}/state/cf_lock.tcdb" ]; then
CF_LOCK_SIZE=`stat -c%s "${CFE_DIR}/state/cf_lock.tcdb"`
if [ -e "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}" ]; then

if [ "z${OS_FAMILY}" = "zAIX" ]; then
CF_LOCK_SIZE=`istat "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}" | grep Length | sed "s%^.*Length \([0-9]*\) bytes.*$%\1%"`
else
CF_LOCK_SIZE=`stat -c%s "${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT}"`
fi

if [ ${CF_LOCK_SIZE} -ge ${MAX_CF_LOCK_SIZE} ]; then
echo_n "WARNING: The file ${CFE_DIR}/state/cf_lock.tcdb is too big ( ${CF_LOCK_SIZE} bytes), purging it..."
echo_n "WARNING: The file ${CFE_DIR}/state/cf_lock.${CFENGINE_DB_EXT} is too big (${CF_LOCK_SIZE} bytes), purging it..."
clean_cf_lock_files
echo " Done"
fi
Expand Down Expand Up @@ -185,7 +195,7 @@ else
# If the UUID is not valid, regenerate it
if [ ${CHECK_UUID} -ne 1 ]; then
echo_n "INFO: Creating a new UUID for Rudder as the existing one is invalid..."
# Keep a backup of UUID even if it is not valid
# Keep a backup of UUID even if it is not valid
mkdir -p /var/backups/rudder
cp -f /opt/rudder/etc/uuid.hive /var/backups/rudder/uuid-$(date +%Y%m%d).hive
# Generate a new one
Expand All @@ -211,7 +221,7 @@ if [ ! -e ${FILE_TO_RESTORE} ]; then
${CP_A} ${LATEST_BACKUPED_FILES} ${FILE_TO_RESTORE} >/dev/null 2>&1
echo " Done"
else
echo "WARNING: The file ${FILE_TO_RESTORE} does not exist and no backup exist. Please reinstall the rudder-agent package"
echo "ERROR: The file ${FILE_TO_RESTORE} does not exist and no backup exist. Please reinstall the rudder-agent package"
fi
fi
}
Expand Down