Skip to content
Permalink
Browse files
MDEV-25880: rsync may be mistakenly killed when overlapping SST
This commit fixes a bug that was originally discovered during the
galera_nbo_sst_slave mtr test for the 10.6 branch. However, it is
relevant for all versions and can lead to intermittent SST
crashes via rsync on very fast server restarts - when a new
SST process (for example, after starting a new server instance)
overlaps the old SST process started by the previous, already
terminated server. This overlap can result in the new rsync
being killed instead of the old rsync, or the pid file from
the new rsync being removed, which then leads to problems.
  • Loading branch information
sysprg committed Jun 15, 2021
1 parent 1c35a3f commit 18d5be5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
@@ -1190,7 +1190,6 @@ trim_string()
check_pid()
{
local pid_file="$1"
local remove=${2:-0}
if [ -r "$pid_file" ]; then
local pid=$(cat "$pid_file" 2>/dev/null)
if [ -n "$pid" ]; then
@@ -1201,6 +1200,7 @@ check_pid()
fi
fi
fi
local remove=${2:-0}
if [ $remove -eq 1 ]; then
rm -f "$pid_file"
fi
@@ -68,6 +68,8 @@ cleanup_joiner()
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
wsrep_cleanup_progress_file
fi

[ -f "$SST_PID" ] && rm -f "$SST_PID"
}

check_pid_and_port()
@@ -281,6 +283,7 @@ then
*)
wsrep_log_error "Unrecognized ssl-mode option: '$SSLMODE'"
exit 22 # EINVAL
;;
esac
if [ -z "$CAFILE_OPT" ]; then
wsrep_log_error "Can't have ssl-mode='$SSLMODE' without CA file"
@@ -499,6 +502,21 @@ elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
check_sockets_utils

SST_PID="$WSREP_SST_OPT_DATA/wsrep_rsync_sst.pid"

# give some time for the previous SST script to complete
check_round=0
while check_pid "$SST_PID" 0
do
wsrep_log_info "previous SST not completed, waiting for it to exit"
check_round=$(( check_round + 1 ))
if [ $check_round -eq 10 ]; then
wsrep_log_error "SST script already running."
exit 114 # EALREADY
fi
sleep 1
done

# give some time for lingering stunnel from previous SST to complete
check_round=0
while check_pid "$STUNNEL_PID" 1
@@ -583,12 +601,14 @@ EOF
RSYNC_ADDR="*"
fi

echo $$ > "$SST_PID"

if [ -z "$STUNNEL" ]
then
rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" $RSYNC_EXTRA_ARGS &
RSYNC_REAL_PID=$!
TRANSFER_REAL_PID="$RSYNC_REAL_PID"
TRANSFER_PID=$RSYNC_PID
TRANSFER_REAL_PID=$RSYNC_REAL_PID
TRANSFER_PID="$RSYNC_PID"
else
# Let's check if the path to the config file contains a space?
if [ "${RSYNC_CONF#* }" = "$RSYNC_CONF" ]; then
@@ -631,8 +651,8 @@ EOF
fi
stunnel "$STUNNEL_CONF" &
STUNNEL_REAL_PID=$!
TRANSFER_REAL_PID="$STUNNEL_REAL_PID"
TRANSFER_PID=$STUNNEL_PID
TRANSFER_REAL_PID=$STUNNEL_REAL_PID
TRANSFER_PID="$STUNNEL_PID"
fi

if [ "${SSLMODE#VERIFY}" != "$SSLMODE" ]

0 comments on commit 18d5be5

Please sign in to comment.