Permalink
Browse files

MDEV-10004: Galera's pc.recovery process fails in 10.1 with systemd

Galera recovery process works in two phases. In the first
phase, mysqld is started as non-daemon with --wsrep-recover
to recover and fetch the last logged global transaction ID.
This ID is then used in second phase as the start position
(--wsrep-start-position=XX) to start mysqld as daemon.

As this process was implemented in mysqld_safe script, the
recovery did not work when server was started using systemd.

Fixed by introducing a shell script (galera_recovery.sh) that
mimics the first phase of the recovery process.
  • Loading branch information...
Nirbhay Choubey
Nirbhay Choubey committed Jun 27, 2016
1 parent 0645699 commit a6816995ee6cc3dc0508e35fc2da4921b53fb931
Showing with 152 additions and 4 deletions.
  1. +2 −1 cmake/systemd.cmake
  2. +118 −0 scripts/galera_recovery.sh
  3. +12 −1 support-files/mariadb.service.in
  4. +20 −2 support-files/mariadb@.service.in
@@ -55,9 +55,10 @@ MACRO(CHECK_SYSTEMD)
IF(HAVE_SYSTEMD AND HAVE_SYSTEMD_SD_DAEMON_H AND HAVE_SYSTEMD_SD_LISTEN_FDS
AND HAVE_SYSTEMD_SD_NOTIFY AND HAVE_SYSTEMD_SD_NOTIFYF)
ADD_DEFINITIONS(-DHAVE_SYSTEMD)
SET(SYSTEMD_SCRIPTS mariadb-service-convert galera_new_cluster)
SET(SYSTEMD_SCRIPTS mariadb-service-convert galera_new_cluster galera_recovery)
SET(SYSTEMD_DEB_FILES "usr/bin/mariadb-service-convert
usr/bin/galera_new_cluster
usr/bin/galera_recovery
${INSTALL_SYSTEMD_UNITDIR}/mariadb.service
${INSTALL_SYSTEMD_UNITDIR}/mariadb@.service
${INSTALL_SYSTEMD_UNITDIR}/mariadb@bootstrap.service.d/use_galera_new_cluster.conf")
@@ -0,0 +1,118 @@
#!/bin/sh

# Copyright (c) 2016 MariaDB Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA


# This script is intended to be executed by systemd. It starts mysqld with
# --wsrep-recover to recover from a non-graceful shutdown, determines the
# last stored global transaction ID and echoes it in --wsrep-start-position=XX
# format. The output is then captured and used by systemd to start mysqld.
# If the server was configured to start without wsrep, nothing is echoed.

# --- Inputs ----------------------------------------------------------------
# Every argument given to this script, flattened into a single string. It is
# forwarded verbatim to my_print_defaults and to mysqld further down.
cmdline_args=$@

# --- Configuration ---------------------------------------------------------
# The @...@ tokens look like build-time placeholders (presumably substituted
# when the script is configured/installed -- confirm against the build files).
user="@MYSQLD_USER@"
print_defaults="@bindir@/my_print_defaults"

# --- Runtime environment ---------------------------------------------------
# Effective UID of the caller, and a private temporary file that will receive
# the server's error log during recovery.
euid=$(id -u)
log_file=$(mktemp /tmp/wsrep_recovery.XXXXXX)

# --- State filled in by the functions below --------------------------------
wsrep_on=0          # 1 when the configuration enables wsrep
ret=0               # exit status of the recovery run of mysqld
skipped=""          # "skipping position recovery" line(s) from the error log
recovered_pos=""    # raw "Recovered position" line(s) from the error log
start_pos=""        # position extracted from recovered_pos
start_pos_opt=""    # final --wsrep_start_position=... option (stdout payload)

# Print a diagnostic message on stderr.
#
# Arguments: $1 - message text (printed verbatim, followed by a newline)
# Outputs:   stderr only; stdout is reserved for printing
#            --wsrep-start-position=XXXX.
log ()
{
  # printf instead of echo: messages embed the server's error log, and echo
  # would swallow a message such as "-n" or (depending on the shell)
  # interpret backslash sequences inside it.
  printf '%s\n' "$1" >&2
}

# Cleanup handler: delete the temporary file named by $log_file.
# -f keeps this silent when the file is already gone.
finish() {
  rm -f "$log_file"
}

# Register the handler so it runs when the shell exits.
trap finish EXIT

# Scan option words (as printed by my_print_defaults) for the wsrep_on
# setting and record the result in the global wsrep_on (1 = on, 0 = off).
#
# Arguments: "$@" - option words such as --wsrep-on, --skip_wsrep_on or
#                   --wsrep_on=VALUE; unrelated options are ignored.
# Globals:   wsrep_on (written); arg and val are used as scratch variables.
#
# Later options override earlier ones. VALUE is matched exactly and
# case-insensitively: ON, TRUE and 1 enable wsrep, anything else disables it.
# An exact match avoids treating values that merely contain "on" or "1"
# (e.g. "none", "10") as enabled, and recognises TRUE.
parse_arguments() {
  for arg do
    case "$arg" in
      --wsrep[-_]on) wsrep_on=1 ;;
      --skip[-_]wsrep[-_]on) wsrep_on=0 ;;
      --wsrep[-_]on=*)
        # Everything after the first '=' is the option value.
        val=${arg#*=}
        case "$val" in
          [Oo][Nn]|[Tt][Rr][Uu][Ee]|1) wsrep_on=1 ;;
          *) wsrep_on=0 ;;
        esac
        ;;
    esac
  done
}

# Run mysqld with --wsrep_recover and extract the recovered position from
# its error log.
#
# Globals read:    cmdline_args, user, log_file
# Globals written: ret, recovered_pos, skipped, start_pos, start_pos_opt
# Outputs:         diagnostics through log (stderr); nothing on stdout
# Exits:           terminates the script with status 1 when mysqld returns
#                  non-zero, or when the error log contains neither a
#                  recovered position nor the "skipping position recovery"
#                  notice.
wsrep_recover_position() {
  # Redirect server's error log to the log file.
  # NOTE(review): eval re-parses $cmdline_args as shell text, presumably so
  # that quoting embedded in the arguments is honoured -- left untouched.
  eval /usr/sbin/mysqld $cmdline_args --user=$user --wsrep_recover 2> "$log_file"
  ret=$?
  if [ "$ret" -ne 0 ]; then
    # Something went wrong, let us also print the error log so that it
    # shows up in systemctl status output as a hint to the user.
    log "WSREP: Failed to start mysqld for wsrep recovery: '$(cat "$log_file")'"
    exit 1
  fi

  # Parse server's error log for recovered position. The server prints
  # "..skipping position recovery.." if started without wsrep.

  recovered_pos="$(grep 'WSREP: Recovered position:' "$log_file")"

  if [ -z "$recovered_pos" ]; then
    skipped="$(grep WSREP "$log_file" | grep 'skipping position recovery')"
    if [ -z "$skipped" ]; then
      log "WSREP: Failed to recover position: '$(cat "$log_file")'"
      exit 1
    else
      log "WSREP: Position recovery skipped."
    fi
  else
    # Keep only what follows the marker, trimmed of surrounding whitespace.
    # If several lines matched, the last one wins. The text is fed through
    # printf with quoting so that log content such as "[Note]" is never
    # subject to word splitting or pathname expansion.
    start_pos="$(printf '%s\n' "$recovered_pos" \
      | sed -e 's/.*WSREP: Recovered position:[[:space:]]*//' \
            -e 's/[[:space:]]*$//' \
      | tail -n 1)"
    log "WSREP: Recovered position $start_pos"
    start_pos_opt="--wsrep_start_position=$start_pos"
  fi
}

# Safety checks: the temporary log file must exist. When it does, restrict
# it to its owner and, when running as root, hand it over to $user
# (presumably the account mysqld runs as -- see --user= in the recovery call).
if [ -z "$log_file" ] || [ ! -f "$log_file" ]; then
  log "WSREP: mktemp failed"
else
  if [ "$euid" = "0" ]; then
    chown "$user" "$log_file"
  fi
  chmod 600 "$log_file"
fi

# Collect the effective server options from the option files and look for
# the wsrep_on setting among them. The command substitution is deliberately
# unquoted: each printed option must become a separate argument.
parse_arguments $($print_defaults $cmdline_args --loose-verbose \
  mariadb mariadb_safe mysqld mysqld_safe safe_mysqld galera)

# Perform wsrep position recovery if wsrep_on=1, skip otherwise.
if [ "$wsrep_on" -eq 1 ]; then
  wsrep_recover_position
fi

# The only stdout output of the script: the start position option, or an
# empty line when no position was recovered.
echo "$start_pos_opt"

@@ -48,6 +48,14 @@ CapabilityBoundingSet=CAP_IPC_LOCK
# Execute pre and post scripts as root, otherwise it does it as User=
PermissionsStartOnly=true

# Perform automatic wsrep recovery. When server is started without wsrep,
# galera_recovery simply returns an empty string. In any case, however,
# the script is not expected to return with a non-zero status.
# It is always safe to unset _WSREP_START_POSITION environment variable.
ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION"
ExecStartPre=/bin/sh -c "VAR=`/usr/bin/galera_recovery`; [ $? -eq 0 ] && \
systemctl set-environment _WSREP_START_POSITION=$VAR || exit 1"

# Needed to create system tables etc.
# ExecStartPre=/usr/bin/mysql_install_db -u mysql

@@ -57,9 +65,12 @@ PermissionsStartOnly=true
# This isn't a replacement for my.cnf.
# _WSREP_NEW_CLUSTER is for the exclusive use of the script galera_new_cluster
@SYSTEMD_EXECSTARTPRE@
ExecStart=/usr/sbin/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER
ExecStart=/usr/sbin/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION
@SYSTEMD_EXECSTARTPOST@

# Unset _WSREP_START_POSITION environment variable.
ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION"

KillMode=process
KillSignal=SIGTERM

@@ -55,6 +55,21 @@ CapabilityBoundingSet=CAP_IPC_LOCK
# Execute pre and post scripts as root, otherwise it does it as User=
PermissionsStartOnly=true

# Perform automatic wsrep recovery. When server is started without wsrep,
# galera_recovery simply returns an empty string. In any case, however,
# the script is not expected to return with a non-zero status.
# It is always safe to unset _WSREP_START_POSITION%I environment variable.
ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION%I"
ExecStartPre=/bin/sh -c "VAR=`/usr/bin/galera_recovery \
--defaults-file=@INSTALL_SYSCONF2DIR@/my%I.cnf`; [ $? -eq 0 ] && \
systemctl set-environment _WSREP_START_POSITION%I=$VAR || exit 1"
# Alternate: (remove ConditionPathExists above)
# use [mysqld.INSTANCENAME] as sections in my.cnf
#
#ExecStartPre=/bin/sh -c "VAR=`/usr/bin/galera_recovery \
# --defaults-group-suffix=%I`; [ $? -eq 0 ] && \
# systemctl set-environment _WSREP_START_POSITION%I=$VAR || exit 1"

# Needed to create system tables etc.
# ExecStartPre=/usr/bin/mysql_install_db -u mysql

@@ -67,12 +82,15 @@ PermissionsStartOnly=true
# Note: Place $MYSQLD_OPTS at the very end for its options to take precedence.

ExecStart=/usr/sbin/mysqld --defaults-file=@INSTALL_SYSCONF2DIR@/my%I.cnf \
$_WSREP_NEW_CLUSTER $MYSQLD_OPTS
$_WSREP_NEW_CLUSTER $_WSREP_START_POSITION%I $MYSQLD_OPTS
# Alternate: (remove ConditionPathExists above)
# use [mysqld.INSTANCENAME] as sections in my.cnf
#
# ExecStart=/usr/sbin/mysqld --defaults-group-suffix=%I \
# $_WSREP_NEW_CLUSTER $MYSQLD_OPTS
# $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION%I $MYSQLD_OPTS

# Unset _WSREP_START_POSITION%I environment variable.
ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION%I"

KillMode=process
KillSignal=SIGTERM

0 comments on commit a681699

Please sign in to comment.