Skip to content

Commit

Permalink
MDEV-33211 : Galera SST on maria-backup causes donor node to be unres…
Browse files Browse the repository at this point in the history
…ponsive

If mariabackup with backup locks is used for SST, we do not
pause and desync the Galera provider at all. In the WSREP_MODE_BF_MARIABACKUP
case, the provider is paused and desynced at the BLOCK_COMMIT phase. In
all other cases the provider is paused and desynced at the BLOCK_DDL phase.
  • Loading branch information
janlindstrom authored and montywi committed Feb 27, 2024
1 parent 5d4adea commit 41b435f
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 33 deletions.
95 changes: 93 additions & 2 deletions extra/mariabackup/backup_mysql.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1417,12 +1417,103 @@ write_slave_info(ds_ctxt *datasink, MYSQL *connection)


/*********************************************************************//**
Retrieves the Galera (wsrep) state of the server, saves it in the
XTRABACKUP_GALERA_INFO and XTRABACKUP_DONOR_GALERA_INFO files and also
reports it through msg().

The xtrabackup_galera_info file must be created even when backup locks
are used, because the donor's wsrep_gtid_domain_id is needed later in the
joiner. Note that at this stage wsrep_local_state_uuid and
wsrep_last_committed are inconsistent, but they are not used in the joiner.
The joiner rewrites this file at the mariabackup --prepare phase, which is
why the extra donor_galera_info file exists. The information is needed to
maintain the same wsrep_gtid_domain_id and gtid_binlog_pos across the
cluster. If the joiner node has a different wsrep_gtid_domain_id, it should
still receive the effective domain id from the donor node and use it.

@param datasink    data sink used to write the info files
@param connection  server connection used for the SHOW queries
@return the result of writing the info files; true when the server is not
a Galera node or the wsrep state cannot be read (those cases only log a
message and are not treated as failures) */
bool
write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{
  char *state_uuid = NULL, *state_uuid55 = NULL;
  char *last_committed = NULL, *last_committed55 = NULL;
  char *domain_id = NULL, *domain_id55 = NULL;
  char *wsrep_on = NULL, *wsrep_on55 = NULL;
  /* Effective values, chosen between the two spellings of each name.
  Declared up front so that no "goto cleanup" jumps over an
  initialization. */
  const char *uuid = NULL;
  const char *seqno = NULL;
  const char *domain = NULL;
  bool result = true;
  uint n_values = 0;

  /* Each variable is looked up under both a capitalized and a lowercase
  name; whichever one the server reports is used. */
  mysql_variable vars[] = {
    {"Wsrep_on", &wsrep_on},
    {"wsrep_on", &wsrep_on55},
    {NULL, NULL}
  };

  mysql_variable status[] = {
    {"Wsrep_local_state_uuid", &state_uuid},
    {"wsrep_local_state_uuid", &state_uuid55},
    {"Wsrep_last_committed", &last_committed},
    {"wsrep_last_committed", &last_committed55},
    {NULL, NULL}
  };

  mysql_variable value[] = {
    {"Wsrep_gtid_domain_id", &domain_id},
    {"wsrep_gtid_domain_id", &domain_id55},
    {NULL, NULL}
  };

  n_values = read_mysql_variables(connection, "SHOW VARIABLES", vars, true);

  if (n_values == 0 || (wsrep_on == NULL && wsrep_on55 == NULL))
  {
    /* Not an error: there is simply nothing to record. */
    msg("Server is not Galera node thus --galera-info does not "
        "have any effect.");
    goto cleanup;
  }

  read_mysql_variables(connection, "SHOW STATUS", status, true);

  if ((state_uuid == NULL && state_uuid55 == NULL)
      || (last_committed == NULL && last_committed55 == NULL))
  {
    /* Only a warning; result stays true. */
    msg("Warning: failed to get master wsrep state from SHOW STATUS.");
    goto cleanup;
  }

  n_values = read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'",
                                  value, true);

  if (n_values == 0 || (domain_id == NULL && domain_id55 == NULL))
  {
    /* Only a warning; result stays true. */
    msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
    goto cleanup;
  }

  uuid = state_uuid ? state_uuid : state_uuid55;
  seqno = last_committed ? last_committed : last_committed55;
  domain = domain_id ? domain_id : domain_id55;

  result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
                                        "%s:%s %s\n", uuid, seqno, domain);

  if (result)
  {
    /* Same content again: the joiner rewrites the first file during
    --prepare, so the donor's values are kept in a separate file. */
    result = datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
                                          "%s:%s %s\n", uuid, seqno, domain);
  }

  if (result)
  {
    /* NOTE(review): any status reported by write_current_binlog_file()
    is not checked here - confirm this is intended. */
    write_current_binlog_file(datasink, connection);

    msg("Writing Galera info succeeded with %s:%s %s",
        uuid, seqno, domain);
  }

cleanup:
  /* Release everything read_mysql_variables() may have filled in. Entries
  that were never read are still NULL here, exactly as "status" can be on
  the early exit paths. */
  free_mysql_variables(vars);
  free_mysql_variables(status);
  free_mysql_variables(value);

  return(result);
}


Expand Down
2 changes: 1 addition & 1 deletion mysql-test/include/wait_until_connected_again.inc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ let $counter= 5000;
let $mysql_errno= 9999;
while ($mysql_errno)
{
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION,2026
select 1;

dec $counter;
Expand Down
24 changes: 13 additions & 11 deletions mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ connection node_1;
connection node_2;
Starting server ...
connection node_1;
# Both should return FOUND 2 as we have bootstrap and SST
FOUND 2 /Desyncing and pausing the provider/ in mysqld.1.err
FOUND 2 /Resuming and resyncing the provider/ in mysqld.1.err
# Both should return NOT FOUND as we have mariabackup with backup locks
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
connection node_1;
SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
# Restart node_2, force SST.
Expand All @@ -25,9 +25,9 @@ connection node_2;
Starting server ...
connection node_2;
connection node_1;
# Both should return FOUND 3 as we have 1 new SST
FOUND 3 /Desyncing and pausing the provider/ in mysqld.1.err
FOUND 3 /Resuming and resyncing the provider/ in mysqld.1.err
# Both should return NOT FOUND as we have mariabackup with backup locks
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
SET GLOBAL wsrep_mode = "";
DROP TABLE t;
# Case 2: MariaBackup backup from node_2
Expand All @@ -46,11 +46,13 @@ SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
SELECT @@wsrep_mode;
@@wsrep_mode
BF_ABORT_MARIABACKUP
# Both should return FOUND 1 as node should not desync
FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err
FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err
# Should return FOUND 1 because only last backup does not desync
FOUND 1 /Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used./ in mysqld.2.err
# Both should return FOUND 2 because both backups do desync but on different points
FOUND 2 /Desyncing and pausing the provider/ in mysqld.2.err
FOUND 2 /Resuming and resyncing the provider/ in mysqld.2.err
# Should return FOUND 1 as server did not desync at BLOCK_DDL
FOUND 1 /Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used./ in mysqld.2.err
# Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
FOUND 1 /Server desynched from group during BACKUP STAGE BLOCK_COMMIT./ in mysqld.2.err
SET GLOBAL wsrep_mode = "";
connection node_1;
DROP TABLE t;
Expand Down
15 changes: 9 additions & 6 deletions mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB;
INSERT INTO t VALUES(1);
#
# In default settings donor should desync
# In default settings donor should not desync
#
--echo # Restart node_2, force SST.
--connection node_2
Expand All @@ -37,7 +37,7 @@ let $restart_noprint=2;

--connection node_1
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
--echo # Both should return FOUND 2 as we have bootstrap and SST
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
Expand Down Expand Up @@ -76,7 +76,7 @@ let $restart_noprint=2;

--connection node_1
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
--echo # Both should return FOUND 3 as we have 1 new SST
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
Expand Down Expand Up @@ -117,13 +117,16 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup2;
--enable_result_log

let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err;
--echo # Both should return FOUND 1 as node should not desync
--echo # Both should return FOUND 2 because both backups do desync but on different points
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
--source include/search_pattern_in_file.inc
--echo # Should return FOUND 1 because only last backup does not desync
let SEARCH_PATTERN = Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.;
--echo # Should return FOUND 1 as server did not desync at BLOCK_DDL
let SEARCH_PATTERN = Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used.;
--source include/search_pattern_in_file.inc
--echo # Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
let SEARCH_PATTERN = Server desynched from group during BACKUP STAGE BLOCK_COMMIT.;
--source include/search_pattern_in_file.inc

SET GLOBAL wsrep_mode = "";
Expand Down
60 changes: 47 additions & 13 deletions sql/backup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#ifdef WITH_WSREP
#include "wsrep_server_state.h"
#include "wsrep_mysqld.h"
#include "wsrep_sst.h"
#endif /* WITH_WSREP */

static const char *stage_names[]=
Expand Down Expand Up @@ -293,29 +294,40 @@ static bool backup_block_ddl(THD *thd)

#ifdef WITH_WSREP
DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false);
/*
if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock
holder, then do not desync and pause the node from cluster replication.
e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set,
we desync the node for BACKUP STAGE because applier threads
bypass backup MDL locks (see MDL_lock::can_grant_lock)
*/
if (WSREP_NNULL(thd))
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();

if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
server_state.state() == Wsrep_server_state::s_donor)
/*
If user is specifically choosing to allow BF aborting for
BACKUP STAGE BLOCK_DDL lock holder, then do not desync and
pause the node from cluster replication. e.g. mariabackup
uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
*/
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));
bool allow_bf= wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP);
bool pause_and_desync= true;

if ((allow_bf) || (mariabackup))
{
pause_and_desync= false;
}

if (pause_and_desync)
{
if (server_state.desync_and_pause().is_undefined()) {
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);
}

WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_DDL.");
DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause");
thd->wsrep_desynced_backup_stage= true;
}
else
WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
{
WSREP_INFO("Server not desynched from group at BLOCK_DDL because %s is used.",
allow_bf ? "WSREP_MODE_BF_MARIABACKUP" : wsrep_sst_method);
}
}
#endif /* WITH_WSREP */

Expand Down Expand Up @@ -399,6 +411,28 @@ static bool backup_block_commit(THD *thd)
}
thd->clear_error();

#ifdef WITH_WSREP
if (WSREP_NNULL(thd) && !thd->wsrep_desynced_backup_stage)
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));

/* If this node is donor and mariabackup is not used
we desync and pause provider here if it is not yet done.
*/
if (!mariabackup)
{
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);

WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_COMMIT.");
thd->wsrep_desynced_backup_stage= true;
DEBUG_SYNC(thd, "wsrep_backup_stage_commit_after_desync_and_pause");
}
}
#endif /* WITH_WSREP */

DBUG_RETURN(0);
}

Expand Down

0 comments on commit 41b435f

Please sign in to comment.