Skip to content

Commit

Permalink
MDEV-26391 BF abortable mariabackup execution
Browse files Browse the repository at this point in the history
This commit changes backup execution (namely the block ddl phase),
so that node is not paused from cluster. Instead, the following
backup execution is declared as vulnerable for possible cluster
level conflicts, especially with DDL statement applying.
With this, the mariabackup execution may be aborted, if DDL
statements happen during backup execution. This abortable
backup execution is optional feature and may be
enabled/disabled by wsrep_mode: BF_ABORT_MARIABACKUP.
Note that old style node desync and pause, despite of
WSREP_MODE_BF_MARIABACKUP is needed if node is operating as
SST donor.

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
  • Loading branch information
sjaakola authored and Jan Lindström committed Jan 17, 2023
1 parent 3386b30 commit 95de524
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 15 deletions.
4 changes: 2 additions & 2 deletions mysql-test/suite/galera/r/galera_var_wsrep_mode.result
Expand Up @@ -20,8 +20,8 @@ SET GLOBAL wsrep_mode='A';
ERROR 42000: Variable 'wsrep_mode' can't be set to the value of 'A'
SET GLOBAL wsrep_mode=NULL;
ERROR 42000: Variable 'wsrep_mode' can't be set to the value of 'NULL'
SET GLOBAL wsrep_mode=64;
ERROR 42000: Variable 'wsrep_mode' can't be set to the value of '64'
SET GLOBAL wsrep_mode=128;
ERROR 42000: Variable 'wsrep_mode' can't be set to the value of '128'
SET GLOBAL wsrep_mode=REQUIRED_PRIMARY_KEY,REPLICATE_MYISAM;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '' at line 1
SET GLOBAL wsrep_mode=1;
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/suite/galera/t/galera_var_wsrep_mode.test
Expand Up @@ -22,7 +22,7 @@ SET GLOBAL wsrep_mode='A';
--error ER_WRONG_VALUE_FOR_VAR
SET GLOBAL wsrep_mode=NULL;
--error ER_WRONG_VALUE_FOR_VAR
SET GLOBAL wsrep_mode=64;
SET GLOBAL wsrep_mode=128;
--error ER_PARSE_ERROR
SET GLOBAL wsrep_mode=REQUIRED_PRIMARY_KEY,REPLICATE_MYISAM;
#
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/suite/sys_vars/r/sysvars_wsrep.result
Expand Up @@ -342,7 +342,7 @@ VARIABLE_COMMENT Set of WSREP features that are enabled.
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST STRICT_REPLICATION,BINLOG_ROW_FORMAT_ONLY,REQUIRED_PRIMARY_KEY,REPLICATE_MYISAM,REPLICATE_ARIA,DISALLOW_LOCAL_GTID
ENUM_VALUE_LIST STRICT_REPLICATION,BINLOG_ROW_FORMAT_ONLY,REQUIRED_PRIMARY_KEY,REPLICATE_MYISAM,REPLICATE_ARIA,DISALLOW_LOCAL_GTID,BF_ABORT_MARIABACKUP
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
GLOBAL_VALUE_PATH NULL
Expand Down
35 changes: 30 additions & 5 deletions sql/backup.cc
Expand Up @@ -35,8 +35,10 @@
#include "sql_handler.h" // mysql_ha_cleanup_no_free
#include <my_sys.h>
#include <strfunc.h> // strconvert()
#include "debug_sync.h"
#ifdef WITH_WSREP
#include "wsrep_server_state.h"
#include "wsrep_mysqld.h"
#endif /* WITH_WSREP */

static const char *stage_names[]=
Expand Down Expand Up @@ -291,16 +293,26 @@ static bool backup_block_ddl(THD *thd)

#ifdef WITH_WSREP
/*
We desync the node for BACKUP STAGE because applier threads
if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock
holder, then do not desync and pause the node from cluster replication.
e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set,
we desync the node for BACKUP STAGE because applier threads
bypass backup MDL locks (see MDL_lock::can_grant_lock)
*/
if (WSREP_NNULL(thd))
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
if (server_state.desync_and_pause().is_undefined()) {
DBUG_RETURN(1);
if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
server_state.state() == Wsrep_server_state::s_donor)
{
if (server_state.desync_and_pause().is_undefined()) {
DBUG_RETURN(1);
}
thd->wsrep_desynced_backup_stage= true;
}
thd->wsrep_desynced_backup_stage= true;
else
WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
}
#endif /* WITH_WSREP */

Expand Down Expand Up @@ -340,6 +352,18 @@ static bool backup_block_ddl(THD *thd)
/* There can't be anything more that needs to be logged to ddl log */
THD_STAGE_INFO(thd, org_stage);
stop_ddl_logging();
#ifdef WITH_WSREP
// Allow tests to block the applier thread using the DBUG facilities
DBUG_EXECUTE_IF("sync.wsrep_after_mdl_block_ddl",
{
const char act[]=
"now "
"signal signal.wsrep_apply_toi";
DBUG_ASSERT(!debug_sync_set_action(thd,
STRING_WITH_LEN(act)));
};);
#endif /* WITH_WSREP */

DBUG_RETURN(0);
err:
THD_STAGE_INFO(thd, org_stage);
Expand Down Expand Up @@ -399,7 +423,8 @@ bool backup_end(THD *thd)
thd->current_backup_stage= BACKUP_FINISHED;
thd->mdl_context.release_lock(old_ticket);
#ifdef WITH_WSREP
if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage)
if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage &&
!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP))

This comment has been minimized.

Copy link
@airikh

airikh Jul 12, 2023

I think you also need to consider whether the node was being used as an SST donor (with wsrep_sst_method = mariabackup), since I find that a node never returns from Donor/Desynced to Synced after a serving as an SST donor.

    Wsrep_server_state &server_state= Wsrep_server_state::instance();
    if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage &&
	    (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) || 
         server_state.state() == Wsrep_server_state::s_donor))
    {
      server_state.resume_and_resync();
      thd->wsrep_desynced_backup_stage= false;
    }

This comment has been minimized.

Copy link
@janlindstrom

janlindstrom Jul 13, 2023

Contributor

Note that wsrep_desynced_backup_stage = true only when !wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) || server_state.state() == Wsrep_server_state::s_donor) see line 306.

This comment has been minimized.

Copy link
@airikh

airikh Jul 18, 2023

@janlindstrom,

Isn't server_state.state() == Wsrep_server_state::s_donor true when the node is serving as an SST donor?

This comment has been minimized.

Copy link
@janlindstrom

janlindstrom Jul 19, 2023

Contributor

Yes, for some reason git lost my first comment about there is a bug in that condition.

{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
server_state.resume_and_resync();
Expand Down
5 changes: 2 additions & 3 deletions sql/mdl.cc
Expand Up @@ -1747,10 +1747,9 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg,
#ifdef WITH_WSREP
/*
Approve lock request in BACKUP namespace for BF threads.
We should get rid of this code and forbid FTWRL/BACKUP statements
when wsrep is active.
*/
if ((wsrep_thd_is_toi(requestor_ctx->get_thd()) ||
if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) &&
(wsrep_thd_is_toi(requestor_ctx->get_thd()) ||
wsrep_thd_is_applying(requestor_ctx->get_thd())) &&
key.mdl_namespace() == MDL_key::BACKUP)
{
Expand Down
3 changes: 1 addition & 2 deletions sql/service_wsrep.cc
Expand Up @@ -241,7 +241,7 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
victim_thd->awake_no_mutex(KILL_QUERY);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake, signal %d", signal);
}
return ret;
}
Expand Down Expand Up @@ -277,7 +277,6 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
return (cs.state() == wsrep::client_state::s_exec ||
cs.state() == wsrep::client_state::s_result);
case wsrep::transaction::s_aborting:
case wsrep::transaction::s_aborted:
return true;
default:
return false;
Expand Down
1 change: 1 addition & 0 deletions sql/sys_vars.cc
Expand Up @@ -6013,6 +6013,7 @@ static const char *wsrep_mode_names[]=
"REPLICATE_MYISAM",
"REPLICATE_ARIA",
"DISALLOW_LOCAL_GTID",
"BF_ABORT_MARIABACKUP",
NullS
};
static Sys_var_set Sys_wsrep_mode(
Expand Down
5 changes: 5 additions & 0 deletions sql/wsrep_mysqld.cc
Expand Up @@ -3053,6 +3053,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
WSREP_DEBUG("BF thread waiting for FLUSH");
ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
if (granted_thd->current_backup_stage != BACKUP_FINISHED &&
wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP))
{
wsrep_abort_thd(request_thd, granted_thd, 1);
}
}
else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
{
Expand Down
3 changes: 2 additions & 1 deletion sql/wsrep_mysqld.h
Expand Up @@ -130,7 +130,8 @@ enum enum_wsrep_mode {
WSREP_MODE_REQUIRED_PRIMARY_KEY= (1ULL << 2),
WSREP_MODE_REPLICATE_MYISAM= (1ULL << 3),
WSREP_MODE_REPLICATE_ARIA= (1ULL << 4),
WSREP_MODE_DISALLOW_LOCAL_GTID= (1ULL << 5)
WSREP_MODE_DISALLOW_LOCAL_GTID= (1ULL << 5),
WSREP_MODE_BF_MARIABACKUP= (1ULL << 6)
};

// Streaming Replication
Expand Down
8 changes: 8 additions & 0 deletions sql/wsrep_thd.cc
Expand Up @@ -375,6 +375,14 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
have acquired MDL locks (due to DDL execution), and this has caused BF conflict.
such case does not require aborting in wsrep or replication provider state.
*/
if (victim_thd->current_backup_stage != BACKUP_FINISHED &&
wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP))
{
WSREP_DEBUG("killing connection for non wsrep session");
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
victim_thd->awake_no_mutex(KILL_CONNECTION);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
}
return false;
}

Expand Down

0 comments on commit 95de524

Please sign in to comment.