Skip to content

Commit 27c2480

Browse files
committed
MDEV-15636 mariabackup --lock-ddl-per-table hangs if ALTER table is running
concurrently. There is a deadlock between C1 mariabackup's connection that holds MDL locks C2 Online ALTER TABLE that wants to have MDL exclusively and tries to upgrade its mdl lock. C3 another mariabackup's connection that does FLUSH TABLES (or FTWRL) C3 waits waits for C2, which waits for C1, which waits for C3, thus the deadlock. MDL locks cannot be released until FLUSH succeeds, because otherwise it would allow ALTER to sneak in, causing backup to abort and breaking lock-ddl-per-table's promise. The fix here workarounds the deadlock, by killing connections in "Waiting for metadata lock" status (i.e ALTER). This killing continues until FTWRL succeeds. Killing connections is skipped in case --no-locks parameter was passed to backup, because there won't be a FLUSH. For the reference,in Percona's xtrabackup --lock-ddl-per-connection silently implies --no-lock ie FLUSH is always skipped there. A rather large part of fix is introducing DBUG capability to start a query the new connection at the right moment of backup compensating somewhat for mariabackup' lack of send_query or DBUG_SYNC.
1 parent a1d68fa commit 27c2480

File tree

4 files changed

+179
-27
lines changed

4 files changed

+179
-27
lines changed

extra/mariabackup/backup_copy.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,10 @@ void backup_release()
14281428
history_lock_time = time(NULL) - history_lock_time;
14291429
}
14301430

1431+
if (opt_lock_ddl_per_table) {
1432+
mdl_unlock_all();
1433+
}
1434+
14311435
if (opt_safe_slave_backup && sql_thread_started) {
14321436
msg("Starting slave SQL thread\n");
14331437
xb_mysql_query(mysql_connection,

extra/mariabackup/backup_mysql.cc

Lines changed: 79 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,76 @@ stop_query_killer()
868868
os_event_wait_time(kill_query_thread_stopped, 60000);
869869
}
870870

871+
872+
/*
873+
Killing connections that wait for MDL lock.
874+
If lock-ddl-per-table is used, there can be some DDL statements
875+
876+
FLUSH TABLES would hang infinitely, if DDL statements are waiting for
877+
MDL lock, which mariabackup currently holds. Therefore we start killing
878+
those statements from a dedicated thread, until FLUSH TABLES WITH READ LOCK
879+
succeeds.
880+
*/
881+
882+
static os_event_t mdl_killer_stop_event;
883+
static os_event_t mdl_killer_finished_event;
884+
885+
static
886+
os_thread_ret_t
887+
DECLARE_THREAD(kill_mdl_waiters_thread(void *))
888+
{
889+
MYSQL *mysql;
890+
if ((mysql = xb_mysql_connect()) == NULL) {
891+
msg("Error: kill mdl waiters thread failed to connect\n");
892+
goto stop_thread;
893+
}
894+
895+
for(;;){
896+
if (os_event_wait_time(mdl_killer_stop_event, 1000) == 0)
897+
break;
898+
899+
MYSQL_RES *result = xb_mysql_query(mysql,
900+
"SELECT ID, COMMAND FROM INFORMATION_SCHEMA.PROCESSLIST "
901+
" WHERE State='Waiting for table metadata lock'",
902+
true, true);
903+
while (MYSQL_ROW row = mysql_fetch_row(result))
904+
{
905+
char query[64];
906+
msg_ts("Killing MDL waiting query '%s' on connection '%s'\n",
907+
row[1], row[0]);
908+
snprintf(query, sizeof(query), "KILL QUERY %s", row[0]);
909+
xb_mysql_query(mysql, query, true);
910+
}
911+
}
912+
913+
mysql_close(mysql);
914+
915+
stop_thread:
916+
msg_ts("Kill mdl waiters thread stopped\n");
917+
os_event_set(mdl_killer_finished_event);
918+
os_thread_exit();
919+
return os_thread_ret_t(0);
920+
}
921+
922+
923+
static void start_mdl_waiters_killer()
924+
{
925+
mdl_killer_stop_event = os_event_create(0);
926+
mdl_killer_finished_event = os_event_create(0);
927+
os_thread_create(kill_mdl_waiters_thread, 0, 0);
928+
}
929+
930+
931+
/* Tell MDL killer to stop and finish for its completion*/
932+
static void stop_mdl_waiters_killer()
933+
{
934+
os_event_set(mdl_killer_stop_event);
935+
os_event_wait(mdl_killer_finished_event);
936+
937+
os_event_destroy(mdl_killer_stop_event);
938+
os_event_destroy(mdl_killer_finished_event);
939+
}
940+
871941
/*********************************************************************//**
872942
Function acquires either a backup tables lock, if supported
873943
by the server, or a global read lock (FLUSH TABLES WITH READ LOCK)
@@ -890,6 +960,10 @@ lock_tables(MYSQL *connection)
890960
return(true);
891961
}
892962

963+
if (opt_lock_ddl_per_table) {
964+
start_mdl_waiters_killer();
965+
}
966+
893967
if (!opt_lock_wait_timeout && !opt_kill_long_queries_timeout) {
894968

895969
/* We do first a FLUSH TABLES. If a long update is running, the
@@ -930,6 +1004,10 @@ lock_tables(MYSQL *connection)
9301004

9311005
xb_mysql_query(connection, "FLUSH TABLES WITH READ LOCK", false);
9321006

1007+
if (opt_lock_ddl_per_table) {
1008+
stop_mdl_waiters_killer();
1009+
}
1010+
9331011
if (opt_kill_long_queries_timeout) {
9341012
stop_query_killer();
9351013
}
@@ -1647,25 +1725,6 @@ mdl_lock_init()
16471725
}
16481726
}
16491727

1650-
#ifndef DBUG_OFF
1651-
/* Test that table is really locked, if lock_ddl_per_table is set.
1652-
The test is executed in DBUG_EXECUTE_IF block inside mdl_lock_table().
1653-
*/
1654-
static void check_mdl_lock_works(const char *table_name)
1655-
{
1656-
MYSQL *test_con= xb_mysql_connect();
1657-
char *query;
1658-
xb_a(asprintf(&query,
1659-
"SET STATEMENT max_statement_time=1 FOR ALTER TABLE %s"
1660-
" ADD COLUMN mdl_lock_column int", table_name));
1661-
int err = mysql_query(test_con, query);
1662-
DBUG_ASSERT(err);
1663-
int err_no = mysql_errno(test_con);
1664-
DBUG_ASSERT(err_no == ER_STATEMENT_TIMEOUT);
1665-
mysql_close(test_con);
1666-
free(query);
1667-
}
1668-
#endif
16691728
void
16701729
mdl_lock_table(ulint space_id)
16711730
{
@@ -1681,13 +1740,10 @@ mdl_lock_table(ulint space_id)
16811740
while (MYSQL_ROW row = mysql_fetch_row(mysql_result)) {
16821741
std::string full_table_name = ut_get_name(0,row[0]);
16831742
std::ostringstream lock_query;
1684-
lock_query << "SELECT * FROM " << full_table_name << " LIMIT 0";
1743+
lock_query << "SELECT 1 FROM " << full_table_name << " LIMIT 0";
16851744

16861745
msg_ts("Locking MDL for %s\n", full_table_name.c_str());
16871746
xb_mysql_query(mdl_con, lock_query.str().c_str(), false, false);
1688-
1689-
DBUG_EXECUTE_IF("check_mdl_lock_works",
1690-
check_mdl_lock_works(full_table_name.c_str()););
16911747
}
16921748

16931749
pthread_mutex_unlock(&mdl_lock_con_mutex);

extra/mariabackup/xtrabackup.cc

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,91 @@ datafiles_iter_free(datafiles_iter_t *it)
434434
free(it);
435435
}
436436

437+
#ifndef DBUG_OFF
438+
struct dbug_thread_param_t
439+
{
440+
MYSQL *con;
441+
const char *query;
442+
int expect_err;
443+
int expect_errno;
444+
os_event_t done_event;
445+
};
446+
447+
448+
/* Thread procedure used in dbug_start_query_thread. */
449+
extern "C"
450+
os_thread_ret_t
451+
DECLARE_THREAD(dbug_execute_in_new_connection)(void *arg)
452+
{
453+
mysql_thread_init();
454+
dbug_thread_param_t *par= (dbug_thread_param_t *)arg;
455+
int err = mysql_query(par->con, par->query);
456+
int err_no = mysql_errno(par->con);
457+
DBUG_ASSERT(par->expect_err == err);
458+
if (err && par->expect_errno)
459+
DBUG_ASSERT(err_no == par->expect_errno);
460+
mysql_close(par->con);
461+
mysql_thread_end();
462+
os_event_t done = par->done_event;
463+
delete par;
464+
os_event_set(done);
465+
os_thread_exit();
466+
return os_thread_ret_t(0);
467+
}
468+
469+
/*
470+
Execute query from a new connection, in own thread.
471+
472+
@param query - query to be executed
473+
@param wait_state - if not NULL, wait until query from new connection
474+
reaches this state (value of column State in I_S.PROCESSLIST)
475+
@param expected_err - if 0, query is supposed to finish successfully,
476+
otherwise query should return error.
477+
@param expected_errno - if not 0, and query finished with error,
478+
expected mysql_errno()
479+
*/
480+
static os_event_t dbug_start_query_thread(
481+
const char *query,
482+
const char *wait_state,
483+
int expected_err,
484+
int expected_errno)
485+
486+
{
487+
dbug_thread_param_t *par = new dbug_thread_param_t;
488+
par->query = query;
489+
par->expect_err = expected_err;
490+
par->expect_errno = expected_errno;
491+
par->done_event = os_event_create(0);
492+
par->con = xb_mysql_connect();
493+
os_thread_create(dbug_execute_in_new_connection, par, 0);
494+
495+
if (!wait_state)
496+
return par->done_event;
497+
498+
char q[256];
499+
snprintf(q, sizeof(q),
500+
"SELECT 1 FROM INFORMATION_SCHEMA.PROCESSLIST where ID=%lu"
501+
" AND Command='Query' AND State='%s'",
502+
mysql_thread_id(par->con), wait_state);
503+
for (;;) {
504+
MYSQL_RES *result = xb_mysql_query(mysql_connection,q, true, true);
505+
while (MYSQL_ROW row = mysql_fetch_row(result)) {
506+
goto end;
507+
}
508+
msg_ts("Waiting for query '%s' on connection %lu to "
509+
" reach state '%s'", query, mysql_thread_id(par->con),
510+
wait_state);
511+
my_sleep(1000);
512+
}
513+
end:
514+
msg_ts("query '%s' on connection %lu reached state '%s'", query,
515+
mysql_thread_id(par->con), wait_state);
516+
return par->done_event;
517+
}
518+
519+
os_event_t dbug_alter_thread_done;
520+
#endif
521+
437522
void mdl_lock_all()
438523
{
439524
mdl_lock_init();
@@ -449,6 +534,11 @@ void mdl_lock_all()
449534
mdl_lock_table(node->space->id);
450535
}
451536
datafiles_iter_free(it);
537+
538+
DBUG_EXECUTE_IF("check_mdl_lock_works",
539+
dbug_alter_thread_done =
540+
dbug_start_query_thread("ALTER TABLE test.t ADD COLUMN mdl_lock_column int",
541+
"Waiting for table metadata lock",1, ER_QUERY_INTERRUPTED););
452542
}
453543

454544
/** Check if the space id belongs to the table which name should
@@ -4078,6 +4168,11 @@ xtrabackup_backup_func()
40784168

40794169
backup_release();
40804170

4171+
DBUG_EXECUTE_IF("check_mdl_lock_works",
4172+
os_event_wait(dbug_alter_thread_done);
4173+
os_event_destroy(dbug_alter_thread_done);
4174+
);
4175+
40814176
if (ok) {
40824177
backup_finish();
40834178
}
@@ -4087,10 +4182,6 @@ xtrabackup_backup_func()
40874182
goto fail;
40884183
}
40894184

4090-
if (opt_lock_ddl_per_table) {
4091-
mdl_unlock_all();
4092-
}
4093-
40944185
xtrabackup_destroy_datasinks();
40954186

40964187
msg("mariabackup: Redo log (from LSN " LSN_PF " to " LSN_PF

extra/mariabackup/xtrabackup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ extern my_bool opt_noversioncheck;
110110
extern my_bool opt_no_backup_locks;
111111
extern my_bool opt_decompress;
112112
extern my_bool opt_remove_original;
113+
extern my_bool opt_lock_ddl_per_table;
113114

114115
extern char *opt_incremental_history_name;
115116
extern char *opt_incremental_history_uuid;

0 commit comments

Comments
 (0)