Skip to content

Commit

Permalink
MENT-328 Retry BACKUP STAGE BLOCK DDL in case of deadlocks
Browse files Browse the repository at this point in the history
MENT-328 wrongly assumed that the backup failed because of warnings from
mariabackup about files that were not found. These warnings are normal (and
the error message should be deleted).

randgen failed because mariabackup didn't retry BACKUP STAGE BLOCK DDL
if it failed with a deadlock.

To simplify things, I implemented the retry loop in the server as
this particular deadlock should be quickly resolved.
  • Loading branch information
montywi committed Feb 8, 2022
1 parent 0ec27d7 commit a1c2380
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 12 deletions.
23 changes: 22 additions & 1 deletion mysql-test/main/backup_locks.result
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,28 @@ MDL_INTENTION_EXCLUSIVE Schema metadata lock test
select * from t1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
backup unlock;
connection con1;
connection default;
#
# Check that BACKUP LOCK blocks some operations
#
create sequence seq1;
create sequence seq2;
backup lock seq1;
connection con1;
CREATE OR REPLACE SEQUENCE seq1 START -28;
ERROR HY000: Sequence 'test.seq1' values are conflicting
SET STATEMENT max_statement_time=10 FOR CREATE OR REPLACE SEQUENCE seq1 START 50;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 NOMAXVALUE;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 MAXVALUE 1000;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 for rename table seq2 to seq3, seq3 to seq1;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
connection default;
backup unlock;
drop table seq1,seq2;
#
# BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures.
#
Expand Down Expand Up @@ -141,7 +163,6 @@ ERROR HY000: Can't execute the given command because you have active locked tabl
SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u;
# restart
#
connection con1;
connection default;
disconnect con1;
show tables;
Expand Down
31 changes: 29 additions & 2 deletions mysql-test/main/backup_locks.test
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,39 @@ SELECT LOCK_MODE, LOCK_TYPE, TABLE_SCHEMA, TABLE_NAME FROM information_schema.me
--error ER_LOCK_DEADLOCK
select * from t1;
backup unlock;
connection con1;
--reap
connection default;

--echo #
--echo # Check that BACKUP LOCK blocks some operations
--echo #

# These tests have to be done with timeouts as we want to ensure that the tables
# don't change

create sequence seq1;
create sequence seq2;
backup lock seq1;
connection con1;
--error ER_SEQUENCE_INVALID_DATA
CREATE OR REPLACE SEQUENCE seq1 START -28;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR CREATE OR REPLACE SEQUENCE seq1 START 50;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 NOMAXVALUE;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 MAXVALUE 1000;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 for rename table seq2 to seq3, seq3 to seq1;
connection default;
backup unlock;
drop table seq1,seq2;

--echo #
--echo # BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures.
--echo #

delimiter |;
--error ER_SP_BADSTATEMENT
CREATE PROCEDURE p_BACKUP_LOCK()
Expand Down Expand Up @@ -162,8 +191,6 @@ SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u;

--echo #

connection con1;
--reap
connection default;
disconnect con1;
show tables;
Expand Down
37 changes: 28 additions & 9 deletions sql/backup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,12 @@ static bool backup_flush(THD *thd)
This will probably require a callback from the InnoDB code.
*/

/* Retry to get initial lock for 0.1 + 0.5 + 2.5 + 12.5 + 62.5 = 78.1 sec */
#define MAX_RETRY_COUNT 5

static bool backup_block_ddl(THD *thd)
{
uint sleep_time;
DBUG_ENTER("backup_block_ddl");

kill_delayed_threads();
Expand Down Expand Up @@ -275,17 +279,32 @@ static bool backup_block_ddl(THD *thd)
block new DDL's, in addition to all previous blocks
We didn't do this lock above, as we wanted DDL's to be executed while
we wait for non transactional tables (which may take a while).
We do this lock in a loop as we can get a deadlock if there are multi-object
DDL statements like
RENAME TABLE t1 TO t2, t3 TO t4
and the MDL happens in the middle of it.
*/
if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
MDL_BACKUP_WAIT_DDL,
thd->variables.lock_wait_timeout))
sleep_time= 100; // Start with 0.1 seconds
for (uint i= 0 ; i <= MAX_RETRY_COUNT ; i++)
{
/*
Could be a timeout. Downgrade lock to what is was before this function
was called so that this function can be called again
*/
backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH);
DBUG_RETURN(1);
if (!thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
MDL_BACKUP_WAIT_DDL,
thd->variables.lock_wait_timeout))
break;
if (thd->get_stmt_da()->sql_errno() != ER_LOCK_DEADLOCK || thd->killed ||
i == MAX_RETRY_COUNT)
{
/*
Could be a timeout. Downgrade lock to what it was before this function
was called so that this function can be called again
*/
backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH);
DBUG_RETURN(1);
}
thd->clear_error(); // Forget the DEADLOCK error
my_sleep(sleep_time);
sleep_time*= 5; // Wait a bit longer next time
}
DBUG_RETURN(0);
}
Expand Down

0 comments on commit a1c2380

Please sign in to comment.