Skip to content

Commit

Permalink
q# This is a combination of 2 commits.
Browse files Browse the repository at this point in the history
MDEV-31503 ALTER SEQUENCE ends up in optimistic parallel slave binlog out-of-order

The OOO error still was possible even after MDEV-31077. This time
it occured through open_table() when the sequence table was not in
the table cache *and* the table was created before the last server
restart.
In such context a internal (read-only) transaction is committed
and it was not blocked from doing a wakeup() call to subsequent
transactions.

Fixed with extending suspend_subsequent_commits() effect for the entirety
of Sql_cmd_alter_sequence::execute().
An elaborated MDEV-31077 test proves the fixes of both failure scenarios.

Also the bug condition suggests a workaround to pre-SELECT sequence
tables before START SLAVE.

Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
  • Loading branch information
andrelkin committed Jul 12, 2023
1 parent 3f5cee8 commit 7dde504
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 17 deletions.
13 changes: 12 additions & 1 deletion mysql-test/suite/rpl/r/rpl_parallel_seq.result
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,21 @@ Warnings:
Note 1255 Slave already has been stopped
RESET MASTER;
SET @@global.gtid_slave_pos="";
SET @@global.gtid_strict_mode=1;
connection master;
RESET MASTER;
CREATE TABLE ti (a INT) ENGINE=innodb;
CREATE SEQUENCE s2 ENGINE=innodb;
include/save_master_gtid.inc
connection slave;
include/start_slave.inc
include/sync_with_master_gtid.inc
include/stop_slave.inc
include/rpl_restart_server.inc [server_number=2]
SET @@global.slave_parallel_threads=2;
SET @@global.slave_parallel_mode=optimistic;
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
SET @@global.gtid_strict_mode=1;
connection master;
SET @@gtid_seq_no=100;
ALTER SEQUENCE s2 restart with 1;
INSERT INTO ti SET a=1;
Expand All @@ -60,6 +70,7 @@ SELECT @@global.gtid_binlog_state "Master gtid state";
Master gtid state
0-1-101
connection slave;
SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES;
include/start_slave.inc
SELECT @@global.gtid_binlog_state, @@global.gtid_slave_pos as "no 100,101 yet in both";
@@global.gtid_binlog_state no 100,101 yet in both
Expand Down
21 changes: 19 additions & 2 deletions mysql-test/suite/rpl/t/rpl_parallel_seq.test
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,39 @@ SET DEBUG_SYNC = 'now SIGNAL continue_worker';
--source include/stop_slave.inc
RESET MASTER;
SET @@global.gtid_slave_pos="";
--let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode`
SET @@global.gtid_strict_mode=1;
--connection master
RESET MASTER;

# Load from master
CREATE TABLE ti (a INT) ENGINE=innodb;
CREATE SEQUENCE s2 ENGINE=innodb;
--source include/save_master_gtid.inc

--connection slave
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
--let $rpl_server_number= 2
--source include/rpl_restart_server.inc
# upon restart
SET @@global.slave_parallel_threads=2;
SET @@global.slave_parallel_mode=optimistic;
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
--let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode`
SET @@global.gtid_strict_mode=1;

--connection master
SET @@gtid_seq_no=100;
ALTER SEQUENCE s2 restart with 1;
INSERT INTO ti SET a=1;
--source include/save_master_gtid.inc
SELECT @@global.gtid_binlog_state "Master gtid state";

--connection slave
# The following FT complicates the opening table time with committing
# an internal transaction. The rest of the test also proves
# MDEV-31503 "branch" of the OOO error is fixed.
SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES;
--source include/start_slave.inc

--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit"
Expand Down
41 changes: 27 additions & 14 deletions sql/sql_sequence.cc
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,20 @@ int SEQUENCE::set_value(TABLE *table, longlong next_val, ulonglong next_round,
DBUG_RETURN(error);
}

#if defined(HAVE_REPLICATION)
class wait_for_commit_raii
{
private:
THD *m_thd;
wait_for_commit *m_wfc;

public:
wait_for_commit_raii(THD* thd) :
m_thd(thd), m_wfc(thd->suspend_subsequent_commits())
{}
~wait_for_commit_raii() { m_thd->resume_subsequent_commits(m_wfc); }
};
#endif

bool Sql_cmd_alter_sequence::execute(THD *thd)
{
Expand All @@ -912,7 +926,10 @@ bool Sql_cmd_alter_sequence::execute(THD *thd)
SEQUENCE *seq;
No_such_table_error_handler no_such_table_handler;
DBUG_ENTER("Sql_cmd_alter_sequence::execute");

#if defined(HAVE_REPLICATION)
/* No wakeup():s of subsequent commits is allowed in this function. */
wait_for_commit_raii suspend_wfc(thd);
#endif

if (check_access(thd, ALTER_ACL, first_table->db.str,
&first_table->grant.privilege,
Expand Down Expand Up @@ -1010,19 +1027,15 @@ bool Sql_cmd_alter_sequence::execute(THD *thd)
else
table->file->print_error(error, MYF(0));
table->s->sequence->write_unlock(table);
{
wait_for_commit* suspended_wfc= thd->suspend_subsequent_commits();
if (trans_commit_stmt(thd))
error= 1;
if (trans_commit_implicit(thd))
error= 1;
thd->resume_subsequent_commits(suspended_wfc);
DBUG_EXECUTE_IF("hold_worker_on_schedule",
{
/* delay binlogging of a parent trx in rpl_parallel_seq */
my_sleep(100000);
});
}
if (trans_commit_stmt(thd))
error= 1;
if (trans_commit_implicit(thd))
error= 1;
DBUG_EXECUTE_IF("hold_worker_on_schedule",
{
/* delay binlogging of a parent trx in rpl_parallel_seq */
my_sleep(100000);
});
if (likely(!error))
error= write_bin_log(thd, 1, thd->query(), thd->query_length());
if (likely(!error))
Expand Down

0 comments on commit 7dde504

Please sign in to comment.