Skip to content
Permalink
Browse files
10.4 wsrep group commit fixes (#1224)
* MDEV-16509 Improve wsrep commit performance with binlog disabled

Release commit order critical section early after trx_commit_low() if
binlog is not transaction coordinator. In order to avoid two phase commit,
binlog_hton is not registered for THD during IO_CACHE population.

Implemented a test which verifies that the transactions release
commit order early.

This optimization will change behavior during recovery as the commit
is not two phase when binlog is off. Fixed and recorded wsrep-recover-v25
and wsrep-recover to match the behavior.

* MDEV-18730 Ordering for wsrep binlog group commit

Previously out of order execution was allowed for wsrep commits.
Established proper ordering by populating wait_for_commit
for every wsrep THD and making group commit leader to wait for
prior commits before proceeding to trx_group_commit_leader().

* MDEV-18730 Added a test case to verify correct commit ordering

* MDEV-16509, MDEV-18730 Review fixes

Use WSREP_EMULATE_BINLOG() macro to decide if the binlog_hton
should be registered. Whitespace/syntax fixes and cleanups.

* MDEV-16509 Require binlog for galera_var_innodb_disallow_writes test

If the commit to InnoDB is done in one phase, the native InnoDB behavior
is that the transaction is committed in memory before it is persisted to
disk. This means that the innodb_disallow_writes=ON may not prevent
transaction to become visible to other readers before commit is completely
over. On the other hand, if the commit is two phase (as it is with binlog),
the transaction will be blocked in prepare phase.

Fixed the test to use binlog, which enforces two phase commit, which
in turn makes commit to block before the changes become visible to
other connections. This guarantees that the test produces expected
result.
  • Loading branch information
temeo authored and Jan Lindström committed Mar 15, 2019
1 parent b234f81 commit 1ef50a3
Show file tree
Hide file tree
Showing 27 changed files with 572 additions and 160 deletions.
@@ -82,6 +82,7 @@ extern struct wsrep_service_st {
my_bool (*wsrep_thd_skip_locking_func)(const MYSQL_THD thd);
const char* (*wsrep_get_sr_table_name_func)();
my_bool (*wsrep_get_debug_func)();
void (*wsrep_commit_ordered_func)(MYSQL_THD thd);
} *wsrep_service;

#define MYSQL_SERVICE_WSREP_INCLUDED
@@ -120,6 +121,7 @@ extern struct wsrep_service_st {
#define wsrep_thd_skip_locking(T) wsrep_service->wsrep_thd_skip_locking_func(T)
#define wsrep_get_sr_table_name() wsrep_service->wsrep_get_sr_table_name_func()
#define wsrep_get_debug() wsrep_service->wsrep_get_debug_func()
#define wsrep_commit_ordered(T) wsrep_service->wsrep_commit_ordered(T)

#else

@@ -209,5 +211,8 @@ extern const char* wsrep_sr_table_name_full;
extern "C" const char* wsrep_get_sr_table_name();

extern "C" my_bool wsrep_get_debug();

extern "C" void wsrep_commit_ordered(MYSQL_THD thd);

#endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */
@@ -0,0 +1,75 @@
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
SET SESSION wsrep_sync_wait = 0;
connection node_1;
SET SESSION wsrep_sync_wait = 0;
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached WAIT_FOR bcol_continue";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached WAIT_FOR acol_continue";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL after_group_reached WAIT_FOR after_group_continue";
INSERT INTO t1 VALUES (1);
connection ctrl;
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached";
wsrep_last_seen_gtid_match
1
SELECT * FROM t1;
f1
1
SET DEBUG_SYNC = "now SIGNAL bcol_continue";
SET DEBUG_SYNC = "now WAIT_FOR acol_reached";
wsrep_last_seen_gtid_match
1
SELECT * FROM t1;
f1
1
SET DEBUG_SYNC = "now SIGNAL acol_continue";
SET DEBUG_SYNC = "now WAIT_FOR after_group_reached";
wsrep_last_seen_gtid_do_not_match
1
SET DEBUG_SYNC = "now SIGNAL after_group_continue";
connection node_1;
SET SESSION wsrep_sync_wait = 0;
connection ctrl;
connection node_1;
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached_1 WAIT_FOR bcol_continue_1";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached_1 WAIT_FOR acol_continue_1";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL agac_reached_1 WAIT_FOR agac_continue_1";
INSERT INTO t1 VALUES (2);;
connection ctrl;
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached_1";
wsrep_last_seen_gtid_match
1
connection node_1a;
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached_2 WAIT_FOR bcol_continue_2";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached_2 WAIT_FOR acol_continue_2";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL agac_reached_2 WAIT_FOR agac_continue_2";
INSERT INTO t1 VALUES (3);;
connection ctrl;
SET DEBUG_SYNC = "now SIGNAL bcol_continue_1";
SET DEBUG_SYNC = "now WAIT_FOR acol_reached_1";
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached_2";
wsrep_last_seen_gtid_match
1
SET DEBUG_SYNC = "now SIGNAL bcol_continue_2";
SET DEBUG_SYNC = "now WAIT_FOR acol_reached_2";
wsrep_last_seen_gtid_match
1
SET DEBUG_SYNC = "now SIGNAL acol_continue_1";
SET DEBUG_SYNC = "now WAIT_FOR agac_reached_1";
wsrep_last_seen_gtid_no_match
1
SET DEBUG_SYNC = "now SIGNAL acol_continue_2";
SET DEBUG_SYNC = "now WAIT_FOR agac_reached_2";
wsrep_last_seen_gtid_no_match
1
SET DEBUG_SYNC = "now SIGNAL agac_continue_1";
SET DEBUG_SYNC = "now SIGNAL agac_continue_2";
connection node_1;
connection node_1a;
connection ctrl;
SET DEBUG_SYNC = "RESET";
DROP TABLE t1;
disconnect ctrl;
disconnect node_1a;
disconnect node_2;
disconnect node_1;
@@ -0,0 +1,27 @@
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INT PRIMARY KEY);
connection node_1;
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached WAIT_FOR acol_continue";
INSERT INTO t1 VALUES (1);
connection ctrl;
SET DEBUG_SYNC = "now WAIT_FOR acol_reached";
connection node_1_sr;
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_trx_fragment_unit = 'rows';
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (2);
connection ctrl;
SET DEBUG_SYNC = "now SIGNAL acol_continue";
connection node_1;
connection node_1_sr;
ROLLBACK;
connection ctrl;
SET DEBUG_SYNC = "RESET";
disconnect ctrl;
disconnect node_1_sr;
connection node_1;
DROP TABLE t1;
disconnect node_2;
disconnect node_1;
@@ -0,0 +1,141 @@
#
# Test various executions which go through binlog group commit
#

--source include/galera_cluster.inc
--source include/have_debug_sync.inc


CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;

--let $galera_connection_name = ctrl
--let $galera_server_number = 1
--source include/galera_connect.inc

# Scenario 1: Block INSERT after commit order release after queued for
# group commit. Verify that
#
# - WSREP_LAST_SEEN_GTID is not advanced before commit finishes
# - The INSERT does not become visible before commit finishes

# Turn off sync wait to avoid blocking and use WSREP_LAST_SEEN_GTID()
# to observe gtid position.
SET SESSION wsrep_sync_wait = 0;
--let $last_seen_gtid_prev = `SELECT WSREP_LAST_SEEN_GTID()`

--connection node_1
SET SESSION wsrep_sync_wait = 0;
# Set up sync points
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached WAIT_FOR bcol_continue";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached WAIT_FOR acol_continue";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL after_group_reached WAIT_FOR after_group_continue";
# Send insert which will block in the sync points above
--send INSERT INTO t1 VALUES (1)

--connection ctrl
# INSERT has gone through wsrep_ordered_commit() and the transaction is
# committed in memory.
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached";
--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() = '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_match
--enable_query_log
SELECT * FROM t1;
SET DEBUG_SYNC = "now SIGNAL bcol_continue";

# SE commit finished but wsrep_after_commit() has not called yet.
SET DEBUG_SYNC = "now WAIT_FOR acol_reached";
--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() = '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_match
--enable_query_log
SELECT * FROM t1;
SET DEBUG_SYNC = "now SIGNAL acol_continue";

SET DEBUG_SYNC = "now WAIT_FOR after_group_reached";
--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() != '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_do_not_match
--enable_query_log
SET DEBUG_SYNC = "now SIGNAL after_group_continue";

--connection node_1
--reap

#
# Scenario 2: Verify that two INSERTs from two different connections
# queue for commit.
#
--let $galera_connection_name = node_1a
--let $galera_server_number = 1
--source include/galera_connect.inc
SET SESSION wsrep_sync_wait = 0;

--connection ctrl
--let $last_seen_gtid_prev = `SELECT WSREP_LAST_SEEN_GTID()`

--connection node_1
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached_1 WAIT_FOR bcol_continue_1";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached_1 WAIT_FOR acol_continue_1";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL agac_reached_1 WAIT_FOR agac_continue_1";
--send INSERT INTO t1 VALUES (2);
--connection ctrl
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached_1";

--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() = '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_match
--enable_query_log

--connection node_1a
SET DEBUG_SYNC = "wsrep_before_commit_order_leave SIGNAL bcol_reached_2 WAIT_FOR bcol_continue_2";
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached_2 WAIT_FOR acol_continue_2";
SET DEBUG_SYNC = "after_group_after_commit SIGNAL agac_reached_2 WAIT_FOR agac_continue_2";
--send INSERT INTO t1 VALUES (3);

# Now INSERTs are queued, node_1 waiting after releasing commit order,
# node_1a waiting before releasing commit order.
--connection ctrl
SET DEBUG_SYNC = "now SIGNAL bcol_continue_1";
SET DEBUG_SYNC = "now WAIT_FOR acol_reached_1";
SET DEBUG_SYNC = "now WAIT_FOR bcol_reached_2";

--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() = '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_match
--enable_query_log

SET DEBUG_SYNC = "now SIGNAL bcol_continue_2";
SET DEBUG_SYNC = "now WAIT_FOR acol_reached_2";

--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() = '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_match
--enable_query_log

# Last seen GTIDs are incremented one by one once after_group_after_commit
# is reached.
SET DEBUG_SYNC = "now SIGNAL acol_continue_1";
SET DEBUG_SYNC = "now WAIT_FOR agac_reached_1";

--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() != '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_no_match
--enable_query_log

--let $last_seen_gtid_prev = `SELECT WSREP_LAST_SEEN_GTID()`
SET DEBUG_SYNC = "now SIGNAL acol_continue_2";
SET DEBUG_SYNC = "now WAIT_FOR agac_reached_2";
--disable_query_log
--eval SELECT WSREP_LAST_SEEN_GTID() != '$last_seen_gtid_prev' AS wsrep_last_seen_gtid_no_match
--enable_query_log

SET DEBUG_SYNC = "now SIGNAL agac_continue_1";
SET DEBUG_SYNC = "now SIGNAL agac_continue_2";

--connection node_1
--reap
--connection node_1a
--reap

--connection ctrl
SET DEBUG_SYNC = "RESET";

DROP TABLE t1;

--disconnect ctrl
--disconnect node_1a
--source include/galera_end.inc
@@ -1,9 +1,18 @@
#
# This test checks that innodb_disallow_writes works as expected
#
# Note that we need to enable binlog for this test: If the commit
# to InnoDB is done in one phase, the transaction is committed in
# memory before it is persisted to disk. This means that the
# innodb_disallow_writes=ON may not prevent transaction to
# become visible to other readers. On the other hand, if the
# commit is two phase (as it is with binlog), the transaction
# will be blocked in prepare phase.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc

# Open a separate connection to be used to run SHOW PROCESSLIST
--let $galera_connection_name = node_1a
@@ -0,0 +1,71 @@
#
# Test scenario:
#
# Run an autocommit INSERT and stop the execution after the INSERT
# has released commit order critical section. On another connection,
# run SR transaction which will store one fragment into streaming log.
# If the bug is present, the fragment streaming log commit may
# out of order, and completing INSERT may cause assertion in debug build.
#
# Note that due to nature of this bug, it is may not be possible
# to construct fully deterministic test case which will crash the
# server each time if the bug is present, but will work with fix.
#

--source include/galera_cluster.inc
--source include/have_log_bin.inc
--source include/have_debug_sync.inc

CREATE TABLE t1 (f1 INT PRIMARY KEY);

# Control connection for controlling node_1 debug sync points
--let $galera_connection_name = ctrl
--let $galera_server_number = 1
--source include/galera_connect.inc

# Another connection for SR transaction
--let $galera_connection_name = node_1_sr
--let $galera_server_number = 1
--source include/galera_connect.inc

# Set up sync point and send INSERT
--connection node_1
SET DEBUG_SYNC = "wsrep_after_commit_order_leave SIGNAL acol_reached WAIT_FOR acol_continue";
--send INSERT INTO t1 VALUES (1)

# Wait until INSERT releases commit order
--connection ctrl
SET DEBUG_SYNC = "now WAIT_FOR acol_reached";

# Streaming transaction, will replicate fragment for each row separately.
--connection node_1_sr
SET SESSION wsrep_sync_wait = 0;
SET SESSION wsrep_trx_fragment_unit = 'rows';
SET SESSION wsrep_trx_fragment_size = 1;

START TRANSACTION;
--send INSERT INTO t1 VALUES (2)

--connection ctrl
# Now let the thread node_1 continue after a one second sleep.
# The sleep while not completely deterministic, will allow the SR
# insert to complete the commit out of order in most of the cases if
# the bug is present, leading to assertion in debug build.
--sleep 1
SET DEBUG_SYNC = "now SIGNAL acol_continue";

--connection node_1
--reap
--connection node_1_sr
--reap
ROLLBACK;

--connection ctrl
SET DEBUG_SYNC = "RESET";
--disconnect ctrl
--disconnect node_1_sr

--connection node_1
DROP TABLE t1;

--source include/galera_end.inc

This file was deleted.

0 comments on commit 1ef50a3

Please sign in to comment.