diff --git a/mysql-test/suite/galera/r/galera_nonPK_and_PA.result b/mysql-test/suite/galera/r/galera_nonPK_and_PA.result new file mode 100644 index 0000000000000..5ad55417fd105 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_nonPK_and_PA.result @@ -0,0 +1,63 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 VARCHAR(32) NOT NULL) ENGINE=InnoDB; +INSERT INTO t1 (f1) VALUES ('0e66c5227a8a'); +INSERT INTO t1 (f1) VALUES ('c6c112992c9'); +CREATE TABLE t2 (i int primary key); +connection node_2; +SET SESSION wsrep_sync_wait = 0; +SET GLOBAL wsrep_slave_threads = 2; +*************************************************************** +scenario 1, conflicting UPDATE +*************************************************************** +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; +connection node_1; +START TRANSACTION; +UPDATE t1 SET f1='5ffceebfada' WHERE t1.f1 = 'c6c112992c9'; +COMMIT; +connection node_2; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +connection node_1; +START TRANSACTION; +UPDATE t1 SET f1='4ffceebfcdc' WHERE t1.f1 = '0e66c5227a8a'; +COMMIT; +connection node_2; +distance +1 +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +*************************************************************** +scenario 2, conflicting DELETE +*************************************************************** +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; +connection node_1; +START TRANSACTION; +INSERT INTO t2 VALUES (1); +DELETE FROM t1 WHERE f1='5ffceebfada'; +COMMIT; +connection node_2; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +connection node_1; +START TRANSACTION; +INSERT INTO t2 VALUES (2); +DELETE FROM t1 WHERE f1='4ffceebfcdc'; +COMMIT; +connection node_2; +distance +1 +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +connection node_1; +SET GLOBAL wsrep_slave_threads = DEFAULT; +DROP TABLE t1; +DROP TABLE t2; +connection node_2; +SET GLOBAL wsrep_slave_threads = DEFAULT; diff --git a/mysql-test/suite/galera/t/galera_nonPK_and_PA.test b/mysql-test/suite/galera/t/galera_nonPK_and_PA.test new file mode 100644 index 0000000000000..8a5173f576bcc --- /dev/null +++ b/mysql-test/suite/galera/t/galera_nonPK_and_PA.test @@ -0,0 +1,168 @@ +# +# This test is a modified version of Gabor Orosz (GOro) test in jira tracker: +# https://jira.mariadb.org/browse/MDEV-25551 +# +# The underlying problem with MDEV-25551 turned out to be that +# transactions having changes for tables with no primary key, +# were not safe to apply in parallel. This is due to excessive locking +# in innodb side, and even non related row modifications could end up +# in lock conflict during applying. +# +# The test creates a table with no primary key definition and executes two +# transactions (in node1) modifying separate rows in the table. In node2 +# first applier is paused before commit phase, and second transaction is +# then submitted to see if it can interfere with the first transaciton. +# The fix for MDEV-25551 has disabled parallel applying for tables with no PK, +# and in the test applying of the send trasnaction should not even start, before +# the fisrt trkansaction is released from the sync point. +# The test also verifies that certification depedency status reflects the fact +# that the two transactions depend on each other. +# +# The test has two scenarios where both UPDATE and DELETE statements are verified +# to disable parallel applying +# + +--source include/galera_cluster.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + + +# Setup + +CREATE TABLE t1 (f1 VARCHAR(32) NOT NULL) ENGINE=InnoDB; +INSERT INTO t1 (f1) VALUES ('0e66c5227a8a'); +INSERT INTO t1 (f1) VALUES ('c6c112992c9'); + +CREATE TABLE t2 (i int primary key); + +--connection node_2 +SET SESSION wsrep_sync_wait = 0; +--let $wait_condition = SELECT COUNT(*)=2 FROM t1; +--source include/wait_condition.inc + +# Ensure that we have enough applier threads to process transactions in parallel +SET GLOBAL wsrep_slave_threads = 2; + +--echo *************************************************************** +--echo scenario 1, conflicting UPDATE +--echo *************************************************************** + +# Set up a synchronization point to catch the first transaction +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +# Invoke the first transaction +START TRANSACTION; +UPDATE t1 SET f1='5ffceebfada' WHERE t1.f1 = 'c6c112992c9'; +COMMIT; + +--connection node_2 +# Wait for the first transaction to apply until commit phase +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc + +# remember status for received replication counter and certification dependency distance +--let $expected_wsrep_received = `SELECT VARIABLE_VALUE+1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_received'` +--let $cert_deps_distance = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance'` + +--connection node_1 +# Invoke the second transaction +START TRANSACTION; +UPDATE t1 SET f1='4ffceebfcdc' WHERE t1.f1 = '0e66c5227a8a'; +COMMIT; + +# sleep is probably obsolete here, but it is good to give the latter update time to +# proceed in applying in node 2. In buggy version the update will start applying +# and cause conflict there. +--sleep 5 + +--connection node_2 +# Wait for the second transaction to appear in repliaction queue +--let $wait_condition = SELECT VARIABLE_VALUE= $expected_wsrep_received FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_received'; +--source include/wait_condition.inc + +# verify that certification dependency distance has dropped +--disable_query_log +--eval SELECT VARIABLE_VALUE < $cert_deps_distance as 'distance' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance' +--enable_query_log + +# if deps distance dropped, it is indirect evidence that parallel applying was not approved + +# Let the first transaction to proceed +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc + +# second applier should now hit sync point +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + + +--echo *************************************************************** +--echo scenario 2, conflicting DELETE +--echo *************************************************************** + +# Set up a synchronization point to catch the first transaction +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +# Invoke the first transaction, mix this with insert to table having PK +START TRANSACTION; +INSERT INTO t2 VALUES (1); +DELETE FROM t1 WHERE f1='5ffceebfada'; +COMMIT; + +--connection node_2 +# Wait for the first transaction to apply until commit phase +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc + +# remember status for received replication counter and certification dependency distance +--let $expected_wsrep_received = `SELECT VARIABLE_VALUE+1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_received'` +--let $cert_deps_distance = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance'` + +--connection node_1 +# Invoke the second transaction, again mix this with insert to table having PK +START TRANSACTION; +INSERT INTO t2 VALUES (2); +DELETE FROM t1 WHERE f1='4ffceebfcdc'; +COMMIT; + +# sleep is probably obsolete here, but it is good to give the latter update time to +# proceed in applying in node 2. In buggy version the update will start applying +# and cause conflict there. +--sleep 5 + +--connection node_2 +# Wait for the second transaction to appear in repliaction queue +--let $wait_condition = SELECT VARIABLE_VALUE= $expected_wsrep_received FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_received'; +--source include/wait_condition.inc + +# verify that certification dependency distance has dropped +--disable_query_log +--eval SELECT VARIABLE_VALUE < $cert_deps_distance as 'distance' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance' +--enable_query_log + +# if deps distance dropped, it is indirect evidence that parallel applying was not approved + +# Let the first transaction to proceed +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc + +# second applier should now hit sync point +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Teardown +--connection node_1 +SET GLOBAL wsrep_slave_threads = DEFAULT; + +DROP TABLE t1; +DROP TABLE t2; +--connection node_2 +SET GLOBAL wsrep_slave_threads = DEFAULT; diff --git a/mysql-test/suite/galera_sr/r/galera_sr_nonPK_and_PA.result b/mysql-test/suite/galera_sr/r/galera_sr_nonPK_and_PA.result new file mode 100644 index 0000000000000..b7e9cf49f9517 --- /dev/null +++ b/mysql-test/suite/galera_sr/r/galera_sr_nonPK_and_PA.result @@ -0,0 +1,46 @@ +connection node_2; +connection node_1; +connection node_2; +SET SESSION wsrep_sync_wait = 0; +SET GLOBAL wsrep_slave_threads = 2; +flush status; +connection node_1; +CREATE TABLE t1 (f1 int, f2 int) ENGINE=InnoDB; +CREATE TABLE t2 (f1 int primary key, f2 int) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1,0); +INSERT INTO t1 VALUES (2,0); +INSERT INTO t2 VALUES (1,0); +INSERT INTO t2 VALUES (2,0); +connection node_2; +connection node_1; +set session wsrep_trx_fragment_size=1; +START TRANSACTION; +UPDATE t1 SET f2=1 where f1=1; +connection node_2; +distance +1 +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +update t2 set f2=1 where f1=1; +connection node_2; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_1; +UPDATE t2 set f2=2 where f1=2; +connection node_2; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET GLOBAL wsrep_provider_options = 'dbug='; +connection node_1; +COMMIT; +connection node_1; +SET GLOBAL wsrep_slave_threads = DEFAULT; +DROP TABLE t1; +DROP TABLE t2; +connection node_2; +SET GLOBAL wsrep_slave_threads = DEFAULT; diff --git a/mysql-test/suite/galera_sr/t/galera_sr_nonPK_and_PA.test b/mysql-test/suite/galera_sr/t/galera_sr_nonPK_and_PA.test new file mode 100644 index 0000000000000..c343cd202bf14 --- /dev/null +++ b/mysql-test/suite/galera_sr/t/galera_sr_nonPK_and_PA.test @@ -0,0 +1,109 @@ +# +# This test is a modified version of Gabor Orosz (GOro) test in jira tracker: +# https://jira.mariadb.org/browse/MDEV-25551 +# +# The underlying problem with MDEV-25551 turned out to be that +# transactions having changes for tables with no primary key, +# were not safe to apply in parallel. This is due to excessive locking +# in innodb side, and even non related row modifications could end up +# in lock conflict during applying. +# +# The test verifies that a transaction executing a streaming replication +# will disable parallel applying if it modifies a table with no primary key. +# And, if PA was disabled temporarily, it will be relaxed if next fragment +# contains changes for table with primary key. +# + +--source include/galera_cluster.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + + +# Setup +--connection node_2 +SET SESSION wsrep_sync_wait = 0; + +# Ensure that we have enough applier threads to process transactions in parallel +SET GLOBAL wsrep_slave_threads = 2; + +flush status; + +--connection node_1 +CREATE TABLE t1 (f1 int, f2 int) ENGINE=InnoDB; +CREATE TABLE t2 (f1 int primary key, f2 int) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1,0); +INSERT INTO t1 VALUES (2,0); + +INSERT INTO t2 VALUES (1,0); +INSERT INTO t2 VALUES (2,0); + +--connection node_2 +--let $wait_condition = SELECT COUNT(*)=2 FROM t2; +--source include/wait_condition.inc + +# remember status for received replication counter and certification dependency distance +--let $cert_deps_distance = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance'` + +--connection node_1 +# Invoke the first transaction +set session wsrep_trx_fragment_size=1; +START TRANSACTION; +UPDATE t1 SET f2=1 where f1=1; + +--connection node_2 +# verify that certification dependency distance has dropped +--disable_query_log +--eval SELECT VARIABLE_VALUE < $cert_deps_distance as 'distance' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cert_deps_distance' +--enable_query_log + +# if deps distance dropped, it is indirect evidence that parallel applying was not approved + +# Try next that PA retricting is relaxed, if next fragment updates table t1 with primary key +# wsrep_cert_deps_distance cannot be trsuted in this test phase, we verify parallel applying +# by setting sync point for applier thread + +# Set up a synchronization point to catch update on t2 +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +update t2 set f2=1 where f1=1; + +--connection node_2 +# Wait for the update t2 to apply until commit phase +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc + +# Set up a synchronization point to catch the SR trx applying +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +# continue SR transaction, and now update t2, which has PK +UPDATE t2 set f2=2 where f1=2; + +--connection node_2 +# Wait for the update t2 to apply until commit phase +--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_slave_enter_sync +--source include/galera_wait_sync_point.inc + +# Let the first transaction to proceed +--let $galera_sync_point = commit_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--source include/galera_clear_sync_point.inc + +--connection node_1 +COMMIT; + +# Teardown +--connection node_1 +SET GLOBAL wsrep_slave_threads = DEFAULT; + +DROP TABLE t1; +DROP TABLE t2; +--connection node_2 +SET GLOBAL wsrep_slave_threads = DEFAULT; diff --git a/sql/handler.cc b/sql/handler.cc index ac5b43249db47..99ec0f013200c 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -6808,8 +6808,21 @@ int handler::ha_update_row(const uchar *old_data, const uchar *new_data) rows_changed++; error= binlog_log_row(table, old_data, new_data, log_func); #ifdef WITH_WSREP + THD *thd= ha_thd(); + bool is_wsrep= WSREP(thd); + /* for SR, the followin wsrep_after_row() may replicate a fragment, so we have to + declare potential PA unsafe before that*/ + if (table->s->primary_key == MAX_KEY && + is_wsrep && wsrep_thd_is_local(thd)) + { + WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key); + if (thd->wsrep_cs().mark_transaction_pa_unsafe()) + { + WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe"); + } + } if (table_share->tmp_table == NO_TMP_TABLE && - WSREP(ha_thd()) && (error= wsrep_after_row(ha_thd()))) + is_wsrep && (error= wsrep_after_row(thd))) { return error; } @@ -6870,8 +6883,21 @@ int handler::ha_delete_row(const uchar *buf) rows_changed++; error= binlog_log_row(table, buf, 0, log_func); #ifdef WITH_WSREP + THD *thd= ha_thd(); + bool is_wsrep= WSREP(thd); + /* for SR, the followin wsrep_after_row() may replicate a fragment, so we have to + declare potential PA unsafe before that*/ + if (table->s->primary_key == MAX_KEY && + is_wsrep && wsrep_thd_is_local(thd)) + { + WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key); + if (thd->wsrep_cs().mark_transaction_pa_unsafe()) + { + WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe"); + } + } if (table_share->tmp_table == NO_TMP_TABLE && - WSREP(ha_thd()) && (error= wsrep_after_row(ha_thd()))) + is_wsrep && (error= wsrep_after_row(thd))) { return error; } diff --git a/wsrep-lib b/wsrep-lib index f271ad0c6e3c6..85b815032145e 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit f271ad0c6e3c647df83c1d5ec9cd26d77cef2337 +Subproject commit 85b815032145ef5c18b7a8931d84becf6df8cd18