Skip to content
Permalink
Browse files
MDEV-30317 Transaction savepoint may cause failure in galera replaying
Created mtr test for reproducing the crash

Developed actual fix for the issue.
Setting THD::system_thread_info.rpl_sql_info for replayer thread,
same way as it is handled for appliers.

Recorded test result, with the fix

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
  • Loading branch information
sjaakola authored and Jan Lindström committed Jan 13, 2023
1 parent 66c0532 commit cd97523
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 16 deletions.
@@ -0,0 +1,53 @@
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1));
INSERT INTO t1 VALUES (1, 'a');
INSERT INTO t1 VALUES (2, 'a');
connection node_1;
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE t1 SET f2 = 'b' WHERE f1 = 1;
SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE;
f1 f2
2 a
SAVEPOINT my_sp;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET SESSION wsrep_sync_wait=0;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_2;
UPDATE t1 SET f2 = 'c' WHERE f1 = 2;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
COMMIT;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end';
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
connection node_1;
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
COUNT(*) = 1
1
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
COUNT(*) = 1
1
wsrep_local_replays
1
connection node_2;
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
COUNT(*) = 1
1
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
COUNT(*) = 1
1
DROP TABLE t1;
@@ -0,0 +1,86 @@
#
# This test tests replaying a transaction with savepoint
#

--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc

--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`

CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1));
INSERT INTO t1 VALUES (1, 'a');
INSERT INTO t1 VALUES (2, 'a');

--connection node_1
SET AUTOCOMMIT=ON;
START TRANSACTION;

UPDATE t1 SET f2 = 'b' WHERE f1 = 1;
SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE;
SAVEPOINT my_sp;

# Block the applier on node #1 and issue a conflicting update on node #2
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
SET SESSION wsrep_sync_wait=0;
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc

--connection node_2
UPDATE t1 SET f2 = 'c' WHERE f1 = 2;

--connection node_1a
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc

# Block the commit, send the COMMIT and wait until it gets blocked

--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc

--connection node_1
--send COMMIT

--connection node_1a

--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc

# Let the conflicting UPDATE proceed and wait until it hits abort_trx_end.
# The victim transaction still sits in commit_monitor_master_sync_point.

--let $galera_sync_point = abort_trx_end
--source include/galera_set_sync_point.inc
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
--let $galera_sync_point = abort_trx_end commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc

# Let the transactions proceed
--source include/galera_clear_sync_point.inc
--let $galera_sync_point = abort_trx_end
--source include/galera_signal_sync_point.inc
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc

# Commit succeeds
--connection node_1
--reap

SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';

# wsrep_local_replays has increased by 1
--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
--disable_query_log
--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays;
--enable_query_log

--connection node_2
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';

DROP TABLE t1;

@@ -1,4 +1,4 @@
/* Copyright 2018-2021 Codership Oy <info@codership.com>
/* Copyright 2018-2023 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -619,6 +619,9 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd)
transactional locks */
DBUG_ASSERT(!orig_thd->mdl_context.has_transactional_locks());

replayer_thd->system_thread_info.rpl_sql_info=
new rpl_sql_thread_info(replayer_thd->wsrep_rgi->rli->mi->rpl_filter);

/* Make a shadow copy of diagnostics area and reset */
m_da_shadow.status= orig_thd->get_stmt_da()->status();
if (m_da_shadow.status == Diagnostics_area::DA_OK)
@@ -657,35 +660,35 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd)

Wsrep_replayer_service::~Wsrep_replayer_service()
{
THD* replayer_thd= m_thd;
THD* orig_thd= m_orig_thd;

/* Switch execution context back to original. */
wsrep_after_apply(replayer_thd);
wsrep_after_command_ignore_result(replayer_thd);
wsrep_close(replayer_thd);
wsrep_reset_threadvars(replayer_thd);
wsrep_store_threadvars(orig_thd);
wsrep_after_apply(m_thd);
wsrep_after_command_ignore_result(m_thd);
wsrep_close(m_thd);
wsrep_reset_threadvars(m_thd);
wsrep_store_threadvars(m_orig_thd);

DBUG_ASSERT(!orig_thd->get_stmt_da()->is_sent());
DBUG_ASSERT(!orig_thd->get_stmt_da()->is_set());
DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_sent());
DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_set());

delete m_thd->system_thread_info.rpl_sql_info;
m_thd->system_thread_info.rpl_sql_info= nullptr;

if (m_replay_status == wsrep::provider::success)
{
DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success);
orig_thd->reset_kill_query();
my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
DBUG_ASSERT(m_thd->wsrep_cs().current_error() == wsrep::e_success);
m_orig_thd->reset_kill_query();
my_ok(m_orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
}
else if (m_replay_status == wsrep::provider::error_certification_failed)
{
wsrep_override_error(orig_thd, ER_LOCK_DEADLOCK);
wsrep_override_error(m_orig_thd, ER_LOCK_DEADLOCK);
}
else
{
DBUG_ASSERT(0);
WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s",
m_replay_status,
orig_thd->db.str, wsrep_thd_query(orig_thd));
m_orig_thd->db.str, wsrep_thd_query(m_orig_thd));
unireg_abort(1);
}
}

0 comments on commit cd97523

Please sign in to comment.