Skip to content
Permalink
Browse files
MDEV-22543 : Galera SST donation fails, FLUSH TABLES WITH READ LOCK t…
…imes out

During SST we need to let FTWRL to use normal timeout method
even when client is disconnected.
  • Loading branch information
Jan Lindström committed Aug 11, 2020
1 parent 78ea8ad commit 57d1a5f
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 9 deletions.
@@ -0,0 +1,17 @@
connection node_1;
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
UPDATE t1 SET f2 = 2 WHERE f1 = 1;
connection node_1_ctrl;
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";
connection node_2;
connection node_1_ctrl;
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";
connection node_1;
SET DEBUG_SYNC = "RESET";
connection node_2;
connection node_1;
DROP TABLE t1;
@@ -0,0 +1,58 @@
# The test verifies that the FLUSH TABLES WITH READ LOCK does not
# time out if it needs to wait for another MDL lock for short duration
# during SST donation.

--source include/galera_cluster.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

--let $node_1 = node_1
--let $node_2 = node_2
--source include/auto_increment_offset_save.inc

--let $galera_connection_name = node_1_ctrl
--let $galera_server_number = 1
--source include/galera_connect.inc

#
# Run UPDATE on node_1 and make it block before table locks are taken.
# This should block FTWRL.
#
--connection node_1
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
--send UPDATE t1 SET f2 = 2 WHERE f1 = 1

--connection node_1_ctrl
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";

#
# Restart node_2, force SST.
#
--connection node_2
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
# Restart without waiting. The UPDATE should block FTWRL on node_1,
# so the SST cannot be completed and node_2 cannot join before
# UPDATE connection is signalled to continue.
--exec echo "restart:$start_mysqld_params" > $_expect_file_name
# If the bug is present, FTWRL times out on node_1 in couple of
# seconds and node_2 fails to join.
--sleep 10

--connection node_1_ctrl
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";

--connection node_1
--reap
SET DEBUG_SYNC = "RESET";

--connection node_2
--enable_reconnect
--source include/wait_until_connected_again.inc

--connection node_1
DROP TABLE t1;

--source include/auto_increment_offset_restore.inc
@@ -25,6 +25,7 @@
#include <mysql/psi/mysql_stage.h>
#include "wsrep_mysqld.h"
#include "wsrep_thd.h"
#include "wsrep_sst.h"

#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;
@@ -2137,18 +2138,26 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout)
wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE,
mdl_request->key.get_wait_state_name());

THD* thd= m_owner->get_thd();

if (wait_status != MDL_wait::EMPTY)
break;
/* Check if the client is gone while we were waiting. */
if (! thd_is_connected(m_owner->get_thd()))
if (! thd_is_connected(thd))
{
/*
* The client is disconnected. Don't wait forever:
* assume it's the same as a wait timeout, this
* ensures all error handling is correct.
*/
wait_status= MDL_wait::TIMEOUT;
break;
#if defined(WITH_WSREP) && !defined(EMBEDDED_LIBRARY)
// During SST client might not be connected
if (!wsrep_is_sst_progress())
#endif
{
/*
* The client is disconnected. Don't wait forever:
* assume it's the same as a wait timeout, this
* ensures all error handling is correct.
*/
wait_status= MDL_wait::TIMEOUT;
break;
}
}

mysql_prlock_wrlock(&lock->m_rwlock);
@@ -192,6 +192,7 @@ bool wsrep_before_SE()

static bool sst_complete = false;
static bool sst_needed = false;
static bool sst_in_progress = false;

#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
#define WSREP_TIMEDWAIT_SECONDS 10
@@ -1542,7 +1543,10 @@ static void* sst_donor_thread (void* a)
char out_buf[out_len];

wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED;
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST
// seqno of complete SST
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED;
// SST is now in progress
sst_in_progress= true;

wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can
// operate with wsrep_ready == OFF
@@ -1644,6 +1648,8 @@ static void* sst_donor_thread (void* a)
wsrep->sst_sent (wsrep, &state_id, -err);
proc.wait();

sst_in_progress= false;

return NULL;
}

@@ -1818,3 +1824,8 @@ void wsrep_SE_initialized()
{
SE_initialized = true;
}

bool wsrep_is_sst_progress()
{
return (sst_in_progress);
}
@@ -74,12 +74,14 @@ extern void wsrep_SE_init_grab(); /*! grab init critical section */
extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */
extern void wsrep_SE_init_done(); /*! signal that SE init is complte */
extern void wsrep_SE_initialized(); /*! mark SE initialization complete */
extern bool wsrep_is_sst_progress();

#else
#define wsrep_SE_initialized() do { } while(0)
#define wsrep_SE_init_grab() do { } while(0)
#define wsrep_SE_init_done() do { } while(0)
#define wsrep_sst_continue() (0)
#define wsrep_is_sst_progress() (0)

#endif /* WITH_WSREP */
#endif /* WSREP_SST_H */

0 comments on commit 57d1a5f

Please sign in to comment.