Skip to content
Permalink
Browse files
MDEV-22543: Galera SST donation fails, FLUSH TABLES WITH READ LOCK times out

During SST we need to let FTWRL use the normal timeout method
even when the client is disconnected.
  • Loading branch information
Jan Lindström committed Aug 14, 2020
1 parent 2f7b37b commit a7a9f44
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 9 deletions.
@@ -0,0 +1,19 @@
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
UPDATE t1 SET f2 = 2 WHERE f1 = 1;
connection node_1_ctrl;
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";
connection node_2;
connection node_1_ctrl;
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";
connection node_1;
SET DEBUG_SYNC = "RESET";
connection node_2;
connection node_1;
DROP TABLE t1;
@@ -0,0 +1,58 @@
# This test verifies that FLUSH TABLES WITH READ LOCK does not
# time out if it needs to wait for another MDL lock for a short duration
# during SST donation.

--source include/galera_cluster.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

--let $node_1 = node_1
--let $node_2 = node_2
--source include/auto_increment_offset_save.inc

--let $galera_connection_name = node_1_ctrl
--let $galera_server_number = 1
--source include/galera_connect.inc

#
# Run UPDATE on node_1 and make it block before table locks are taken.
# This should block FTWRL.
#
--connection node_1
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
--send UPDATE t1 SET f2 = 2 WHERE f1 = 1

--connection node_1_ctrl
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";

#
# Restart node_2, force SST.
#
--connection node_2
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
# Restart without waiting. The UPDATE should block FTWRL on node_1,
# so the SST cannot be completed and node_2 cannot join before the
# UPDATE connection is signalled to continue.
--exec echo "restart:$start_mysqld_params" > $_expect_file_name
# If the bug is present, FTWRL times out on node_1 in a couple of
# seconds and node_2 fails to join.
--sleep 10

--connection node_1_ctrl
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";

--connection node_1
--reap
SET DEBUG_SYNC = "RESET";

--connection node_2
--enable_reconnect
--source include/wait_until_connected_again.inc

--connection node_1
DROP TABLE t1;

--source include/auto_increment_offset_restore.inc
@@ -24,6 +24,7 @@
#include <mysql/plugin.h>
#include <mysql/service_thd_wait.h>
#include <mysql/psi/mysql_stage.h>
#include "wsrep_sst.h"
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;

@@ -2325,18 +2326,26 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout)
wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE,
mdl_request->key.get_wait_state_name());

THD* thd= m_owner->get_thd();

if (wait_status != MDL_wait::EMPTY)
break;
/* Check if the client is gone while we were waiting. */
if (! thd_is_connected(m_owner->get_thd()))
if (! thd_is_connected(thd))
{
/*
* The client is disconnected. Don't wait forever:
* assume it's the same as a wait timeout, this
* ensures all error handling is correct.
*/
wait_status= MDL_wait::TIMEOUT;
break;
#if defined(WITH_WSREP) && !defined(EMBEDDED_LIBRARY)
// During SST client might not be connected
if (!wsrep_is_sst_progress())
#endif
{
/*
* The client is disconnected. Don't wait forever:
* assume it's the same as a wait timeout, this
* ensures all error handling is correct.
*/
wait_status= MDL_wait::TIMEOUT;
break;
}
}

mysql_prlock_wrlock(&lock->m_rwlock);
@@ -54,6 +54,7 @@ my_bool wsrep_sst_donor_rejects_queries= FALSE;

bool sst_joiner_completed = false;
bool sst_donor_completed = false;
bool sst_needed = false;

struct sst_thread_arg
{
@@ -307,6 +308,7 @@ bool wsrep_before_SE()
&& strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP));
}

// Set to true by sst_donor_thread() once it starts working and reset to
// false just before that thread returns; read via wsrep_is_sst_progress().
// NOTE(review): this is a plain bool with no visible synchronization, yet it
// is consulted from the MDL wait loop -- confirm that unsynchronized access
// is acceptable here.
static bool sst_in_progress = false;
// Signal end of SST
static void wsrep_sst_complete (THD* thd,
int const rcode)
@@ -1623,7 +1625,10 @@ static void* sst_donor_thread (void* a)
char out_buf[out_len];

wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED;
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST
// seqno of complete SST
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED;
// SST is now in progress
sst_in_progress= true;

wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can
// operate with wsrep_ready == OFF
@@ -1729,6 +1734,8 @@ static void* sst_donor_thread (void* a)
proc.wait();

wsrep_donor_monitor_end();
sst_in_progress= false;


return NULL;
}
@@ -1882,3 +1889,8 @@ int wsrep_sst_donate(const std::string& msg,

return (ret >= 0 ? 0 : 1);
}

/**
  Report whether an SST donation is currently being carried out.

  @return the current value of the file-local sst_in_progress flag, which
          sst_donor_thread() raises while it runs and clears before exiting.
*/
bool wsrep_is_sst_progress()
{
  return sst_in_progress;
}
@@ -77,6 +77,7 @@ extern void wsrep_SE_init_grab(); /*! grab init critical section */
extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */
extern void wsrep_SE_init_done(); /*! signal that SE init is complete */
extern void wsrep_SE_initialized(); /*! mark SE initialization complete */
extern bool wsrep_is_sst_progress(); /*! true while the SST donor thread has its in-progress flag set */

/**
Return a string containing the state transfer request string.
@@ -102,5 +103,6 @@ int wsrep_sst_donate(const std::string& request,
/*
  Stub macros: each call expands to an empty statement or the constant 0.
  NOTE(review): presumably this is the branch used when wsrep support is
  compiled out -- confirm against the enclosing preprocessor condition.
*/
#define wsrep_SE_init_grab() do { } while(0)
#define wsrep_SE_init_done() do { } while(0)
#define wsrep_sst_continue() (0)
#define wsrep_is_sst_progress() (0)

#endif /* WSREP_SST_H */

0 comments on commit a7a9f44

Please sign in to comment.