Skip to content

Commit

Permalink
MDEV-15607: mysqld crashed a few moments after a node started joining via SST
Browse files Browse the repository at this point in the history
This is typical systemd behaviour: it tries to shut down the
joiner (due to a "timeout") before the joiner manages to complete SST.

wsrep_sst_wait
wsrep_SE_init_wait
	While waiting for the operation to finish, use mysql_cond_timedwait
	instead of mysql_cond_wait, and if the operation has not finished yet,
	extend the systemd timeout (if needed).
  • Loading branch information
Jan Lindström committed Jun 27, 2018
1 parent c6392d5 commit be56982
Showing 1 changed file with 37 additions and 4 deletions.
41 changes: 37 additions & 4 deletions sql/wsrep_sst.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include <cstdio>
#include <cstdlib>
#include <ctime>

#if MYSQL_VERSION_ID < 100200
# include <my_service_manager.h>
#endif

static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
sizeof(WSREP_SST_OPT_CONF) +
sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
Expand Down Expand Up @@ -186,6 +190,9 @@ bool wsrep_before_SE()
/* Completion flag that wsrep_sst_wait() re-checks in its wait loop. */
static bool sst_complete = false;
/*
  NOTE(review): not referenced in the visible part of this file; presumably
  records whether a state transfer was requested -- confirm against callers.
*/
static bool sst_needed = false;

/*
  Value handed to service_manager_extend_timeout() whenever a timed wait
  returns and the awaited flag is still unset (presumably seconds -- confirm
  against my_service_manager.h).
*/
#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
/* Number of seconds used to build the timespec given to mysql_cond_timedwait(). */
#define WSREP_TIMEDWAIT_SECONDS 10

void wsrep_sst_grab ()
{
WSREP_INFO("wsrep_sst_grab()");
Expand All @@ -197,11 +204,25 @@ void wsrep_sst_grab ()
// Wait for end of SST
bool wsrep_sst_wait ()
{
if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
uint32 total_wtime = 0;

if (mysql_mutex_lock (&LOCK_wsrep_sst))
abort();

WSREP_INFO("Waiting for SST to complete.");

while (!sst_complete)
{
WSREP_INFO("Waiting for SST to complete.");
mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);

if (!sst_complete)
{
total_wtime += wtime.tv_sec;
WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
"WSREP state transfer ongoing, current seqno: %ld", local_seqno);
}
}

if (local_seqno >= 0)
Expand Down Expand Up @@ -1298,10 +1319,22 @@ void wsrep_SE_init_grab()

/**
  Block until SE_initialized becomes true.

  Waits on COND_wsrep_sst_init in slices of WSREP_TIMEDWAIT_SECONDS. Every
  time a slice ends and SE_initialized is still false, the service manager
  is asked to extend its timeout so that the server is not shut down while
  it is still starting up.

  LOCK_wsrep_sst_init is released before returning. It is not acquired in
  this function; presumably the caller already holds it (e.g. via
  wsrep_SE_init_grab()) -- confirm against the call sites.
*/
void wsrep_SE_init_wait()
{
  uint32 total_wtime= 0;

  while (SE_initialized == false)
  {
    /*
      The timed wait expects an absolute deadline, not a duration, so the
      deadline has to be rebuilt from the current time on every iteration.
      Passing a bare {seconds, 0} names a moment shortly after the epoch,
      which makes the wait return immediately and turns this loop into a
      busy spin.
    */
    const time_t wait_start= time(NULL);
    struct timespec wtime;
    wtime.tv_sec= wait_start + WSREP_TIMEDWAIT_SECONDS;
    wtime.tv_nsec= 0;

    mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);

    if (!SE_initialized)
    {
      /*
        Account for the time actually spent waiting; skip the update if the
        wall clock stepped backwards so the counter cannot wrap.
      */
      const time_t wait_end= time(NULL);
      if (wait_end > wait_start)
        total_wtime+= (uint32) (wait_end - wait_start);

      WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
      service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
        "WSREP SE initialization ongoing.");
    }
  }

  mysql_mutex_unlock (&LOCK_wsrep_sst_init);
}

Expand Down

0 comments on commit be56982

Please sign in to comment.