Skip to content

Commit cced23c

Browse files
author
Nirbhay Choubey
committed
MDEV-9423: cannot add new node to the cluster: Binlog..
.. file '/var/log/mysql/mariadb-bin.000001' not found in binlog index, needed for recovery. Aborting. In a Galera cluster, while preparing for an rsync/xtrabackup-based SST, the donor node takes an FTWRL followed by (REFRESH_ENGINE_LOG in rsync-based state transfer and) REFRESH_BINARY_LOG. The latter rotates the binary log and logs the Binlog_checkpoint_log_event corresponding to the penultimate binary log file into the new file. The checkpoint event for the current file is later logged synchronously by binlog_background_thread. Now, since in rsync/xtrabackup-based snapshot state transfer methods only the last binary log file is transferred to the joiner node, the file could get transferred even before the checkpoint event for the same file gets written to it. As a result, the joiner node would fail to start, complaining about the missing binlog file needed for recovery. In order to fix this, a mechanism has been put in place to make the REFRESH_BINARY_LOG operation wait for the Binlog_checkpoint_log_event to be logged for the current binary log file if the node is part of a Galera cluster. As a further safety measure, during rsync-based state transfer the donor node now acquires and holds LOCK_log for the duration of the file transfer during SST.
1 parent 415823a commit cced23c

File tree

4 files changed

+53
-3
lines changed

4 files changed

+53
-3
lines changed

sql/log.cc

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3687,7 +3687,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
36873687
new_xid_list_entry->binlog_id= current_binlog_id;
36883688
/* Remove any initial entries with no pending XIDs. */
36893689
while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3690+
{
36903691
my_free(binlog_xid_count_list.get());
3692+
}
3693+
mysql_cond_broadcast(&COND_xid_list);
36913694
binlog_xid_count_list.push_back(new_xid_list_entry);
36923695
mysql_mutex_unlock(&LOCK_xid_list);
36933696

@@ -4208,6 +4211,7 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool create_new_log,
42084211
DBUG_ASSERT(b->xid_count == 0);
42094212
my_free(binlog_xid_count_list.get());
42104213
}
4214+
mysql_cond_broadcast(&COND_xid_list);
42114215
reset_master_pending--;
42124216
mysql_mutex_unlock(&LOCK_xid_list);
42134217
}
@@ -4218,6 +4222,26 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool create_new_log,
42184222
}
42194223

42204224

4225+
/**
  Block the caller until the head entry of binlog_xid_count_list is also
  its last entry, then acquire and immediately release LOCK_log.

  The wait is done on COND_xid_list while holding LOCK_xid_list. Per the
  commit description, the purpose is to keep a binary log refresh on a
  Galera node from returning before the checkpoint event for the current
  binary log file has been logged.
*/
void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4226+
{
4227+
mysql_mutex_lock(&LOCK_xid_list);
4228+
for (;;)
4229+
{
4230+
/* Stop waiting once the head of the list is also its last element. */
if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4231+
break;
4232+
/* Otherwise sleep until COND_xid_list is signalled, then re-check. */
mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4233+
}
4234+
mysql_mutex_unlock(&LOCK_xid_list);
4235+
4236+
/*
4237+
LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4238+
obtained after mark_xid_done() has written the last checkpoint event.
4239+
*/
4240+
/* Lock/unlock pair used purely as a barrier; nothing is done under it. */
mysql_mutex_lock(&LOCK_log);
4241+
mysql_mutex_unlock(&LOCK_log);
4242+
}
4243+
4244+
42214245
/**
42224246
Delete relay log files prior to rli->group_relay_log_name
42234247
(i.e. all logs which are not involved in a non-finished group
@@ -9260,7 +9284,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
92609284
*/
92619285
if (unlikely(reset_master_pending))
92629286
{
9263-
mysql_cond_signal(&COND_xid_list);
9287+
mysql_cond_broadcast(&COND_xid_list);
92649288
mysql_mutex_unlock(&LOCK_xid_list);
92659289
DBUG_VOID_RETURN;
92669290
}
@@ -9298,8 +9322,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
92989322
mysql_mutex_lock(&LOCK_log);
92999323
mysql_mutex_lock(&LOCK_xid_list);
93009324
--mark_xid_done_waiting;
9301-
if (unlikely(reset_master_pending))
9302-
mysql_cond_signal(&COND_xid_list);
9325+
mysql_cond_broadcast(&COND_xid_list);
93039326
/* We need to reload current_binlog_id due to release/re-take of lock. */
93049327
current= current_binlog_id;
93059328

sql/log.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
774774
bool need_mutex);
775775
bool reset_logs(THD* thd, bool create_new_log,
776776
rpl_gtid *init_state, uint32 init_state_len);
777+
void wait_for_last_checkpoint_event();
777778
void close(uint exiting);
778779
void clear_inuse_flag_when_closing(File file);
779780

sql/sql_reload.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options,
155155
{
156156
if (mysql_bin_log.rotate_and_purge(true))
157157
*write_to_binlog= -1;
158+
159+
if (WSREP_ON)
160+
{
161+
/* Wait for last binlog checkpoint event to be logged. */
162+
mysql_bin_log.wait_for_last_checkpoint_event();
163+
}
158164
}
159165
}
160166
if (options & REFRESH_RELAY_LOG)

sql/wsrep_sst.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,16 @@ static void* sst_donor_thread (void* a)
10061006
if (!err)
10071007
{
10081008
sst_disallow_writes (thd.ptr, true);
1009+
/*
1010+
Let's also keep statements that modify binary logs (like RESET LOGS,
1011+
RESET MASTER) from proceeding until the files have been transferred
1012+
to the joiner node.
1013+
*/
1014+
if (mysql_bin_log.is_open())
1015+
{
1016+
mysql_mutex_lock(mysql_bin_log.get_log_lock());
1017+
}
1018+
10091019
locked= true;
10101020
goto wait_signal;
10111021
}
@@ -1014,6 +1024,11 @@ static void* sst_donor_thread (void* a)
10141024
{
10151025
if (locked)
10161026
{
1027+
if (mysql_bin_log.is_open())
1028+
{
1029+
mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
1030+
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
1031+
}
10171032
sst_disallow_writes (thd.ptr, false);
10181033
thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
10191034
locked= false;
@@ -1046,6 +1061,11 @@ static void* sst_donor_thread (void* a)
10461061

10471062
if (locked) // don't forget to unlock server before return
10481063
{
1064+
if (mysql_bin_log.is_open())
1065+
{
1066+
mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
1067+
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
1068+
}
10491069
sst_disallow_writes (thd.ptr, false);
10501070
thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
10511071
}

0 commit comments

Comments
 (0)