Skip to content

Commit 4c8af20

Browse files
committed
MDEV-36934: semi sync makes the master unresponsive when a replica is stopped
Ensure that a pending semi-sync transaction can only be signalled on its THD while it is waiting in commit_trx(); not if the wait in commit_trx() is skipped for some reason. There was a bug that if no semi-sync slaves were connected and rpl_semi_sync_master_wait_no_slave was off, the THD pointer could be left in the list of pending transactions after THD was deleted, and an invalid THD/condition variable could be signalled, causing hang or other corruption. Testcase based on work by Brandon Nesterenko <brandon.nesterenko@mariadb.com>. Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent c4a2688 commit 4c8af20

File tree

4 files changed

+210
-17
lines changed

4 files changed

+210
-17
lines changed

mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ call mtr.add_suppression("Could not read packet");
77
call mtr.add_suppression("Could not write packet");
88
set @save_bgc_count= @@global.binlog_commit_wait_count;
99
set @save_bgc_usec= @@global.binlog_commit_wait_usec;
10+
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
11+
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
1012
set @save_debug_dbug= @@global.debug_dbug;
1113
set @@global.binlog_commit_wait_count=3;
1214
set @@global.binlog_commit_wait_usec=10000000;
@@ -46,8 +48,6 @@ drop table t1, t2, t3;
4648
# the binlogging to semi-sync, and starting the wait for ACK; and during
4749
# this pause, semi-sync is manually switched off and on.
4850
connection master;
49-
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
50-
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
5151
set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
5252
set @@global.rpl_semi_sync_master_wait_no_slave= 1;
5353
set @@global.debug_dbug="+d,semisync_log_skip_trx_wait";
@@ -100,7 +100,66 @@ commit;
100100
# Cleanup
101101
connection master;
102102
drop table tn;
103+
set @@global.debug_dbug=@save_debug_dbug;
104+
#
105+
# MDEV-36934
106+
# The server could indefinitely hang due to a memory leak which tried to
107+
# pthread signal on a destroyed condition variable. In effect, no
108+
# connections could commit transactions because there would be a thread
109+
# stuck on a never-returning call to pthread_cond_signal() while
110+
# holding Repl_semi_sync_master::LOCK_log.
111+
connection master;
112+
set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
113+
set @@global.rpl_semi_sync_master_wait_no_slave= 0;
114+
# Ensure servers are in proper state
115+
connection master;
116+
connection slave;
117+
# Test case initial set-up
118+
connection master;
119+
create table t_36934 (a int) engine=innodb;
120+
include/save_master_gtid.inc
121+
connection slave;
122+
include/sync_with_master_gtid.inc
123+
# Pause the user transaction before inserting into Active_tranx
124+
connect user_con,localhost,root,,;
125+
SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
126+
insert into t_36934 values (1);
127+
connection server_1;
128+
set debug_sync="now wait_for at_write_tranx_in_binlog";
129+
# Disconnect the slave (note that the binlog dump thread won't yet be
130+
# notified of a binlog update from the last transaction, so the slave
131+
# should neither receiver nor ACK the transaction).
132+
connection slave;
133+
include/stop_slave.inc
134+
# Waiting for master to realize the slave has disconnected..
135+
connection server_1;
136+
# ..done
137+
# Resuming transaction (it will exit commit_trx early without waiting)
138+
set debug_sync="now signal resume_write_tranx_in_binlog";
139+
connection user_con;
140+
disconnect user_con;
141+
# Force delete the user thread (FLUSH THREADS ensures the thread won't
142+
# stay in the thread cache)
143+
connection master;
144+
FLUSH THREADS;
145+
# BUG: Re-connect slave. MDEV-36934 reports that the master would hang
146+
# when the slave would re-connect and try to ACK the last transaction
147+
# who's thread has been deleted
148+
connection slave;
149+
include/start_slave.inc
150+
# Try to commit another transaction (prior to MDEV-36934 fixes, this
151+
# would hang indefinitely)
152+
connection master;
153+
set debug_sync="RESET";
154+
insert into t_36934 values (2);
155+
connection server_1;
156+
# Waiting 30s for last query to complete..
157+
connection master;
158+
# ..done
159+
# Cleanup
160+
connection master;
103161
set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
104162
set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
105163
set @@global.debug_dbug=@save_debug_dbug;
164+
drop table t_36934;
106165
include/rpl_end.inc

mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ call mtr.add_suppression("Could not read packet");
3434
call mtr.add_suppression("Could not write packet");
3535
set @save_bgc_count= @@global.binlog_commit_wait_count;
3636
set @save_bgc_usec= @@global.binlog_commit_wait_usec;
37+
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
38+
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
3739
set @save_debug_dbug= @@global.debug_dbug;
3840
set @@global.binlog_commit_wait_count=3;
3941
set @@global.binlog_commit_wait_usec=10000000;
@@ -98,8 +100,6 @@ drop table t1, t2, t3;
98100

99101

100102
--connection master
101-
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
102-
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
103103
set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
104104
set @@global.rpl_semi_sync_master_wait_no_slave= 1;
105105
--eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"
@@ -190,8 +190,108 @@ commit;
190190
--echo # Cleanup
191191
--connection master
192192
drop table tn;
193+
set @@global.debug_dbug=@save_debug_dbug;
194+
195+
196+
--echo #
197+
--echo # MDEV-36934
198+
--echo # The server could indefinitely hang due to a memory leak which tried to
199+
--echo # pthread signal on a destroyed condition variable. In effect, no
200+
--echo # connections could commit transactions because there would be a thread
201+
--echo # stuck on a never-returning call to pthread_cond_signal() while
202+
--echo # holding Repl_semi_sync_master::LOCK_log.
203+
204+
--connection master
205+
set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
206+
set @@global.rpl_semi_sync_master_wait_no_slave= 0;
207+
208+
--echo # Ensure servers are in proper state
209+
--connection master
210+
let $status_var= rpl_semi_sync_master_status;
211+
let $status_var_value= ON;
212+
source include/wait_for_status_var.inc;
213+
--connection slave
214+
let $status_var= rpl_semi_sync_slave_status;
215+
let $status_var_value= ON;
216+
source include/wait_for_status_var.inc;
217+
218+
--echo # Test case initial set-up
219+
--connection master
220+
create table t_36934 (a int) engine=innodb;
221+
--source include/save_master_gtid.inc
222+
--connection slave
223+
--source include/sync_with_master_gtid.inc
224+
225+
--echo # Pause the user transaction before inserting into Active_tranx
226+
--connect(user_con,localhost,root,,)
227+
SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
228+
--send insert into t_36934 values (1)
229+
230+
--connection server_1
231+
set debug_sync="now wait_for at_write_tranx_in_binlog";
232+
233+
--echo # Disconnect the slave (note that the binlog dump thread won't yet be
234+
--echo # notified of a binlog update from the last transaction, so the slave
235+
--echo # should neither receiver nor ACK the transaction).
236+
--connection slave
237+
--source include/stop_slave.inc
238+
239+
--echo # Waiting for master to realize the slave has disconnected..
240+
--connection server_1
241+
let $status_var= rpl_semi_sync_master_clients;
242+
let $status_var_value= 0;
243+
source include/wait_for_status_var.inc;
244+
--echo # ..done
245+
246+
--echo # Resuming transaction (it will exit commit_trx early without waiting)
247+
set debug_sync="now signal resume_write_tranx_in_binlog";
248+
249+
--connection user_con
250+
--reap
251+
--let $user_con_tid= `SELECT connection_id()`
252+
--disconnect user_con
253+
--source include/wait_until_disconnected.inc
254+
255+
--echo # Force delete the user thread (FLUSH THREADS ensures the thread won't
256+
--echo # stay in the thread cache)
257+
--connection master
258+
FLUSH THREADS;
259+
260+
--echo # BUG: Re-connect slave. MDEV-36934 reports that the master would hang
261+
--echo # when the slave would re-connect and try to ACK the last transaction
262+
--echo # who's thread has been deleted
263+
--connection slave
264+
--source include/start_slave.inc
265+
266+
--echo # Try to commit another transaction (prior to MDEV-36934 fixes, this
267+
--echo # would hang indefinitely)
268+
--connection master
269+
set debug_sync="RESET";
270+
--send insert into t_36934 values (2)
271+
272+
--connection server_1
273+
--echo # Waiting 30s for last query to complete..
274+
--let $wait_timeout= 30
275+
--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE info LIKE 'insert into t_36934%';
276+
--source include/wait_condition.inc
277+
278+
# Variable `success` is set by wait_condition.inc
279+
if (!$success)
280+
{
281+
--echo # ..error
282+
--die Query is hung
283+
}
284+
285+
--connection master
286+
--reap
287+
--echo # ..done
288+
289+
290+
--echo # Cleanup
291+
--connection master
193292
set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
194293
set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
195294
set @@global.debug_dbug=@save_debug_dbug;
295+
drop table t_36934;
196296

197297
--source include/rpl_end.inc

sql/semisync_master.cc

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,20 @@ static ulonglong timespec_to_usec(const struct timespec *ts)
6868
return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND;
6969
}
7070

71-
int signal_waiting_transaction(THD *waiting_thd, const char *binlog_file,
72-
my_off_t binlog_pos)
71+
static int
72+
signal_waiting_transaction(THD *waiting_thd, bool thd_valid,
73+
const char *binlog_file, my_off_t binlog_pos)
7374
{
7475
/*
7576
It is possible that the connection thd waiting for an ACK was killed. In
7677
such circumstance, the connection thread will nullify the thd member of its
7778
Active_tranx node. So before we try to signal, ensure the THD exists.
79+
80+
The thd_valid is only set while the THD is waiting in commit_trx(); this
81+
is defensive coding to not signal an invalid THD if we somewhere
82+
accidentally did not remove the transaction from the list.
7883
*/
79-
if (waiting_thd)
84+
if (waiting_thd && thd_valid)
8085
mysql_cond_signal(&waiting_thd->COND_wakeup_ready);
8186
return 0;
8287
}
@@ -182,6 +187,7 @@ int Active_tranx::insert_tranx_node(THD *thd_to_wait,
182187
ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */
183188
ins_node->log_pos = log_file_pos;
184189
ins_node->thd= thd_to_wait;
190+
ins_node->thd_valid= false;
185191

186192
if (!m_trx_front)
187193
{
@@ -263,7 +269,8 @@ void Active_tranx::clear_active_tranx_nodes(
263269
if ((log_file_name != NULL) &&
264270
compare(new_front, log_file_name, log_file_pos) > 0)
265271
break;
266-
pre_delete_hook(new_front->thd, new_front->log_name, new_front->log_pos);
272+
pre_delete_hook(new_front->thd, new_front->thd_valid,
273+
new_front->log_name, new_front->log_pos);
267274
new_front = new_front->next;
268275
}
269276

@@ -355,13 +362,17 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
355362
}
356363

357364
if (entry)
365+
{
358366
entry->thd= NULL;
367+
entry->thd_valid= false;
368+
}
359369

360370
DBUG_VOID_RETURN;
361371
}
362372

363-
bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
364-
my_off_t log_file_pos)
373+
Tranx_node *
374+
Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
375+
my_off_t log_file_pos)
365376
{
366377
DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
367378
mysql_mutex_assert_owner(m_lock);
@@ -377,7 +388,7 @@ bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
377388
entry = entry->hash_next;
378389
}
379390

380-
DBUG_RETURN(static_cast<bool>(entry));
391+
DBUG_RETURN(entry);
381392
}
382393

383394
/*******************************************************************************
@@ -863,6 +874,10 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
863874

864875
if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave)
865876
{
877+
lock();
878+
m_active_tranxs->unlink_thd_as_waiter(trx_wait_binlog_name,
879+
trx_wait_binlog_pos);
880+
unlock();
866881
rpl_semi_sync_master_no_transactions++;
867882
DBUG_RETURN(0);
868883
}
@@ -922,6 +937,9 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
922937
}
923938
}
924939

940+
Tranx_node *tranx_entry=
941+
m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
942+
trx_wait_binlog_pos);
925943
/* In between the binlogging of this transaction and this wait, it is
926944
* possible that our entry in Active_tranx was removed (i.e. if
927945
* semi-sync was switched off and on). It is also possible that the
@@ -932,8 +950,7 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
932950
* rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
933951
* semi-sync restart _after_ checking the reply state.
934952
*/
935-
if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
936-
trx_wait_binlog_pos)))
953+
if (unlikely(!tranx_entry))
937954
{
938955
DBUG_EXECUTE_IF(
939956
"semisync_log_skip_trx_wait",
@@ -952,6 +969,16 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
952969
break;
953970
}
954971

972+
/*
973+
Mark that our THD is now valid for signalling to by the ack thread.
974+
It is important to ensure that we can never leave a no longer valid
975+
THD in the transaction list and signal it, eg. MDEV-36934. This way,
976+
we ensure the THD will only be signalled while this function is
977+
running, even in case of some incorrect error handling or similar
978+
that might leave a dangling THD in the list.
979+
*/
980+
tranx_entry->thd_valid= true;
981+
955982
/* Let us update the info about the minimum binlog position of waiting
956983
* threads.
957984
*/
@@ -1284,6 +1311,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
12841311

12851312
DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog");
12861313

1314+
DEBUG_SYNC(current_thd, "semisync_at_write_tranx_in_binlog");
1315+
12871316
lock();
12881317

12891318
/* This is the real check inside the mutex. */
@@ -1317,7 +1346,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
13171346
m_commit_file_name_inited = true;
13181347
}
13191348

1320-
if (is_on())
1349+
if (is_on() &&
1350+
(rpl_semi_sync_master_clients || rpl_semi_sync_master_wait_no_slave))
13211351
{
13221352
DBUG_ASSERT(m_active_tranxs != NULL);
13231353
if(m_active_tranxs->insert_tranx_node(thd, log_file_name, log_file_pos))

sql/semisync_master.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern PSI_cond_key key_COND_binlog_send;
3030

3131
struct Tranx_node {
3232
char log_name[FN_REFLEN];
33+
bool thd_valid; /* thd is valid for signalling */
3334
my_off_t log_pos;
3435
THD *thd; /* The thread awaiting an ACK */
3536
struct Tranx_node *next; /* the next node in the sorted list */
@@ -126,7 +127,9 @@ class Tranx_node_allocator
126127

127128
trx_node= &(current_block->nodes[++last_node]);
128129
trx_node->log_name[0] = '\0';
130+
trx_node->thd_valid= false;
129131
trx_node->log_pos= 0;
132+
trx_node->thd= nullptr;
130133
trx_node->next= 0;
131134
trx_node->hash_next= 0;
132135
return trx_node;
@@ -298,7 +301,8 @@ class Tranx_node_allocator
298301
its invocation. See the context in which it is called to know.
299302
*/
300303

301-
typedef int (*active_tranx_action)(THD *trx_thd, const char *log_file_name,
304+
typedef int (*active_tranx_action)(THD *trx_thd, bool thd_valid,
305+
const char *log_file_name,
302306
my_off_t trx_log_file_pos);
303307

304308
/**
@@ -381,8 +385,8 @@ class Active_tranx
381385
* matches the thread of the respective Tranx_node::thd of the passed in
382386
* log_file_name and log_file_pos.
383387
*/
384-
bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
385-
my_off_t log_file_pos);
388+
Tranx_node * is_thd_waiter(THD *thd_to_check, const char *log_file_name,
389+
my_off_t log_file_pos);
386390

387391
/* Given a position, check to see whether the position is an active
388392
* transaction's ending position by probing the hash table.

0 commit comments

Comments
 (0)