Skip to content

Commit

Permalink
MDEV-20821 parallel slave server shutdown hang
Browse files Browse the repository at this point in the history
A parallel slave server shutdown was found to hang in
close_connections(), which is triggered by shutdown, because a slave worker
thread would not be notified to exit if the worker was sitting idle.

Fixed by destroying the worker pool earlier, namely in
slave_prepare_for_shutdown(), once all of its driver threads have already left.
A test file is added to simulate the bug condition as well as to check
the multi-source and non-idle worker cases.
  • Loading branch information
andrelkin authored and mariadb-SachinSetiya committed May 14, 2021
1 parent ec348f5 commit 3616640
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 0 deletions.
79 changes: 79 additions & 0 deletions mysql-test/suite/rpl/r/rpl_slave_shutdown_mdev20821.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
include/rpl_init.inc [topology=1->3]
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
change master to master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos;
include/start_slave.inc
select @@global.slave_parallel_workers as two;
two
2
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_1;
create table t1 (i int primary key) engine=Innodb;
connection server_2;
create table t2 (i int primary key) engine=Innodb;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connection server_2;
insert into t2 values (1);
connection server_3;
connection server_1;
insert into t1 values (1);
connection server_3;
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connect conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,;
begin;
insert into t1 values (2);
insert into t2 values (2);
connection server_1;
insert into t1 values (2);
connection server_2;
insert into t2 values (2);
connection server_3;
SHUTDOWN;
connection server_3;
connection server_3;
connection server_3;
set default_master_connection = '';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
set default_master_connection = 'm2';
include/start_slave.inc
Warnings:
Note 1254 Slave is already running
connection server_1;
drop table t1;
connection server_2;
drop table t2;
connection server_3;
set default_master_connection = 'm2';
include/stop_slave.inc
RESET SLAVE ALL;
set default_master_connection = '';
include/rpl_end.inc
19 changes: 19 additions & 0 deletions mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Server configuration for rpl_slave_shutdown_mdev20821.test.
# Three servers are configured; the test shuts down and restarts server 3,
# which replicates from server 1 and, via the named connection 'm2',
# from server 2.
!include suite/rpl/rpl_1slave_base.cnf
!include include/default_client.cnf

# Every server gets its own GTID domain id.
[mysqld.1]
log-slave-updates
gtid-domain-id=1

[mysqld.2]
log-slave-updates
gtid-domain-id=2

# Server 3 runs in parallel slave mode; the test checks that
# @@global.slave_parallel_workers reports 2.
[mysqld.3]
log-slave-updates
gtid-domain-id=3
slave_parallel_threads=2

# Port and socket of server 3 exported to the test environment;
# the test uses $SERVER_MYPORT_3 to open an extra connection to server 3.
[ENV]
SERVER_MYPORT_3= @mysqld.3.port
SERVER_MYSOCK_3= @mysqld.3.socket
165 changes: 165 additions & 0 deletions mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# MDEV-20821 parallel slave server shutdown hang
#
# Test the bug condition of a parallel slave server shutdown
# hang when the parallel workers were idle.
# The reported scenario is extended to cover the multi-source case, and
# both the idle and the busy worker cases are checked.
#
# Layout of the test:
#   A. shutdown while the workers are idle (no work assigned yet);
#   B. shutdown while the workers rest after having applied some events;
#   C. shutdown while there is a good chance that the workers are busy.
# In each case server_3 is shut down and restarted; reaching the reconnect
# shows that the shutdown did not hang.

--source include/have_innodb.inc
--source include/have_binlog_format_mixed.inc
# rpl_init.inc sets up server_1 as a master of server_3. The second source,
# server_2, is attached later in the test through the named connection 'm2'.
--let $rpl_topology= 1->3
--source include/rpl_init.inc

#
# A. idle workers.
#
# The default (unnamed) connection has already been started by rpl_init.inc,
# so start_slave.inc only yields the "Slave is already running" note that is
# recorded in the result file.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc

# Configure and start the second source 'm2' that points at server_2.
# The actual port number is masked in the recorded output.
set default_master_connection = 'm2';
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval change master to master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos;
--source include/start_slave.inc

# Sanity check: the recorded result expects two parallel workers.
select @@global.slave_parallel_workers as two;

# At this point worker threads have no assignment.
# Shutdown must not hang.

# Shutdown sequence: write "wait" into the expect file of server 3 before
# the shutdown (NOTE(review): per the mtr expect-file convention this keeps
# mtr from restarting the server on its own - confirm against mtr docs),
# send SHUTDOWN asynchronously, collect its result and wait until the
# connection is lost.
--connection server_3
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc

# Now request the restart of server 3 through the same expect file.
--connection server_3
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF

# Reaching this point proves that the shutdown did not hang.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc

#
# B. resting workers after some busy time
#
# Create one table per source so that each replication connection of
# server_3 gets its own events to apply.
--connection server_1
create table t1 (i int primary key) engine=Innodb;

--connection server_2
create table t2 (i int primary key) engine=Innodb;

# Both connections are reported as already running after the restart of
# server_3 (see the notes in the result file); start_slave.inc is called
# for each of them regardless.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc

set default_master_connection = 'm2';
--source include/start_slave.inc

# Give the 'm2' connection something to apply and wait until it is applied.
--connection server_2
insert into t2 values (1);
--save_master_pos

--connection server_3
--sync_with_master 0,'m2'

# Do the same for the default connection.
--connection server_1
insert into t1 values (1);
--save_master_pos

--connection server_3
--sync_with_master 0,''

# At this point worker threads have no assignment.
# Shutdown must not hang.

# Same shutdown/restart sequence as in case A: "wait" in the expect file,
# asynchronous SHUTDOWN, wait for the disconnect.
--connection server_3
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc

# Request the restart of server 3.
--connection server_3
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF

# Reaching this point proves that the shutdown did not hang.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc

#
# C. busy workers
#
# Both connections are reported as already running after the restart of
# server_3 (see the notes in the result file); start_slave.inc is called
# for each of them regardless.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc

set default_master_connection = 'm2';
--source include/start_slave.inc

# Open an extra connection to server_3 and, inside a transaction that is
# left open, insert the very same primary key values that the two sources
# insert right below.
# NOTE(review): presumably this makes the workers applying those rows wait
# on the open transaction - confirm.
--connect (conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,)
begin;
insert into t1 values (2);
insert into t2 values (2);

--connection server_1
insert into t1 values (2);
--connection server_2
insert into t2 values (2);


# At this point there's a good chance the worker threads are busy.
# SHUTDOWN must proceed without any delay as above.
# The test does not wait for the workers to actually pick up the events,
# hence only "a good chance".
--connection server_3
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
wait
EOF
--send SHUTDOWN
--reap
--source include/wait_until_disconnected.inc

# Request the restart of server 3.
--connection server_3
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
restart
EOF

# Reaching this point proves that the shutdown did not hang.
--connection server_3
--enable_reconnect
--source include/wait_until_connected_again.inc


# Cleanup

# Both connections are reported as already running after the restart of
# server_3 (see the notes in the result file); start_slave.inc is called
# for each of them regardless.
--connection server_3
set default_master_connection = '';
--source include/start_slave.inc

set default_master_connection = 'm2';
--source include/start_slave.inc

# Drop the test tables on their respective sources.
--connection server_1
drop table t1;

--connection server_2
drop table t2;
--save_master_pos

# (!) The following block is critical to avoid check-mysqld_3.reject by mtr:
# wait until 'm2' has applied the drop of t2, then stop and remove the 'm2'
# connection and switch back to the default connection.
--connection server_3
--sync_with_master 0,'m2'
set default_master_connection = 'm2';
--source include/stop_slave.inc
RESET SLAVE ALL;
set default_master_connection = '';

--source include/rpl_end.inc
3 changes: 3 additions & 0 deletions sql/slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,9 @@ void slave_prepare_for_shutdown()
mysql_mutex_lock(&LOCK_active_mi);
master_info_index->free_connections();
mysql_mutex_unlock(&LOCK_active_mi);
// It's safe to destruct worker pool now when
// all driver threads are gone.
global_rpl_thread_pool.destroy();
}

/*
Expand Down

0 comments on commit 3616640

Please sign in to comment.