Skip to content
Permalink
Browse files

MDEV-19746: Galera test failures because of wsrep_slave_threads ident…

…ification

Problem was that tests select INFORMATION_SCHEMA.PROCESSLIST processes
from user system user and empty state. Thus, there is not clear
state for slave threads.

Changes:
- Added new status variables that store current amount of applier threads
(wsrep_applier_thread_count) and rollbacker threads
(wsrep_rollbacker_thread_count). This will make clear how many slave threads
of certain type there is.
- Added THD state "wsrep applier idle" when applier slave thread is
waiting for work. This makes finding slave/applier threads easier.
- Added force-restart option for mtr to always restart servers between tests
to avoid race on start of the test
- Added wait_condition_with_debug to wait until the passed statement returns
true, or the operation times out. If operation times out, the additional error
statement will be executed

Changes to be committed:
	new file:   mysql-test/include/force_restart.inc
	new file:   mysql-test/include/wait_condition_with_debug.inc
	modified:   mysql-test/mysql-test-run.pl
	modified:   mysql-test/suite/galera/disabled.def
	modified:   mysql-test/suite/galera/r/MW-336.result
	modified:   mysql-test/suite/galera/r/galera_kill_applier.result
	modified:   mysql-test/suite/galera/r/galera_var_slave_threads.result
	new file:   mysql-test/suite/galera/t/MW-336.cnf
	modified:   mysql-test/suite/galera/t/MW-336.test
	modified:   mysql-test/suite/galera/t/galera_kill_applier.test
	modified:   mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test
	modified:   mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test
	modified:   mysql-test/suite/galera/t/galera_var_slave_threads.test
	modified:   mysql-test/suite/wsrep/disabled.def
	modified:   mysql-test/suite/wsrep/r/variables.result
	modified:   mysql-test/suite/wsrep/t/variables.test
	modified:   sql/mysqld.cc
	modified:   sql/wsrep_mysqld.cc
	modified:   sql/wsrep_mysqld.h
	modified:   sql/wsrep_thd.cc
	modified:   sql/wsrep_var.cc
  • Loading branch information...
janlindstrom committed Jul 11, 2019
1 parent b3bd51c commit ec49976e383ae42650fb99e7477c072550d89876
@@ -0,0 +1,14 @@
# ==== Purpose ====
#
# Tell mtr that all servers must be restarted after the test has
# finished.
#
# ==== Usage ====
#
# --source include/force_restart.inc
#

--let $_force_restart_datadir= `SELECT @@datadir`
--append_file $_force_restart_datadir/mtr/force_restart
1
EOF
@@ -0,0 +1,69 @@
# include/wait_condition_with_debug.inc
#
# SUMMARY
#
# Waits until the passed statement returns true, or the operation
# times out. If the operation times out, the additional error
# statement will be executed.
#
# USAGE
#
# let $wait_condition=
# SELECT c = 3 FROM t;
# let $wait_condition_on_error_output= select count(*) from t;
# [let $explicit_default_wait_timeout= N] # to override the default reset
# --source include/wait_condition_with_debug.inc
#
# OR
#
# let $wait_timeout= 60; # Override default 30 seconds with 60.
# let $wait_condition=
# SELECT c = 3 FROM t;
# let $wait_condition_on_error_output= select count(*) from t;
# --source include/wait_condition_with_debug.inc
# --echo Executed the test condition $wait_condition_reps times
#
#
# EXAMPLE
# events_bugs.test, events_time_zone.test
#

let $wait_counter= 300;
if ($wait_timeout)
{
let $wait_counter= `SELECT $wait_timeout * 10`;
}
# Reset $wait_timeout so that its value won't be used on subsequent
# calls, and default will be used instead.
if ($explicit_default_wait_timeout)
{
--let $wait_timeout= $explicit_default_wait_timeout
}
if (!$explicit_default_wait_timeout)
{
--let $wait_timeout= 0
}

# Keep track of how many times the wait condition is tested
# This is used by some tests (e.g., main.status)
let $wait_condition_reps= 0;
while ($wait_counter)
{
--error 0,ER_NO_SUCH_TABLE,ER_LOCK_WAIT_TIMEOUT,ER_UNKNOWN_COM_ERROR,ER_LOCK_DEADLOCK
let $success= `$wait_condition`;
inc $wait_condition_reps;
if ($success)
{
let $wait_counter= 0;
}
if (!$success)
{
real_sleep 0.1;
dec $wait_counter;
}
}
if (!$success)
{
echo Timeout in wait_condition.inc for $wait_condition;
--eval $wait_condition_on_error_output
}
@@ -842,6 +842,8 @@ ($$$)
next if (defined $t->{reserved} and $t->{reserved} != $wid);
if (! defined $t->{reserved})
{
# Force-restart not relevant when comparing *next* test
$t->{criteria} =~ s/force-restart$/no-restart/;
my $criteria= $t->{criteria};
# Reserve similar tests for this worker, but not too many
my $maxres= (@$tests - $i) / $opt_parallel + 1;
@@ -3699,6 +3701,25 @@ sub find_analyze_request
return $analyze;
}

# The test can leave a file in var/tmp/ to signal
# that all servers should be restarted
sub restart_forced_by_test($)
{
my $file = shift;
my $restart = 0;
foreach my $mysqld ( mysqlds() )
{
my $datadir = $mysqld->value('datadir');
my $force_restart_file = "$datadir/mtr/$file";
if ( -f $force_restart_file )
{
mtr_verbose("Restart of servers forced by test");
$restart = 1;
last;
}
}
return $restart;
}

# Return timezone value of tinfo or default value
sub timezone {
@@ -4041,8 +4062,12 @@ ($$)
if ( $res == 0 )
{
my $check_res;
if ( $opt_check_testcases and
$check_res= check_testcase($tinfo, "after"))
if ( restart_forced_by_test('force_restart') )
{
stop_all_servers($opt_shutdown_timeout);
}
elsif ( $opt_check_testcases and
$check_res= check_testcase($tinfo, "after"))
{
if ($check_res == 1) {
# Test case had sideeffects, not fatal error, just continue
@@ -4077,7 +4102,8 @@ ($$)
find_testcase_skipped_reason($tinfo);
mtr_report_test_skipped($tinfo);
# Restart if skipped due to missing perl, it may have had side effects
if ( $tinfo->{'comment'} =~ /^perl not found/ )
if ( restart_forced_by_test('force_restart_if_skipped') ||
$tinfo->{'comment'} =~ /^perl not found/ )
{
stop_all_servers($opt_shutdown_timeout);
}
@@ -5227,6 +5253,11 @@ sub server_need_restart {
return 0;
}

if ( $tinfo->{'force_restart'} ) {
mtr_verbose_restart($server, "forced in .opt file");
return 1;
}

if ( $opt_force_restart ) {
mtr_verbose_restart($server, "forced restart turned on");
return 1;
@@ -6209,6 +6240,7 @@ ($)
servers to exit before finishing the process
fast Run as fast as possible, don't wait for servers
to shutdown etc.
force-restart Always restart servers between tests
parallel=N Run tests in N parallel threads (default 1)
Use parallel=auto for auto-setting of N
repeat=N Run each test N number of times
@@ -15,7 +15,6 @@ MW-328A : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-328B : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-328C : MDEV-17847 Galera test failure on MW-328[A|B|C]
MW-329 : MDEV-19962 Galera test failure on MW-329
MW-336 : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
MW-388: MDEV-19803 Long semaphore wait error on galera.MW-388
galera_account_management : MariaDB 10.0 does not support ALTER USER
galera_as_master_gtid : Requires MySQL GTID
@@ -29,12 +28,8 @@ galera_flush : MariaDB does not have global.thread_statistics
galera_gcache_recover_manytrx : MDEV-18834 Galera test failure
galera_ist_mariabackup : MDEV-18829 test leaves port open
galera_ist_progress : MDEV-15236 fails when trying to read transfer status
galera_kill_applier : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_migrate : MariaDB does not support START SLAVE USER
galera_parallel_autoinc_largetrx : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_parallel_autoinc_manytrx : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_ssl_upgrade : MDEV-19950 Galera test failure on galera_ssl_upgrade
galera_sst_mariabackup_encrypt_with_key : MDEV-19926 Galera SST tests fail
galera_var_slave_threads : MDEV-19746 Galera test failures because of wsrep_slave_threads identification
galera_wan : MDEV-17259: Test failure on galera.galera_wan
partition : MDEV-19958 Galera test failure on galera.partition
@@ -1,39 +1,51 @@
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
INSERT INTO t1 values(0);
connection node_1;
CREATE TABLE t1 (f1 INTEGER) Engine=InnoDB;
SET GLOBAL wsrep_slave_threads = 10;
# Set slave threads to 10 step 1
SET GLOBAL wsrep_slave_threads = 1;
# Wait 10 slave threads to start 1
connection node_2;
SET SESSION wsrep_sync_wait=15;
# Generate 100 replication events
INSERT INTO t1 VALUES (1);
connection node_1;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM t1;
COUNT(*)
101
# Wait 9 slave threads to exit 1
1
SET GLOBAL wsrep_slave_threads = 10;
# Wait 10 slave threads to start 2
# Set slave threads to 10 step 2
SET GLOBAL wsrep_slave_threads = 20;
# Wait 20 slave threads to start 3
# Set slave threads to 20
SET GLOBAL wsrep_slave_threads = 1;
connection node_2;
# Generate 100 replication events
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
INSERT INTO t1 VALUES (6);
INSERT INTO t1 VALUES (7);
INSERT INTO t1 VALUES (8);
INSERT INTO t1 VALUES (9);
INSERT INTO t1 VALUES (10);
connection node_1;
SET GLOBAL wsrep_slave_threads = 10;
SELECT COUNT(*) FROM t1;
COUNT(*)
201
# Wait 10 slave threads to exit 3
SET GLOBAL wsrep_slave_threads = 10;
SET GLOBAL wsrep_slave_threads = 1;
# Wait 10 slave threads to start 3
11
# Set slave threads to 10 step 3
connection node_2;
# Generate 100 replication events
INSERT INTO t1 VALUES (11);
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (13);
INSERT INTO t1 VALUES (14);
INSERT INTO t1 VALUES (15);
INSERT INTO t1 VALUES (16);
INSERT INTO t1 VALUES (17);
INSERT INTO t1 VALUES (18);
INSERT INTO t1 VALUES (19);
INSERT INTO t1 VALUES (20);
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
301
# Wait 10 slave threads to exit 4
connection node_1;
21
SET GLOBAL wsrep_slave_threads = 1;
DROP TABLE t1;
@@ -1,5 +1,18 @@
connection node_1;
connection node_2;
SET GLOBAL wsrep_slave_threads=2;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
SET GLOBAL wsrep_slave_threads=1;
connection node_1;
create table t1(a int not null primary key) engine=innodb;
insert into t1 values (1);
insert into t1 values (2);
connection node_2;
set global wsrep_sync_wait=15;
select count(*) from t1;
count(*)
2
connection node_1;
drop table t1;
@@ -16,22 +16,19 @@ SET GLOBAL wsrep_slave_threads = 1;
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*)
1
SET GLOBAL wsrep_slave_threads = 64;
connection node_1;
INSERT INTO t1 VALUES (1);
connection node_2;
SELECT COUNT(*) = 1 FROM t1;
COUNT(*) = 1
SELECT COUNT(*) FROM t1;
COUNT(*)
1
SET GLOBAL wsrep_slave_threads = 64;
SET GLOBAL wsrep_slave_threads = 1;
connection node_1;
connection node_2;
SELECT COUNT(*) FROM t2;
COUNT(*)
64
SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*)
1
70
SET GLOBAL wsrep_slave_threads = 1;
DROP TABLE t1;
DROP TABLE t2;
@@ -47,11 +44,12 @@ connection node_1;
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
INSERT INTO t1 VALUES (DEFAULT);
DROP TABLE t1;
connection node_2;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t%';
NAME
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%wsrep aborter%';
COUNT(*) = 1
1
# End of tests
@@ -0,0 +1,8 @@
!include ../galera_2nodes.cnf

[mysqld.1]
wsrep-debug=ON

[mysqld.2]
wsrep-debug=ON

0 comments on commit ec49976

Please sign in to comment.
You can’t perform that action at this time.