MDEV-24280 InnoDB triggers too many independent periodic tasks

dr-m · dr-m · commit 657fcdf430f3 · 2020-11-25T16:54:00.000+02:00
A side effect of MDEV-16264 is that a large number of threads will be created at server startup, to be destroyed after a minute or two. One source of such thread creation is srv_start_periodic_timer(). InnoDB is creating 3 periodic tasks: srv_master_callback (1Hz) srv_error_monitor_task (1Hz), and srv_monitor_task (0.2Hz). It appears that we can merge srv_error_monitor_task and srv_monitor_task and have them invoked 4 times per minute (every 15 seconds). This will affect our ability to enforce innodb_fatal_semaphore_wait_threshold and some computations around BUF_LRU_STAT_N_INTERVAL. We could remove srv_master_callback along with the DROP TABLE queue at some point of time in the future. We must keep it independent of the innodb_fatal_semaphore_wait_threshold detection, because the background DROP TABLE queue could get stuck due to dict_sys being locked by another thread. For now, srv_master_callback must be invoked once per second, so that innodb_flush_log_at_timeout=1 can work. BUF_LRU_STAT_N_INTERVAL: Reduce the precision and extend the time from 50*1 second to 4*15 seconds. srv_error_monitor_timer: Remove. MAX_MUTEX_NOWAIT: Increase from 20*1 second to 2*15 seconds. srv_refresh_innodb_monitor_stats(): Avoid a repeated call to time(NULL). Change the interval to less than 60 seconds. srv_monitor(): Renamed from srv_monitor_task. srv_monitor_task(): Renamed from srv_error_monitor_task(). Invoked only once in 15 seconds. Invoke also srv_monitor(). Increase the fatal_cnt threshold from 10*1 second to 1*15 seconds. sync_array_print_long_waits_low(): Invoke time(NULL) only once. Remove a bogus message about printouts for 30 seconds. Those printouts were effectively already disabled in MDEV-16264 (commit 5e62b6a).
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
@@ -76,13 +76,12 @@ uncompressed and compressed data), which must be clean. */
 /* @{ */
 
 /** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
+Updated at SRV_MONITOR_INTERVAL (the buf_LRU_stat_update() call rate). */
+static constexpr ulint BUF_LRU_STAT_N_INTERVAL= 4;
 
 /** Co-efficient with which we multiply I/O operations to equate them
 with page_zip_decompress() operations. */
-static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
+static constexpr ulint BUF_LRU_IO_TO_UNZIP_FACTOR= 50;
 
 /** Sampled values buf_LRU_stat_cur.
 Not protected by any mutex.  Updated by buf_LRU_stat_update(). */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
@@ -702,13 +702,6 @@ Complete the shutdown tasks such as background DROP TABLE,
 and optionally change buffer merge (on innodb_fast_shutdown=0). */
 void srv_shutdown(bool ibuf_merge);
 
-
-/*************************************************************************
-A task which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-*/
-void srv_error_monitor_task(void*);
-
 } /* extern "C" */
 
 #ifdef UNIV_DEBUG
@@ -900,12 +893,14 @@ struct srv_slot_t{
 
 extern tpool::thread_pool *srv_thread_pool;
 extern std::unique_ptr<tpool::timer> srv_master_timer;
-extern std::unique_ptr<tpool::timer> srv_error_monitor_timer;
 extern std::unique_ptr<tpool::timer> srv_monitor_timer;
 
+/** The interval at which srv_monitor_task is invoked, in milliseconds */
+constexpr unsigned SRV_MONITOR_INTERVAL= 15000; /* 4 times per minute */
+
 static inline void srv_monitor_timer_schedule_now()
 {
-  srv_monitor_timer->set_time(0, 5000);
+  srv_monitor_timer->set_time(0, SRV_MONITOR_INTERVAL);
 }
 static inline void srv_start_periodic_timer(std::unique_ptr<tpool::timer>& t,
                                             void (*func)(void*), int period)
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
@@ -1011,7 +1011,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
 		!srv_read_only_mode && srv_fast_shutdown < 2) {
 		buf_dump_start();
 	}
-	srv_error_monitor_timer.reset();
 	srv_monitor_timer.reset();
 	lock_sys.timeout_timer.reset();
 	if (do_srv_shutdown) {
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
@@ -186,7 +186,7 @@ tpool::thread_pool* srv_thread_pool;
 
 /** Maximum number of times allowed to conditionally acquire
 mutex before switching to blocking wait on the mutex */
-#define MAX_MUTEX_NOWAIT	20
+#define MAX_MUTEX_NOWAIT	2
 
 /** Check whether the number of failed nonblocking mutex
 acquisition attempts exceeds maximum allowed value. If so,
@@ -555,8 +555,7 @@ struct purge_coordinator_state
 
 static purge_coordinator_state purge_state;
 
-/** threadpool timer for srv_error_monitor_task(). */
-std::unique_ptr<tpool::timer> srv_error_monitor_timer;
+/** threadpool timer for srv_monitor_task() */
 std::unique_ptr<tpool::timer> srv_monitor_timer;
 
 
@@ -769,16 +768,11 @@ srv_boot(void)
 
 /******************************************************************//**
 Refreshes the values used to calculate per-second averages. */
-static
-void
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
+static void srv_refresh_innodb_monitor_stats(time_t current_time)
 {
 	mutex_enter(&srv_innodb_monitor_mutex);
 
-	time_t current_time = time(NULL);
-
-	if (difftime(current_time, srv_last_monitor_time) <= 60) {
+	if (difftime(current_time, srv_last_monitor_time) < 60) {
 		/* We referesh InnoDB Monitor values so that averages are
 		printed from at most 60 last seconds */
 		mutex_exit(&srv_innodb_monitor_mutex);
@@ -1309,26 +1303,18 @@ struct srv_monitor_state_t
 static srv_monitor_state_t monitor_state;
 
 /** A task which prints the info output by various InnoDB monitors.*/
-void srv_monitor_task(void*)
+static void srv_monitor()
 {
-	double		time_elapsed;
-	time_t		current_time;
-
-	ut_ad(!srv_read_only_mode);
-
-	current_time = time(NULL);
-
-	time_elapsed = difftime(current_time, monitor_state.last_monitor_time);
+	time_t current_time = time(NULL);
 
-	if (time_elapsed > 15) {
+	if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
 		monitor_state.last_monitor_time = current_time;
 
 		if (srv_print_innodb_monitor) {
 			/* Reset mutex_skipped counter everytime
 			srv_print_innodb_monitor changes. This is to
 			ensure we will not be blocked by lock_sys.mutex
-			for short duration information printing,
-			such as requested by sync_array_print_long_waits() */
+			for short duration information printing */
 			if (!monitor_state.last_srv_print_monitor) {
 				monitor_state.mutex_skipped = 0;
 				monitor_state.last_srv_print_monitor = true;
@@ -1366,14 +1352,14 @@ void srv_monitor_task(void*)
 		}
 	}
 
-	srv_refresh_innodb_monitor_stats();
+	srv_refresh_innodb_monitor_stats(current_time);
 }
 
 /*********************************************************************//**
 A task which prints warnings about semaphore waits which have lasted
 too long. These can be used to track bugs which cause hangs.
 */
-void srv_error_monitor_task(void*)
+void srv_monitor_task(void*)
 {
 	/* number of successive fatal timeouts observed */
 	static ulint		fatal_cnt;
@@ -1408,20 +1394,17 @@ void srv_error_monitor_task(void*)
 	if (sync_array_print_long_waits(&waiter, &sema)
 	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-	  if (os_event_is_set(srv_allow_writes_event)) {
-#endif /* WITH_WSREP */
-		fatal_cnt++;
-#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-	  } else {
-		fprintf(stderr,
-			"WSREP: avoiding InnoDB self crash due to long "
-			"semaphore wait of  > %lu seconds\n"
-			"Server is processing SST donor operation, "
-			"fatal_cnt now: " ULINTPF,
-			srv_fatal_semaphore_wait_threshold, fatal_cnt);
-	  }
+		if (!os_event_is_set(srv_allow_writes_event)) {
+			fprintf(stderr,
+				"WSREP: avoiding InnoDB self crash due to "
+				"long semaphore wait of  > %lu seconds\n"
+				"Server is processing SST donor operation, "
+				"fatal_cnt now: " ULINTPF,
+				srv_fatal_semaphore_wait_threshold, fatal_cnt);
+			return;
+		}
 #endif /* WITH_WSREP */
-		if (fatal_cnt > 10) {
+		if (fatal_cnt++) {
 			ib::fatal() << "Semaphore wait has lasted > "
 				<< srv_fatal_semaphore_wait_threshold
 				<< " seconds. We intentionally crash the"
@@ -1432,6 +1415,8 @@ void srv_error_monitor_task(void*)
 		old_waiter = waiter;
 		old_sema = sema;
 	}
+
+	srv_monitor();
 }
 
 /******************************************************************//**
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
@@ -1826,8 +1826,8 @@ dberr_t srv_start(bool create_new_db)
 
 		DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;);
 		/* Create the task which warns of long semaphore waits */
-		srv_start_periodic_timer(srv_error_monitor_timer, srv_error_monitor_task, 1000);
-		srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task, 5000);
+		srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task,
+					 SRV_MONITOR_INTERVAL);
 
 #ifndef DBUG_OFF
 skip_monitors:
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
@@ -893,6 +893,7 @@ sync_array_print_long_waits_low(
 #else
 # define SYNC_ARRAY_TIMEOUT	240
 #endif
+	const time_t now = time(NULL);
 
 	for (ulint i = 0; i < arr->n_cells; i++) {
 
@@ -908,7 +909,7 @@ sync_array_print_long_waits_low(
 			continue;
 		}
 
-		double	diff = difftime(time(NULL), cell->reservation_time);
+		double	diff = difftime(now, cell->reservation_time);
 
 		if (diff > SYNC_ARRAY_TIMEOUT) {
 			ib::warn() << "A long semaphore wait:";
@@ -982,12 +983,6 @@ sync_array_print_long_waits(
 	}
 
 	if (noticed) {
-		fprintf(stderr,
-			"InnoDB: ###### Starts InnoDB Monitor"
-			" for 30 secs to print diagnostic info:\n");
-
-		my_bool old_val = srv_print_innodb_monitor;
-
 		/* If some crucial semaphore is reserved, then also the InnoDB
 		Monitor can hang, and we do not get diagnostics. Since in
 		many cases an InnoDB hang is caused by a pwrite() or a pread()
@@ -1000,14 +995,7 @@ sync_array_print_long_waits(
 			MONITOR_VALUE(MONITOR_OS_PENDING_READS),
 			MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
 
-		srv_print_innodb_monitor = TRUE;
-
 		lock_wait_timeout_task(nullptr);
-
-		srv_print_innodb_monitor = static_cast<my_bool>(old_val);
-		fprintf(stderr,
-			"InnoDB: ###### Diagnostic info printed"
-			" to the standard error stream\n");
 	}
 
 	return(fatal);

Original file line number	Diff line number	Diff line change
`@@ -1011,7 +1011,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()`
`1011`	`1011`	`!srv_read_only_mode && srv_fast_shutdown < 2) {`
`1012`	`1012`	`buf_dump_start();`
`1013`	`1013`	`}`
`1014`		`- srv_error_monitor_timer.reset();`
`1015`	`1014`	`srv_monitor_timer.reset();`
`1016`	`1015`	`lock_sys.timeout_timer.reset();`
`1017`	`1016`	`if (do_srv_shutdown) {`