Skip to content

Commit 1479273

Browse files
grooverdandr-m
authored andcommitted
MDEV-14705: slow innodb startup/shutdown can exceed systemd timeout
Use systemd EXTEND_TIMEOUT_USEC to advise systemd of progress Move towards progress measures rather than pure time based measures. Progress reporting at numberious shutdown/startup locations incuding: * For innodb_fast_shutdown=0 trx_roll_must_shutdown() for rolling back incomplete transactions. * For merging the change buffer (in srv_shutdown(bool ibuf_merge)) * For purging history, srv_do_purge Thanks Marko for feedback and suggestions.
1 parent e7f4e61 commit 1479273

File tree

18 files changed

+151
-30
lines changed

18 files changed

+151
-30
lines changed

include/my_systemd.h renamed to include/my_service_manager.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
*/
1717

1818

19-
#ifndef MY_SYSTEMD_INCLUDED
20-
#define MY_SYSTEMD_INCLUDED
19+
#ifndef MY_SERVICE_MANAGER_INCLUDED
20+
#define MY_SERVICE_MANAGER_INCLUDED
2121

2222
#if defined(HAVE_SYSTEMD) && !defined(EMBEDDED_LIBRARY)
2323
/*
@@ -26,9 +26,14 @@
2626
*/
2727
#define __STDC_FORMAT_MACROS
2828
#include <systemd/sd-daemon.h>
29+
/** INTERVAL in seconds followed by printf style status */
30+
#define service_manager_extend_timeout(INTERVAL, FMTSTR, ...) \
31+
sd_notifyf(0, "STATUS=" FMTSTR "\nEXTEND_TIMEOUT_USEC=%u\n", ##__VA_ARGS__, INTERVAL * 1000000)
32+
2933
#else
3034
#define sd_notify(X, Y)
3135
#define sd_notifyf(E, F, ...)
36+
#define service_manager_extend_timeout(I, FMTSTR, ...)
3237
#endif
3338

34-
#endif /* MY_SYSTEMD_INCLUDED */
39+
#endif /* MY_SERVICE_MANAGER_INCLUDED */

sql/mysqld.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
#include <poll.h>
112112
#endif
113113

114-
#include <my_systemd.h>
114+
#include <my_service_manager.h>
115115

116116
#define mysqld_charset &my_charset_latin1
117117

@@ -6482,7 +6482,7 @@ void handle_connections_sockets()
64826482
#endif
64836483

64846484
sd_notify(0, "READY=1\n"
6485-
"STATUS=Taking your SQL requests now...");
6485+
"STATUS=Taking your SQL requests now...\n");
64866486

64876487
DBUG_PRINT("general",("Waiting for connections."));
64886488
MAYBE_BROKEN_SYSCALL;
@@ -6700,7 +6700,7 @@ void handle_connections_sockets()
67006700
set_current_thd(0);
67016701
}
67026702
sd_notify(0, "STOPPING=1\n"
6703-
"STATUS=Shutdown in progress");
6703+
"STATUS=Shutdown in progress\n");
67046704
DBUG_VOID_RETURN;
67056705
}
67066706

storage/innobase/buf/buf0dump.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Created April 08, 2011 Vasil Dimov
4242
#include "ut0byte.h" /* ut_ull_create() */
4343
#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
4444
#include "mysql/service_wsrep.h" /* wsrep_recovery */
45+
#include <my_service_manager.h>
4546

4647
enum status_severity {
4748
STATUS_INFO,
@@ -333,6 +334,14 @@ buf_dump(
333334
i + 1, srv_buf_pool_instances,
334335
j + 1, n_pages);
335336
}
337+
if ( (j % 1024) == 0) {
338+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
339+
"Dumping buffer pool "
340+
ULINTPF "/" ULINTPF ", "
341+
"page " ULINTPF "/" ULINTPF,
342+
i + 1, srv_buf_pool_instances,
343+
j + 1, n_pages);
344+
}
336345
}
337346

338347
ut_free(dump);

storage/innobase/buf/buf0flu.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Created 11/11/1995 Heikki Tuuri
5454
#include "mysql/service_thd_wait.h"
5555
#include "fil0pagecompress.h"
5656

57+
#include <my_service_manager.h>
58+
5759
/** Number of pages flushed through non flush_list flushes. */
5860
static ulint buf_lru_flush_page_count = 0;
5961

@@ -528,6 +530,14 @@ buf_flush_remove(
528530
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
529531
ulint zip_size;
530532

533+
/* TODO Marko: "I think that it might be useful to have a global time base for the shutdown progress reporting and extending the timeout intervals." */
534+
if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)) {
535+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
536+
"Flush and remove page with tablespace id %d"
537+
", Poolid %d, flush list length %d",
538+
bpage->space, buf_pool->instance_no, UT_LIST_GET_LEN(buf_pool->flush_list));
539+
}
540+
531541
ut_ad(buf_pool_mutex_own(buf_pool));
532542
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
533543
ut_ad(bpage->in_flush_list);

storage/innobase/handler/ha_innodb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
5555
#include <io.h>
5656
#endif
5757

58-
#include <my_systemd.h>
58+
#include <my_service_manager.h>
5959

6060
/** @file ha_innodb.cc */
6161

storage/innobase/include/univ.i

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ component, i.e. we show M.N.P as M.N */
6565
IB_TO_STR(INNODB_VERSION_MAJOR) "." \
6666
IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
6767

68+
/** How far ahead should we tell the service manager the timeout
69+
(time in seconds) */
70+
#define INNODB_EXTEND_TIMEOUT_INTERVAL 30
71+
6872
#ifdef MYSQL_DYNAMIC_PLUGIN
6973
/* In the dynamic plugin, redefine some externally visible symbols
7074
in order not to conflict with the symbols of a builtin InnoDB. */

storage/innobase/log/log0log.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Created 12/9/1995 Heikki Tuuri
3939

4040
#ifndef UNIV_HOTBACKUP
4141
#if MYSQL_VERSION_ID < 100200
42-
# include <my_systemd.h> /* sd_notifyf() */
42+
# include <my_service_manager.h>
4343
#endif
4444

4545
#include "mem0mem.h"
@@ -1443,6 +1443,11 @@ log_write_up_to(
14431443
return;
14441444
}
14451445

1446+
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
1447+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
1448+
"log write up to: %llu", lsn);
1449+
}
1450+
14461451
loop:
14471452
ut_ad(++loop_count < 100);
14481453

@@ -2351,8 +2356,9 @@ log_group_read_log_seg(
23512356
if (recv_sys->report(ut_time())) {
23522357
ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF,
23532358
start_lsn);
2354-
sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF,
2355-
start_lsn);
2359+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
2360+
"Read redo log up to LSN=" LSN_PF,
2361+
start_lsn);
23562362
}
23572363

23582364
if (start_lsn != end_lsn) {
@@ -3320,10 +3326,16 @@ logs_empty_and_mark_files_at_shutdown(void)
33203326
before proceeding further. */
33213327

33223328
count = 0;
3329+
#define COUNT_INTERVAL 600
3330+
#define CHECK_INTERVAL 100000
3331+
service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
3332+
"Waiting for page cleaner");
33233333
while (buf_page_cleaner_is_active) {
33243334
++count;
3325-
os_thread_sleep(100000);
3326-
if (srv_print_verbose_log && count > 600) {
3335+
os_thread_sleep(CHECK_INTERVAL);
3336+
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
3337+
service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
3338+
"Waiting for page cleaner");
33273339
ib_logf(IB_LOG_LEVEL_INFO,
33283340
"Waiting for page_cleaner to "
33293341
"finish flushing of buffer pool");
@@ -3408,6 +3420,8 @@ logs_empty_and_mark_files_at_shutdown(void)
34083420
}
34093421

34103422
if (!srv_read_only_mode) {
3423+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
3424+
"ensuring dirty buffer pool are written to log");
34113425
log_make_checkpoint_at(LSN_MAX, TRUE);
34123426

34133427
mutex_enter(&log_sys->mutex);
@@ -3451,6 +3465,8 @@ logs_empty_and_mark_files_at_shutdown(void)
34513465
srv_thread_type type = srv_get_active_thread_type();
34523466
ut_a(type == SRV_NONE);
34533467

3468+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
3469+
"Free innodb buffer pool");
34543470
buf_all_freed();
34553471

34563472
ut_a(lsn == log_sys->lsn);

storage/innobase/log/log0recv.cc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Created 9/20/1997 Heikki Tuuri
3030
#include <stdio.h> // Solaris/x86 header file bug
3131

3232
#include <vector>
33-
#include <my_systemd.h>
33+
#include <my_service_manager.h>
3434

3535
#include "log0recv.h"
3636

@@ -1717,8 +1717,8 @@ recv_recover_page_func(
17171717
if (recv_sys->report(time)) {
17181718
ib_logf(IB_LOG_LEVEL_INFO,
17191719
"To recover: " ULINTPF " pages from log", n);
1720-
sd_notifyf(0, "STATUS=To recover: " ULINTPF
1721-
" pages from log", n);
1720+
service_manager_extend_timeout(
1721+
INNODB_EXTEND_TIMEOUT_INTERVAL, "To recover: " ULINTPF " pages from log", n);
17221722
}
17231723
}
17241724

@@ -2911,6 +2911,9 @@ recv_init_crash_recovery(void)
29112911
and restore them from the doublewrite buffer if
29122912
possible */
29132913

2914+
service_manager_extend_timeout(
2915+
INNODB_EXTEND_TIMEOUT_INTERVAL, "Starting Innodb crash recovery");
2916+
29142917
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
29152918
buf_dblwr_process();
29162919

storage/innobase/srv/srv0srv.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ Created 10/8/1995 Heikki Tuuri
7777
#include "fil0pagecompress.h"
7878
#include "btr0scrub.h"
7979

80+
#include <my_service_manager.h>
81+
82+
#ifdef WITH_WSREP
83+
extern int wsrep_debug;
84+
extern int wsrep_trx_is_aborting(void *thd_ptr);
85+
#endif
8086
/* The following is the maximum allowed duration of a lock wait. */
8187
UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT;
8288

@@ -2727,6 +2733,10 @@ srv_do_purge(
27272733

27282734
*n_total_purged += n_pages_purged;
27292735

2736+
if (n_pages_purged > 0) {
2737+
service_manager_extend_timeout(
2738+
INNODB_EXTEND_TIMEOUT_INTERVAL, "Innodb %d pages purged", n_pages_purged);
2739+
}
27302740
} while (!srv_purge_should_exit(n_pages_purged)
27312741
&& n_pages_purged > 0
27322742
&& purge_sys->state == PURGE_STATE_RUN);

storage/innobase/trx/trx0roll.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Created 3/26/1996 Heikki Tuuri
2525
*******************************************************/
2626

2727
#include "my_config.h"
28-
#include <my_systemd.h>
28+
#include <my_service_manager.h>
2929

3030
#include "trx0roll.h"
3131

@@ -740,11 +740,17 @@ UNIV_INTERN void trx_roll_report_progress()
740740
n_rows += t->undo_no;
741741
}
742742
}
743+
744+
if (n_rows > 0) {
745+
service_manager_extend_timeout(
746+
INNODB_EXTEND_TIMEOUT_INTERVAL,
747+
"To roll back: " ULINTPF " transactions, "
748+
"%llu rows", n_trx, n_rows);
749+
}
750+
743751
ib_logf(IB_LOG_LEVEL_INFO,
744752
"To roll back: " ULINTPF " transactions, "
745753
"%llu rows", n_trx, n_rows);
746-
sd_notifyf(0, "STATUS=To roll back: " ULINTPF " transactions, "
747-
"%llu rows", n_trx, n_rows);
748754
}
749755

750756
mutex_exit(&recv_sys->mutex);

storage/xtradb/buf/buf0dump.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Created April 08, 2011 Vasil Dimov
4242
#include "ut0byte.h" /* ut_ull_create() */
4343
#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
4444
#include "mysql/service_wsrep.h" /* wsrep_recovery */
45+
#include <my_service_manager.h>
4546

4647
enum status_severity {
4748
STATUS_INFO,
@@ -333,6 +334,14 @@ buf_dump(
333334
i + 1, srv_buf_pool_instances,
334335
j + 1, n_pages);
335336
}
337+
if ( (j % 1024) == 0) {
338+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
339+
"Dumping buffer pool "
340+
ULINTPF "/" ULINTPF ", "
341+
"page " ULINTPF "/" ULINTPF,
342+
i + 1, srv_buf_pool_instances,
343+
j + 1, n_pages);
344+
}
336345
}
337346

338347
ut_free(dump);

storage/xtradb/buf/buf0flu.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Created 11/11/1995 Heikki Tuuri
5454
#include "mysql/service_thd_wait.h"
5555
#include "fil0pagecompress.h"
5656

57+
#include <my_service_manager.h>
58+
5759
/** Number of pages flushed through non flush_list flushes. */
5860
// static ulint buf_lru_flush_page_count = 0;
5961

@@ -575,6 +577,14 @@ buf_flush_remove(
575577
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
576578
ulint zip_size;
577579

580+
/* TODO Marko: "I think that it might be useful to have a global time base for the shutdown progress reporting and extending the timeout intervals." */
581+
if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)) {
582+
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
583+
"Flush and remove page with tablespace id %d"
584+
", Poolid %d, flush list length %d",
585+
bpage->space, buf_pool->instance_no, UT_LIST_GET_LEN(buf_pool->flush_list));
586+
}
587+
578588
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
579589
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
580590
ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_DIRTY

storage/xtradb/handler/ha_innodb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
5050
#include <io.h>
5151
#endif
5252

53-
#include <my_systemd.h>
53+
#include <my_service_manager.h>
5454

5555
/** @file ha_innodb.cc */
5656

storage/xtradb/include/univ.i

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ component, i.e. we show M.N.P as M.N */
7373
IB_TO_STR(INNODB_VERSION_MAJOR) "." \
7474
IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
7575

76+
/** How far ahead should we tell the service manager the timeout
77+
(time in seconds) */
78+
#define INNODB_EXTEND_TIMEOUT_INTERVAL 30
79+
7680
#ifdef MYSQL_DYNAMIC_PLUGIN
7781
/* In the dynamic plugin, redefine some externally visible symbols
7882
in order not to conflict with the symbols of a builtin InnoDB. */

0 commit comments

Comments
 (0)