Skip to content

Commit 7b16291

Browse files
committed
MDEV-15707 : deadlock in Innodb IO code, caused by change buffering.
In the async IO completion code, after reading a page, InnoDB can wait for the completion of other buffer pool reads. This happens, for example, when change buffering is active. On Windows, InnoDB could deadlock because it did not have dedicated threads for processing change buffer asynchronous reads. The fix is for Windows to now have the same background threads as other platforms, including dedicated threads for ibuf and log AIOs. Ibuf and read completions are dispatched to their threads with PostQueuedCompletionStatus(); write and log completions are processed in the thread where they arrive.
1 parent b4c2ceb commit 7b16291

File tree

4 files changed

+117
-32
lines changed

4 files changed

+117
-32
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
CREATE TABLE t1(
2+
a INT AUTO_INCREMENT PRIMARY KEY,
3+
b CHAR(255),
4+
INDEX(b))
5+
ENGINE=InnoDB;
6+
INSERT INTO t1(b) SELECT UUID();
7+
BEGIN;
8+
INSERT INTO t1(b) SELECT UUID() FROM t1;
9+
INSERT INTO t1(b) SELECT UUID() FROM t1;
10+
INSERT INTO t1(b) SELECT UUID() FROM t1;
11+
INSERT INTO t1(b) SELECT UUID() FROM t1;
12+
INSERT INTO t1(b) SELECT UUID() FROM t1;
13+
INSERT INTO t1(b) SELECT UUID() FROM t1;
14+
INSERT INTO t1(b) SELECT UUID() FROM t1;
15+
INSERT INTO t1(b) SELECT UUID() FROM t1;
16+
INSERT INTO t1(b) SELECT UUID() FROM t1;
17+
INSERT INTO t1(b) SELECT UUID() FROM t1;
18+
INSERT INTO t1(b) SELECT UUID() FROM t1;
19+
INSERT INTO t1(b) SELECT UUID() FROM t1;
20+
INSERT INTO t1(b) SELECT UUID() FROM t1;
21+
INSERT INTO t1(b) SELECT UUID() FROM t1;
22+
COMMIT;
23+
UPDATE t1 SET b=UUID();
24+
DROP TABLE t1;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--innodb --innodb-buffer-pool-size=5MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
--source include/windows.inc
2+
3+
# Deadlock in conjunction with InnoDB change buffering.
4+
5+
# When InnoDB change buffering kicks in, i.e. the secondary non-unique index
6+
# does not fit into the buffer pool, then, on Windows, InnoDB
7+
# background threads could deadlock whenever an index page is
8+
# read and the page needs a change buffer load/merge.
9+
# The test tries to reproduce this situation by creating an index
10+
# that does not fit into the buffer pool and doing a large update.
11+
12+
CREATE TABLE t1(
13+
a INT AUTO_INCREMENT PRIMARY KEY,
14+
b CHAR(255),
15+
INDEX(b))
16+
ENGINE=InnoDB;
17+
18+
INSERT INTO t1(b) SELECT UUID();
19+
BEGIN;
20+
let $i=`select cast(log2(@@innodb_buffer_pool_size/255) as int)`;
21+
while ($i)
22+
{
23+
INSERT INTO t1(b) SELECT UUID() FROM t1;
24+
dec $i;
25+
}
26+
COMMIT;
27+
UPDATE t1 SET b=UUID();
28+
DROP TABLE t1;

storage/innobase/os/os0file.cc

Lines changed: 64 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ static ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
107107
#else
108108
/** Umask for creating files */
109109
static ulint os_innodb_umask = 0;
110-
static HANDLE completion_port;
111-
static HANDLE read_completion_port;
110+
static HANDLE data_completion_port;
111+
static HANDLE log_completion_port;
112+
112113
static DWORD fls_sync_io = FLS_OUT_OF_INDEXES;
113114
#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1
114115
#endif /* _WIN32 */
@@ -443,11 +444,17 @@ class AIO {
443444
#endif /* LINUX_NATIVE_AIO */
444445

445446
#ifdef WIN_ASYNC_IO
446-
447+
HANDLE m_completion_port;
447448
/** Wake up all AIO threads in Windows native aio */
448449
static void wake_at_shutdown() {
449-
PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
450-
PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
450+
AIO *all_arrays[] = {s_reads, s_writes, s_log, s_ibuf };
451+
for (size_t i = 0; i < array_elements(all_arrays); i++) {
452+
AIO *a = all_arrays[i];
453+
if (a) {
454+
PostQueuedCompletionStatus(a->m_completion_port, 0,
455+
IOCP_SHUTDOWN_KEY, 0);
456+
}
457+
}
451458
}
452459
#endif /* WIN_ASYNC_IO */
453460

@@ -3520,15 +3527,11 @@ SyncFileIO::execute(Slot* slot)
35203527
struct WinIoInit
35213528
{
35223529
WinIoInit() {
3523-
completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
3524-
read_completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); ut_a(completion_port && read_completion_port);
35253530
fls_sync_io = FlsAlloc(win_free_syncio_event);
35263531
ut_a(fls_sync_io != FLS_OUT_OF_INDEXES);
35273532
}
35283533

35293534
~WinIoInit() {
3530-
CloseHandle(completion_port);
3531-
CloseHandle(read_completion_port);
35323535
FlsFree(fls_sync_io);
35333536
}
35343537
};
@@ -4300,11 +4303,17 @@ os_file_create_func(
43004303
*success = true;
43014304

43024305
if (srv_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) {
4303-
/* Bind the file handle to completion port */
4304-
ut_a(CreateIoCompletionPort(file, completion_port, 0, 0));
4306+
/* Bind the file handle to completion port. Completion port
4307+
might not be created yet, in some stages of backup, but
4308+
must always be there for the server.*/
4309+
HANDLE port =(type == OS_LOG_FILE)?
4310+
log_completion_port : data_completion_port;
4311+
ut_a(port || srv_operation != SRV_OPERATION_NORMAL);
4312+
if (port) {
4313+
ut_a(CreateIoCompletionPort(file, port, 0, 0));
4314+
}
43054315
}
43064316
}
4307-
43084317
} while (retry);
43094318

43104319
return(file);
@@ -5705,6 +5714,15 @@ os_aio_handler(
57055714
return(err);
57065715
}
57075716

5717+
#ifdef WIN_ASYNC_IO
5718+
static HANDLE new_completion_port()
5719+
{
5720+
HANDLE h = CreateIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 0);
5721+
ut_a(h);
5722+
return h;
5723+
}
5724+
#endif
5725+
57085726
/** Constructor
57095727
@param[in] id The latch ID
57105728
@param[in] n Number of AIO slots
@@ -5721,6 +5739,9 @@ AIO::AIO(
57215739
,m_aio_ctx(),
57225740
m_events(m_slots.size())
57235741
# endif /* LINUX_NATIVE_AIO */
5742+
#ifdef WIN_ASYNC_IO
5743+
,m_completion_port(new_completion_port())
5744+
#endif
57245745
{
57255746
ut_a(n > 0);
57265747
ut_a(m_n_segments > 0);
@@ -5887,6 +5908,9 @@ AIO::~AIO()
58875908
ut_free(m_aio_ctx);
58885909
}
58895910
#endif /* LINUX_NATIVE_AIO */
5911+
#if defined(WIN_ASYNC_IO)
5912+
CloseHandle(m_completion_port);
5913+
#endif
58905914

58915915
m_slots.clear();
58925916
}
@@ -5973,6 +5997,12 @@ AIO::start(
59735997
return(false);
59745998
}
59755999

6000+
#ifdef WIN_ASYNC_IO
6001+
data_completion_port = s_writes->m_completion_port;
6002+
log_completion_port =
6003+
s_log ? s_log->m_completion_port : data_completion_port;
6004+
#endif
6005+
59766006
n_segments += n_writers;
59776007

59786008
for (ulint i = start + n_readers; i < n_segments; ++i) {
@@ -6460,8 +6490,7 @@ therefore no other thread is allowed to do the freeing!
64606490
@param[out] type OS_FILE_WRITE or ..._READ
64616491
@return DB_SUCCESS or error code */
64626492

6463-
#define READ_SEGMENT(s) (s < srv_n_read_io_threads)
6464-
#define WRITE_SEGMENT(s) !READ_SEGMENT(s)
6493+
64656494

64666495
static
64676496
dberr_t
@@ -6484,8 +6513,11 @@ os_aio_windows_handler(
64846513
we do not have to acquire the protecting mutex yet */
64856514

64866515
ut_ad(os_aio_validate_skip());
6516+
AIO *my_array;
6517+
AIO::get_array_and_local_segment(&my_array, segment);
64876518

6488-
HANDLE port = READ_SEGMENT(segment) ? read_completion_port : completion_port;
6519+
HANDLE port = my_array->m_completion_port;
6520+
ut_ad(port);
64896521
for (;;) {
64906522
DWORD len;
64916523
ret = GetQueuedCompletionStatus(port, &len, &key,
@@ -6507,25 +6539,26 @@ os_aio_windows_handler(
65076539
}
65086540

65096541
slot->n_bytes= len;
6542+
ut_a(slot->array);
6543+
HANDLE slot_port = slot->array->m_completion_port;
6544+
if (slot_port != port) {
6545+
/* there are no redirections between data and log */
6546+
ut_ad(port == data_completion_port);
6547+
ut_ad(slot_port != log_completion_port);
65106548

6511-
if (WRITE_SEGMENT(segment) && slot->type.is_read()) {
65126549
/*
6513-
Redirect read completions to the dedicated completion port
6514-
and thread. We need to split read and write threads. If we do not
6515-
do that, and just allow all io threads process all IO, it is possible
6516-
to get stuck in a deadlock in buffer pool code,
6517-
6518-
Currently, the problem is solved this way - "write io" threads
6519-
always get all completion notifications, from both async reads and
6520-
writes. Write completion is handled in the same thread that gets it.
6521-
Read completion is forwarded via PostQueueCompletionStatus())
6522-
to the second completion port dedicated solely to reads. One of the
6523-
"read io" threads waiting on this port will finally handle the IO.
6524-
6525-
Forwarding IO completion this way costs a context switch , and this
6526-
seems tolerable since asynchronous reads are by far less frequent.
6550+
Redirect completions to the dedicated completion port
6551+
and threads.
6552+
6553+
"Write array" threads receive write,read and ibuf
6554+
notifications, read and ibuf completions are redirected.
6555+
6556+
Forwarding IO completion this way costs a context switch,
6557+
and this seems tolerable since asynchronous reads are by
6558+
far less frequent.
65276559
*/
6528-
ut_a(PostQueuedCompletionStatus(read_completion_port, len, key, &slot->control));
6560+
ut_a(PostQueuedCompletionStatus(slot_port,
6561+
len, key, &slot->control));
65296562
}
65306563
else {
65316564
break;
@@ -6586,7 +6619,6 @@ os_aio_windows_handler(
65866619
err = AIOHandler::post_io_processing(slot);
65876620
}
65886621

6589-
ut_a(slot->array);
65906622
slot->array->release_with_mutex(slot);
65916623

65926624
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS

0 commit comments

Comments
 (0)