Skip to content

Commit 8a86df3

Browse files
committed
MDEV-31088 Server freeze due to innodb_change_buffering
A 3-thread deadlock has been frequently observed when using innodb_change_buffering!=none and innodb_file_per_table=0:

(1) ibuf_merge_or_delete_for_page() holding an exclusive latch on the block and waiting for an exclusive tablespace latch in fseg_page_is_allocated()

(2) btr_free_but_not_root() in fseg_free_step() waiting for an exclusive tablespace latch

(3) fsp_alloc_free_page() holding the exclusive tablespace latch and waiting for a latch on the block, which it is reallocating for something else

While this was reproduced using innodb_file_per_table=0, this hang should theoretically be possible in .ibd files as well, when the recovery or cleanup of a failed DROP INDEX or ADD INDEX is executing concurrently with something that involves page allocation.

ibuf_merge_or_delete_for_page(): Avoid invoking fseg_page_is_allocated() when block==nullptr. The call was redundant in this case, and it could cause deadlocks due to latching order violation.

ibuf_read_merge_pages(): Acquire an exclusive tablespace latch before invoking buf_page_get_gen(), which may cause fseg_page_is_allocated() to be invoked in ibuf_merge_or_delete_for_page().

Note: This will not fix all latching order violations in this area! Deadlocks involving ibuf_merge_or_delete_for_page(block!=nullptr) are still possible if the caller is not acquiring an exclusive tablespace latch upfront. This would be the case in any read operation that involves a change buffer merge, such as SELECT, CHECK TABLE, or any DML operation that cannot be buffered in the change buffer.
1 parent 548a41c commit 8a86df3

File tree

1 file changed

+16
-10
lines changed

1 file changed

+16
-10
lines changed

storage/innobase/ibuf/ibuf0ibuf.cc

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2363,6 +2363,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
23632363
}
23642364

23652365
const ulint zip_size = s->zip_size(), size = s->size;
2366+
s->x_lock();
23662367
s->release();
23672368
mtr_t mtr;
23682369

@@ -2380,13 +2381,17 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
23802381
|| !page_is_leaf(block->page.frame);
23812382
mtr.commit();
23822383
if (err == DB_TABLESPACE_DELETED) {
2384+
s->x_unlock();
23832385
goto tablespace_deleted;
23842386
}
23852387
if (!remove) {
2388+
s->x_unlock();
23862389
continue;
23872390
}
23882391
}
23892392

2393+
s->x_unlock();
2394+
23902395
if (srv_shutdown_state == SRV_SHUTDOWN_NONE
23912396
|| srv_fast_shutdown) {
23922397
continue;
@@ -2415,7 +2420,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
24152420
/* Prevent an infinite loop, by removing entries from
24162421
the change buffer in the case the bitmap bits were
24172422
wrongly clear even though buffered changes exist. */
2418-
ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i]));
2423+
ibuf_delete_recs(page_id_t(space_id, page_nos[i]));
24192424
}
24202425
}
24212426

@@ -4193,25 +4198,26 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
41934198

41944199
ibuf_mtr_commit(&mtr);
41954200

4196-
if (bitmap_bits
4197-
&& DB_SUCCESS
4201+
if (!bitmap_bits) {
4202+
done:
4203+
/* No changes are buffered for this page. */
4204+
space->release();
4205+
return DB_SUCCESS;
4206+
}
4207+
4208+
if (!block
4209+
|| DB_SUCCESS
41984210
== fseg_page_is_allocated(space, page_id.page_no())) {
41994211
ibuf_mtr_start(&mtr);
42004212
mtr.set_named_space(space);
42014213
ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
42024214
ibuf_mtr_commit(&mtr);
4203-
bitmap_bits = 0;
42044215
if (!block
42054216
|| btr_page_get_index_id(block->page.frame)
42064217
!= DICT_IBUF_ID_MIN + IBUF_SPACE_ID) {
42074218
ibuf_delete_recs(page_id);
42084219
}
4209-
}
4210-
4211-
if (!bitmap_bits) {
4212-
/* No changes are buffered for this page. */
4213-
space->release();
4214-
return DB_SUCCESS;
4220+
goto done;
42154221
}
42164222
}
42174223

0 commit comments

Comments
 (0)