Skip to content

Commit 16d91f8

Browse files
committed
MDEV-29930 Lock order inversion in ibuf_remove_free_page()
The function ibuf_remove_free_page() was waiting for ibuf_mutex while holding ibuf.index->lock. This constitutes a lock order inversion and may cause InnoDB to hang when innodb_change_buffering is enabled and ibuf_merge_or_delete_for_page() is being executed concurrently. In fact, there is no need for ibuf_remove_free_page() to reacquire ibuf_mutex if we make ibuf.seg_size and ibuf.free_list_len protected by the ibuf.index->lock as well as the root page latch rather than by ibuf_mutex. ibuf.seg_size, ibuf.free_list_len: Instead of ibuf_mutex, let the ibuf.index->lock and the root page latch protect these, like ibuf.empty. ibuf_init_at_db_start(): Acquire the root page latch before updating ibuf.seg_size. (The ibuf.index would be created later.) ibuf_data_enough_free_for_insert(), ibuf_data_too_much_free(): Assert also ibuf.index->lock.have_u_or_x(). ibuf_remove_free_page(): Acquire the ibuf.index->lock and the root page latch before accessing ibuf.free_list_len. Simplify the way how the root page latch is released and reacquired. Acquire and release ibuf_mutex only once. ibuf_free_excess_pages(), ibuf_insert_low(): Acquire also ibuf.index->lock before reading ibuf.free_list_len. ibuf_print(): Acquire ibuf.index->lock before reading ibuf.free_list_len and ibuf.seg_size. Reviewed by: Vladislav Lesin Tested by: Matthias Leich
1 parent 4dcd2d8 commit 16d91f8

File tree

2 files changed

+50
-42
lines changed

2 files changed

+50
-42
lines changed

storage/innobase/ibuf/ibuf0ibuf.cc

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ ibuf_size_update(
371371
const page_t* root) /*!< in: ibuf tree root */
372372
{
373373
mysql_mutex_assert_owner(&ibuf_mutex);
374+
ut_ad(!ibuf.index || ibuf.index->lock.have_u_or_x());
374375

375376
ibuf.free_list_len = flst_get_len(root + PAGE_HEADER
376377
+ PAGE_BTR_IBUF_FREE_LIST);
@@ -414,6 +415,15 @@ ibuf_init_at_db_start(void)
414415
return err;
415416
}
416417

418+
if (buf_block_t* block =
419+
buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
420+
FSP_IBUF_TREE_ROOT_PAGE_NO),
421+
0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) {
422+
root = buf_block_get_frame(block);
423+
} else {
424+
goto err_exit;
425+
}
426+
417427
fseg_n_reserved_pages(*header_page,
418428
IBUF_HEADER + IBUF_TREE_SEG_HEADER
419429
+ header_page->page.frame, &ibuf.seg_size, &mtr);
@@ -424,15 +434,6 @@ ibuf_init_at_db_start(void)
424434
ut_ad(ibuf.seg_size >= 2);
425435
} while (0);
426436

427-
if (buf_block_t* block =
428-
buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
429-
FSP_IBUF_TREE_ROOT_PAGE_NO),
430-
0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) {
431-
root = buf_block_get_frame(block);
432-
} else {
433-
goto err_exit;
434-
}
435-
436437
DBUG_EXECUTE_IF("ibuf_init_corrupt",
437438
err = DB_CORRUPTION;
438439
goto err_exit;);
@@ -1746,27 +1747,24 @@ dare to start a pessimistic insert to the insert buffer.
17461747
static inline bool ibuf_data_enough_free_for_insert()
17471748
{
17481749
mysql_mutex_assert_owner(&ibuf_mutex);
1750+
ut_ad(ibuf.index->lock.have_u_or_x());
17491751

17501752
/* We want a big margin of free pages, because a B-tree can sometimes
17511753
grow in size also if records are deleted from it, as the node pointers
17521754
can change, and we must make sure that we are able to delete the
17531755
inserts buffered for pages that we read to the buffer pool, without
17541756
any risk of running out of free space in the insert buffer. */
1755-
17561757
return(ibuf.free_list_len >= (ibuf.size / 2) + 3 * ibuf.height);
17571758
}
17581759

17591760
/*********************************************************************//**
17601761
Checks if there are enough pages in the free list of the ibuf tree that we
17611762
should remove them and free to the file space management.
1762-
@return TRUE if enough free pages in list */
1763-
UNIV_INLINE
1764-
ibool
1765-
ibuf_data_too_much_free(void)
1766-
/*=========================*/
1763+
@return whether enough free pages in list */
1764+
static inline bool ibuf_data_too_much_free()
17671765
{
17681766
mysql_mutex_assert_owner(&ibuf_mutex);
1769-
1767+
ut_ad(ibuf.index->lock.have_u_or_x());
17701768
return(ibuf.free_list_len >= 3 + (ibuf.size / 2) + 3 * ibuf.height);
17711769
}
17721770

@@ -1844,6 +1842,7 @@ static bool ibuf_add_free_page()
18441842
goto corrupted;
18451843
}
18461844

1845+
ut_ad(ibuf.index->lock.have_u_or_x());
18471846
ibuf.seg_size++;
18481847
ibuf.free_list_len++;
18491848

@@ -1881,7 +1880,7 @@ static dberr_t ibuf_remove_free_page(bool all = false)
18811880
mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
18821881
mysql_mutex_lock(&ibuf_mutex);
18831882

1884-
if (!header_page || (!all && !ibuf_data_too_much_free())) {
1883+
if (!header_page) {
18851884
early_exit:
18861885
mysql_mutex_unlock(&ibuf_mutex);
18871886
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
@@ -1897,7 +1896,10 @@ static dberr_t ibuf_remove_free_page(bool all = false)
18971896
goto early_exit;
18981897
}
18991898

1900-
const auto root_savepoint = mtr.get_savepoint() - 1;
1899+
if (!all && !ibuf_data_too_much_free()) {
1900+
goto early_exit;
1901+
}
1902+
19011903
const uint32_t page_no = flst_get_last(PAGE_HEADER
19021904
+ PAGE_BTR_IBUF_FREE_LIST
19031905
+ root->page.frame).page;
@@ -1915,7 +1917,6 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19151917
because in fseg_free_page we access level 1 pages, and the root
19161918
is a level 2 page. */
19171919
root->page.lock.u_unlock();
1918-
mtr.lock_register(root_savepoint, MTR_MEMO_BUF_FIX);
19191920
ibuf_exit(&mtr);
19201921

19211922
/* Since pessimistic inserts were prevented, we know that the
@@ -1931,15 +1932,14 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19311932
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
19321933
fil_system.sys_space, page_no, &mtr);
19331934

1935+
root->page.lock.u_lock();
1936+
19341937
if (err != DB_SUCCESS) {
19351938
goto func_exit;
19361939
}
19371940

19381941
ibuf_enter(&mtr);
19391942

1940-
mysql_mutex_lock(&ibuf_mutex);
1941-
mtr.upgrade_buffer_fix(root_savepoint, RW_X_LATCH);
1942-
19431943
/* Remove the page from the free list and update the ibuf size data */
19441944
if (buf_block_t* block =
19451945
buf_page_get_gen(page_id, 0, RW_X_LATCH, nullptr, BUF_GET,
@@ -1951,6 +1951,7 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19511951
}
19521952

19531953
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
1954+
ut_ad(ibuf.index->lock.have_u_or_x());
19541955

19551956
if (err == DB_SUCCESS) {
19561957
ibuf.seg_size--;
@@ -1959,8 +1960,6 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19591960
}
19601961

19611962
func_exit:
1962-
mysql_mutex_unlock(&ibuf_mutex);
1963-
19641963
if (bitmap_page) {
19651964
/* Set the bit indicating that this page is no more an
19661965
ibuf tree page (level 2 page) */
@@ -1988,12 +1987,11 @@ ibuf_free_excess_pages(void)
19881987
requested service too much */
19891988

19901989
for (ulint i = 0; i < 4; i++) {
1991-
1992-
ibool too_much_free;
1993-
19941990
mysql_mutex_lock(&ibuf_mutex);
1995-
too_much_free = ibuf_data_too_much_free();
1991+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
1992+
bool too_much_free = ibuf_data_too_much_free();
19961993
mysql_mutex_unlock(&ibuf_mutex);
1994+
ibuf.index->lock.u_unlock();
19971995

19981996
if (!too_much_free) {
19991997
return;
@@ -3175,8 +3173,11 @@ ibuf_insert_low(
31753173
for (;;) {
31763174
mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
31773175
mysql_mutex_lock(&ibuf_mutex);
3176+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
3177+
bool enough = ibuf_data_enough_free_for_insert();
3178+
ibuf.index->lock.u_unlock();
31783179

3179-
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
3180+
if (UNIV_LIKELY(enough)) {
31803181

31813182
break;
31823183
}
@@ -4485,21 +4486,28 @@ ibuf_print(
44854486
if (UNIV_UNLIKELY(!ibuf.index)) return;
44864487
mysql_mutex_lock(&ibuf_mutex);
44874488

4489+
const ulint size = ibuf.size;
4490+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
4491+
/* Any modification of these fields is enclosed between
4492+
ibuf_tree_root_get() and mtr_t::commit(), and thus
4493+
protected by ibuf.index->lock. */
4494+
const ulint free_list_len = ibuf.free_list_len;
4495+
const ulint seg_size = ibuf.seg_size;
4496+
ibuf.index->lock.u_unlock();
4497+
4498+
mysql_mutex_unlock(&ibuf_mutex);
4499+
44884500
fprintf(file,
44894501
"Ibuf: size " ULINTPF ", free list len " ULINTPF ","
44904502
" seg size " ULINTPF ", " ULINTPF " merges\n",
4491-
ulint{ibuf.size},
4492-
ibuf.free_list_len,
4493-
ibuf.seg_size,
4503+
size, free_list_len, seg_size,
44944504
ulint{ibuf.n_merges});
44954505

44964506
fputs("merged operations:\n ", file);
44974507
ibuf_print_ops(ibuf.n_merged_ops, file);
44984508

44994509
fputs("discarded operations:\n ", file);
45004510
ibuf_print_ops(ibuf.n_discarded_ops, file);
4501-
4502-
mysql_mutex_unlock(&ibuf_mutex);
45034511
}
45044512

45054513
/** Check the insert buffer bitmaps on IMPORT TABLESPACE.

storage/innobase/include/ibuf0ibuf.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,14 @@ struct ibuf_t{
6868
ibuf index tree, in pages */
6969
ulint seg_size; /*!< allocated pages of the file
7070
segment containing ibuf header and
71-
tree */
72-
bool empty; /*!< Protected by the page
73-
latch of the root page of the
74-
insert buffer tree
75-
(FSP_IBUF_TREE_ROOT_PAGE_NO). true
76-
if and only if the insert
77-
buffer tree is empty. */
78-
ulint free_list_len; /*!< length of the free list */
71+
tree; protected by ibuf.index->lock
72+
and the root page latch */
73+
bool empty; /*!< whether the change buffer is
74+
empty; protected by ibuf.index->lock
75+
and the root page latch */
76+
uint32_t free_list_len; /*!< length of the free list;
77+
protected by ibuf.index->lock and
78+
the root page latch */
7979
ulint height; /*!< tree height */
8080
dict_index_t* index; /*!< insert buffer index */
8181

0 commit comments

Comments
 (0)