Skip to content

Commit 525e79b

Browse files
committed
MDEV-19022: InnoDB fails to cleanup useless B-tree pages
The test case for reproducing MDEV-14126 demonstrates that InnoDB can end up with an index tree where a non-leaf page has only one child page. The test case innodb.innodb_bug14676111 demonstrates that such pages are sometimes unavoidable, because InnoDB does not implement any sort of B-tree rotation. But, there is no reason to allow a root page with only one child page. btr_cur_node_ptr_delete(): Replaces btr_node_ptr_delete(). btr_page_get_father(): Declare globally. btr_discard_only_page_on_level(): Declare with ATTRIBUTE_COLD. It turns out that this function is not covered by the innodb.innodb_bug14676111 test case after all. btr_discard_page(): If the root page ends up having only one child page, shrink the tree by invoking btr_lift_page_up().
1 parent ade0a0e commit 525e79b

File tree

6 files changed

+63
-72
lines changed

6 files changed

+63
-72
lines changed

mysql-test/suite/innodb/r/innodb_bug14676111.result

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Table Op Msg_type Msg_text
3939
test.t1 analyze status OK
4040
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
4141
CLUST_INDEX_SIZE
42-
5
42+
4
4343
set global innodb_limit_optimistic_insert_debug = 10000;
4444
connection con2;
4545
rollback;
@@ -50,7 +50,7 @@ Table Op Msg_type Msg_text
5050
test.t1 analyze status OK
5151
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
5252
CLUST_INDEX_SIZE
53-
3
53+
2
5454
begin;
5555
insert into t1 values (2);
5656
rollback;
@@ -59,7 +59,7 @@ Table Op Msg_type Msg_text
5959
test.t1 analyze status OK
6060
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
6161
CLUST_INDEX_SIZE
62-
2
62+
1
6363
begin;
6464
insert into t1 values (2);
6565
rollback;

storage/innobase/btr/btr0btr.cc

Lines changed: 19 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,18 +1062,13 @@ btr_page_get_father_block(
10621062
return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
10631063
}
10641064

1065-
/************************************************************//**
1066-
Seeks to the upper level node pointer to a page.
1067-
It is assumed that mtr holds an x-latch on the tree. */
1068-
static
1069-
void
1070-
btr_page_get_father(
1071-
/*================*/
1072-
dict_index_t* index, /*!< in: b-tree index */
1073-
buf_block_t* block, /*!< in: child page in the index */
1074-
mtr_t* mtr, /*!< in: mtr */
1075-
btr_cur_t* cursor) /*!< out: cursor on node pointer record,
1076-
its page x-latched */
1065+
/** Seek to the parent page of a B-tree page.
1066+
@param[in,out] index b-tree
1067+
@param[in] block child page
1068+
@param[in,out] mtr mini-transaction
1069+
@param[out] cursor cursor pointing to the x-latched parent page */
1070+
void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
1071+
btr_cur_t* cursor)
10771072
{
10781073
mem_heap_t* heap;
10791074
rec_t* rec
@@ -3463,33 +3458,6 @@ btr_set_min_rec_mark(
34633458
}
34643459
}
34653460

3466-
/*************************************************************//**
3467-
Deletes on the upper level the node pointer to a page. */
3468-
void
3469-
btr_node_ptr_delete(
3470-
/*================*/
3471-
dict_index_t* index, /*!< in: index tree */
3472-
buf_block_t* block, /*!< in: page whose node pointer is deleted */
3473-
mtr_t* mtr) /*!< in: mtr */
3474-
{
3475-
btr_cur_t cursor;
3476-
ibool compressed;
3477-
dberr_t err;
3478-
3479-
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
3480-
3481-
/* Delete node pointer on father page */
3482-
btr_page_get_father(index, block, mtr, &cursor);
3483-
3484-
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
3485-
BTR_CREATE_FLAG, false, mtr);
3486-
ut_a(err == DB_SUCCESS);
3487-
3488-
if (!compressed) {
3489-
btr_cur_compress_if_useful(&cursor, FALSE, mtr);
3490-
}
3491-
}
3492-
34933461
/*************************************************************//**
34943462
If page is the only on its level, this function moves its records to the
34953463
father page, thus reducing the tree height.
@@ -3939,7 +3907,7 @@ btr_compress(
39393907
lock_rec_free_all_from_discard_page(block);
39403908
lock_mutex_exit();
39413909
} else {
3942-
btr_node_ptr_delete(index, block, mtr);
3910+
btr_cur_node_ptr_delete(&father_cursor, mtr);
39433911
if (!dict_table_is_locking_disabled(index->table)) {
39443912
lock_update_merge_left(
39453913
merge_block, orig_pred, block);
@@ -4217,8 +4185,9 @@ btr_compress(
42174185
/*************************************************************//**
42184186
Discards a page that is the only page on its level. This will empty
42194187
the whole B-tree, leaving just an empty root page. This function
4220-
should never be reached, because btr_compress(), which is invoked in
4188+
should almost never be reached, because btr_compress(), which is invoked in
42214189
delete operations, calls btr_lift_page_up() to flatten the B-tree. */
4190+
ATTRIBUTE_COLD
42224191
static
42234192
void
42244193
btr_discard_only_page_on_level(
@@ -4318,10 +4287,7 @@ btr_discard_page(
43184287
buf_block_t* block;
43194288
page_t* page;
43204289
rec_t* node_ptr;
4321-
#ifdef UNIV_DEBUG
43224290
btr_cur_t parent_cursor;
4323-
bool parent_is_different = false;
4324-
#endif
43254291

43264292
block = btr_cur_get_block(cursor);
43274293
index = btr_cur_get_index(cursor);
@@ -4337,21 +4303,19 @@ btr_discard_page(
43374303

43384304
MONITOR_INC(MONITOR_INDEX_DISCARD);
43394305

4340-
#ifdef UNIV_DEBUG
43414306
if (dict_index_is_spatial(index)) {
43424307
rtr_page_get_father(index, block, mtr, cursor, &parent_cursor);
43434308
} else {
43444309
btr_page_get_father(index, block, mtr, &parent_cursor);
43454310
}
4346-
#endif
43474311

43484312
/* Decide the page which will inherit the locks */
43494313

43504314
left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
43514315
right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
43524316

43534317
const page_size_t page_size(dict_table_page_size(index->table));
4354-
4318+
ut_d(bool parent_is_different = false);
43554319
if (left_page_no != FIL_NULL) {
43564320
merge_block = btr_block_get(
43574321
page_id_t(space, left_page_no), page_size,
@@ -4407,15 +4371,9 @@ btr_discard_page(
44074371
}
44084372

44094373
if (dict_index_is_spatial(index)) {
4410-
btr_cur_t father_cursor;
4411-
4412-
/* Since rtr_node_ptr_delete doesn't contain get father
4413-
node ptr, so, we need to get father node ptr first and then
4414-
delete it. */
4415-
rtr_page_get_father(index, block, mtr, cursor, &father_cursor);
4416-
rtr_node_ptr_delete(index, &father_cursor, block, mtr);
4374+
rtr_node_ptr_delete(index, &parent_cursor, block, mtr);
44174375
} else {
4418-
btr_node_ptr_delete(index, block, mtr);
4376+
btr_cur_node_ptr_delete(&parent_cursor, mtr);
44194377
}
44204378

44214379
/* Remove the page from the level list */
@@ -4453,6 +4411,12 @@ btr_discard_page(
44534411
we cannot use btr_check_node_ptr() */
44544412
ut_ad(parent_is_different
44554413
|| btr_check_node_ptr(index, merge_block, mtr));
4414+
4415+
if (btr_cur_get_block(&parent_cursor)->page.id.page_no() == index->page
4416+
&& !page_has_siblings(btr_cur_get_page(&parent_cursor))
4417+
&& page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) {
4418+
btr_lift_page_up(index, merge_block, mtr);
4419+
}
44564420
}
44574421

44584422
#ifdef UNIV_BTR_PRINT

storage/innobase/btr/btr0cur.cc

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5358,8 +5358,10 @@ btr_cur_pessimistic_delete(
53585358
on the page */
53595359
ulint level = btr_page_get_level(page, mtr);
53605360

5361-
btr_node_ptr_delete(index, block, mtr);
5362-
5361+
btr_cur_t cursor;
5362+
btr_page_get_father(index, block, mtr, &cursor);
5363+
btr_cur_node_ptr_delete(&cursor, mtr);
5364+
// FIXME: reuse the node_ptr from above
53635365
dtuple_t* node_ptr = dict_index_build_node_ptr(
53645366
index, next_rec, block->page.id.page_no(),
53655367
heap, level);
@@ -5428,6 +5430,23 @@ btr_cur_pessimistic_delete(
54285430
return(ret);
54295431
}
54305432

5433+
/** Delete the node pointer in a parent page.
5434+
@param[in,out] parent cursor pointing to parent record
5435+
@param[in,out] mtr mini-transaction */
5436+
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
5437+
{
5438+
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(parent),
5439+
MTR_MEMO_PAGE_X_FIX));
5440+
dberr_t err;
5441+
ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent,
5442+
BTR_CREATE_FLAG, false,
5443+
mtr);
5444+
ut_a(err == DB_SUCCESS);
5445+
if (!compressed) {
5446+
btr_cur_compress_if_useful(parent, FALSE, mtr);
5447+
}
5448+
}
5449+
54315450
/*******************************************************************//**
54325451
Adds path information to the cursor for the current page, for which
54335452
the binary search has been performed. */

storage/innobase/btr/btr0defragment.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,16 @@ btr_defragment_merge_pages(
484484
ULINT_UNDEFINED);
485485
}
486486
}
487+
btr_cur_t parent;
487488
if (n_recs_to_move == n_recs) {
488489
/* The whole page is merged with the previous page,
489490
free it. */
490491
lock_update_merge_left(to_block, orig_pred,
491492
from_block);
492493
btr_search_drop_page_hash_index(from_block);
493494
btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
494-
btr_node_ptr_delete(index, from_block, mtr);
495+
btr_page_get_father(index, from_block, mtr, &parent);
496+
btr_cur_node_ptr_delete(&parent, mtr);
495497
/* btr_blob_dbg_remove(from_page, index,
496498
"btr_defragment_n_pages"); */
497499
btr_page_free(index, from_block, mtr);
@@ -509,7 +511,9 @@ btr_defragment_merge_pages(
509511
lock_update_split_and_merge(to_block,
510512
orig_pred,
511513
from_block);
512-
btr_node_ptr_delete(index, from_block, mtr);
514+
// FIXME: reuse the node_ptr!
515+
btr_page_get_father(index, from_block, mtr, &parent);
516+
btr_cur_node_ptr_delete(&parent, mtr);
513517
rec = page_rec_get_next(
514518
page_get_infimum_rec(from_page));
515519
node_ptr = dict_index_build_node_ptr(

storage/innobase/include/btr0btr.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2014, 2018, MariaDB Corporation.
5+
Copyright (c) 2014, 2019, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -545,14 +545,13 @@ btr_set_min_rec_mark(
545545
rec_t* rec, /*!< in/out: record */
546546
mtr_t* mtr) /*!< in: mtr */
547547
MY_ATTRIBUTE((nonnull));
548-
/*************************************************************//**
549-
Deletes on the upper level the node pointer to a page. */
550-
void
551-
btr_node_ptr_delete(
552-
/*================*/
553-
dict_index_t* index, /*!< in: index tree */
554-
buf_block_t* block, /*!< in: page whose node pointer is deleted */
555-
mtr_t* mtr) /*!< in: mtr */
548+
/** Seek to the parent page of a B-tree page.
549+
@param[in,out] index b-tree
550+
@param[in] block child page
551+
@param[in,out] mtr mini-transaction
552+
@param[out] cursor cursor pointing to the x-latched parent page */
553+
void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
554+
btr_cur_t* cursor)
556555
MY_ATTRIBUTE((nonnull));
557556
#ifdef UNIV_DEBUG
558557
/************************************************************//**

storage/innobase/include/btr0cur.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, MariaDB Corporation.
4+
Copyright (c) 2017, 2019, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -529,6 +529,11 @@ btr_cur_pessimistic_delete(
529529
bool rollback,/*!< in: performing rollback? */
530530
mtr_t* mtr) /*!< in: mtr */
531531
MY_ATTRIBUTE((nonnull));
532+
/** Delete the node pointer in a parent page.
533+
@param[in,out] parent cursor pointing to parent record
534+
@param[in,out] mtr mini-transaction */
535+
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
536+
MY_ATTRIBUTE((nonnull));
532537
/***********************************************************//**
533538
Parses a redo log record of updating a record in-place.
534539
@return end of log record or NULL */

0 commit comments

Comments
 (0)