Skip to content

Commit 0806592

Browse files
committed
MDEV-28422 Page split breaks a gap lock
btr_insert_into_right_sibling(): Inherit any gap lock from the left sibling to the right sibling before inserting the record to the right sibling and updating the node pointer(s). lock_update_node_pointer(): Update locks in case a node pointer will move. Based on mysql/mysql-server@c7d93c2
1 parent b208030 commit 0806592

File tree

5 files changed

+118
-7
lines changed

5 files changed

+118
-7
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
2+
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
3+
CREATE TABLE t1(id INT PRIMARY key, val VARCHAR(16000)) ENGINE=InnoDB;
4+
INSERT INTO t1 (id,val) SELECT 2*seq,'x' FROM seq_0_to_1023;
5+
connect con1,localhost,root,,;
6+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
7+
connection default;
8+
DELETE FROM t1 WHERE id=1788;
9+
BEGIN;
10+
SELECT * FROM t1 WHERE id=1788 FOR UPDATE;
11+
id val
12+
connection con1;
13+
COMMIT;
14+
InnoDB 0 transactions not purged
15+
connection default;
16+
INSERT INTO t1 (id,val) VALUES (1787, REPEAT('x',2000));
17+
connection con1;
18+
SET innodb_lock_wait_timeout=0;
19+
INSERT INTO t1 (id,val) VALUES (1788, 'x');
20+
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
21+
SELECT * FROM t1 WHERE id=1788 FOR UPDATE;
22+
id val
23+
disconnect con1;
24+
connection default;
25+
COMMIT;
26+
DROP TABLE t1;
27+
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
--source include/have_innodb.inc
2+
--source include/have_sequence.inc
3+
--source include/have_debug.inc
4+
5+
SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
6+
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
7+
8+
CREATE TABLE t1(id INT PRIMARY key, val VARCHAR(16000)) ENGINE=InnoDB;
9+
INSERT INTO t1 (id,val) SELECT 2*seq,'x' FROM seq_0_to_1023;
10+
11+
connect(con1,localhost,root,,);
12+
# Prevent purge.
13+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
14+
connection default;
15+
16+
DELETE FROM t1 WHERE id=1788;
17+
18+
BEGIN;
19+
# This will return no result, but should acquire a gap lock.
20+
SELECT * FROM t1 WHERE id=1788 FOR UPDATE;
21+
22+
connection con1;
23+
COMMIT;
24+
source include/wait_all_purged.inc;
25+
connection default;
26+
27+
INSERT INTO t1 (id,val) VALUES (1787, REPEAT('x',2000));
28+
29+
connection con1;
30+
SET innodb_lock_wait_timeout=0;
31+
--error ER_LOCK_WAIT_TIMEOUT
32+
INSERT INTO t1 (id,val) VALUES (1788, 'x');
33+
SELECT * FROM t1 WHERE id=1788 FOR UPDATE;
34+
disconnect con1;
35+
36+
connection default;
37+
COMMIT;
38+
DROP TABLE t1;
39+
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;

storage/innobase/btr/btr0btr.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2014, 2021, MariaDB Corporation.
5+
Copyright (c) 2014, 2022, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -2688,8 +2688,8 @@ btr_insert_into_right_sibling(
26882688
max_size = page_get_max_insert_size_after_reorganize(next_page, 1);
26892689

26902690
/* Extends gap lock for the next page */
2691-
if (!dict_table_is_locking_disabled(cursor->index->table)) {
2692-
lock_update_split_left(next_block, block);
2691+
if (is_leaf && !dict_table_is_locking_disabled(cursor->index->table)) {
2692+
lock_update_node_pointer(block, next_block);
26932693
}
26942694

26952695
rec = page_cur_tuple_insert(

storage/innobase/include/lock0lock.h

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
3-
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, 2020, MariaDB Corporation.
3+
Copyright (c) 1996, 2022, Oracle and/or its affiliates.
4+
Copyright (c) 2017, 2022, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -151,6 +151,40 @@ lock_update_copy_and_discard(
151151
which copied */
152152
const buf_block_t* block); /*!< in: index page;
153153
NOT the root! */
154+
/** Update gap locks between the last record of the left_block and the
155+
first record of the right_block when a record is about to be inserted
156+
at the start of the right_block, even though it should "naturally" be
157+
inserted as the last record of the left_block according to the
158+
current node pointer in the parent page.
159+
160+
That is, we assume that the lowest common ancestor of the left_block
161+
and right_block routes the key of the new record to the left_block,
162+
but a heuristic which tries to avoid overflowing left_block has chosen
163+
to insert the record into right_block instead. Said ancestor performs
164+
this routing by comparing the key of the record to a "split point" -
165+
all records greater than or equal to the split point (node pointer)
166+
are in right_block, and smaller ones in left_block.
167+
The split point may be smaller than the smallest key in right_block.
168+
169+
The gap between the last record on the left_block and the first record
170+
on the right_block is represented as a gap lock attached to the supremum
171+
pseudo-record of left_block, and a gap lock attached to the new first
172+
record of right_block.
173+
174+
Thus, inserting the new record, and subsequently adjusting the node
175+
pointers in parent pages to values smaller or equal to the new
176+
record's key, will mean that the gap will be sliced at a different place
177+
("moved to the left"): a fragment of the 1st gap will now be treated
178+
as the 2nd. Therefore, we must copy any GRANTED locks from the 1st gap to the
179+
2nd gap. Any WAITING locks must be of INSERT_INTENTION type (as no
180+
other GAP locks ever wait for anything) and can stay at 1st gap, as
181+
their only purpose is to notify the requester they can retry
182+
insertion, and there's no correctness requirement to avoid waking them
183+
up too soon.
184+
@param left_block left page
185+
@param right_block right page */
186+
void lock_update_node_pointer(const buf_block_t *left_block,
187+
const buf_block_t *right_block);
154188
/*************************************************************//**
155189
Updates the lock table when a page is split to the left. */
156190
void

storage/innobase/lock/lock0lock.cc

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
3-
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2014, 2021, MariaDB Corporation.
3+
Copyright (c) 1996, 2022, Oracle and/or its affiliates.
4+
Copyright (c) 2014, 2022, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -3044,6 +3044,17 @@ lock_update_split_right(
30443044
lock_mutex_exit();
30453045
}
30463046

3047+
/** Update gap locks between the last record of left_block and the first
record of right_block when a record is about to be inserted at the start of
right_block; the full rationale is in the comment on the declaration in
lock0lock.h.
@param left_block   left page
@param right_block  right page */
void lock_update_node_pointer(const buf_block_t *left_block,
3048+
const buf_block_t *right_block)
3049+
{
3050+
/* Determined before the lock mutex is acquired. NOTE(review): presumably
this is the heap number of the first user record currently on right_block;
confirm against lock_get_min_heap_no(). */
const ulint h= lock_get_min_heap_no(right_block);
3051+
3052+
lock_mutex_enter();
3053+
/* The arguments name right_block/h first and left_block/supremum second.
NOTE(review): presumably the first pair is the heir (receiver) and the
second pair the donor of the locks, which would match the declaration
comment's "copy any GRANTED locks from 1st gap to the 2nd gap"; verify
against the signature of lock_rec_inherit_to_gap(). */
lock_rec_inherit_to_gap(right_block, left_block,
3054+
h, PAGE_HEAP_NO_SUPREMUM);
3055+
lock_mutex_exit();
3056+
}
3057+
30473058
/*************************************************************//**
30483059
Updates the lock table when a page is merged to the right. */
30493060
void

0 commit comments

Comments
 (0)