Skip to content

Commit

Permalink
MDEV-20605 Awaken transaction can miss inserted by other transaction …
Browse files Browse the repository at this point in the history
…records due to wrong persistent cursor restoration

sel_restore_position_for_mysql() moves the persistent cursor position
forward after the btr_pcur_restore_position() call if the cursor's relative
position is BTR_PCUR_ON and the cursor points to a record whose field
values are NOT the same as those of the stored record (plus some other
conditions that are not important for this case).

This was done because btr_pcur_restore_position() sets the
page_cur_mode_t mode to PAGE_CUR_LE when cursor->rel_pos == BTR_PCUR_ON
before opening the cursor. So we search for a record less than or equal
to the stored one. If the found record is not equal to the stored one, then
it is less, and we need to move the cursor forward.

But there can be a situation in which the stored record was purged and a
new one with the same key but a different value was inserted while
row_search_mvcc() was suspended. In this case, when the thread is
awakened, it invokes sel_restore_position_for_mysql(), which, in turn,
invokes btr_pcur_restore_position(), which returns false because the found
record doesn't match the stored record, and
sel_restore_position_for_mysql() moves the cursor position forward.

As a result, the awakened row_search_mvcc() may not see
records inserted by other transactions while it slept. The mtr test case
shows an example of how this can happen.

The fix is to return a special value from the persistent cursor restoring
function to notify its caller that the unique fields of the restored
record and the stored record are the same; in this case
sel_restore_position_for_mysql() doesn't move the cursor forward.

Delete-marked records are correctly processed in row_search_mvcc().
Non-unique secondary indexes are "uniquified" by adding the PK, so
index->n_uniq should then be equal to index->n_fields. So there is no
need for additional checks in the fix.

If a transaction's read view can't see the changes made to a secondary index
record, row_search_mvcc() requests the clustered index record to check
its transaction id and get the corresponding record version. After this,
row_search_mvcc() commits the mtr to preserve the clustered index latching
order, and starts a new mtr. Between that mtr commit and start, the secondary
index pages are unlatched, and purge is able to remove the record stored in
the cursor, which causes duplicated rows in the result set for
non-locking reads, as the cursor position is restored to the previously
visited record.

To solve this, the changes are simply switched off for non-locking reads.
It's quite a simple solution; besides, the changes don't make sense for
non-locking reads.

A more complex but better-performing solution is
to create an mtr savepoint before requesting the clustered index record and
to roll back to that savepoint afterwards. See MDEV-27557.

Another solution is to have a per-record transaction id for secondary
indexes. See MDEV-17598.

If either of those is implemented, just remove the select_lock_type argument
of sel_restore_position_for_mysql().
  • Loading branch information
vlad-lesin committed Feb 14, 2022
1 parent 52b32c6 commit 20e9e80
Show file tree
Hide file tree
Showing 14 changed files with 396 additions and 207 deletions.
35 changes: 35 additions & 0 deletions mysql-test/suite/innodb/r/cursor-restore-locking.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB;
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
connect con_del_1,localhost,root,,;
INSERT INTO t VALUES (20,20);
SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_mvcc_finished WAIT_FOR first_del_cont';
DELETE FROM t WHERE b = 20;
connect con_ins_1,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del_2,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
DELETE FROM t WHERE b = 20;
connection default;
SET DEBUG_SYNC = 'now WAIT_FOR second_del_locked';
SET DEBUG_SYNC = 'now SIGNAL first_del_cont';
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_row_inserted';
connection con_del_1;
connection default;
disconnect prevent_purge;
InnoDB 0 transactions not purged
SET DEBUG_SYNC = 'now SIGNAL first_ins_cont';
connection con_del_2;
connection con_ins_1;
connection default;
INSERT INTO t VALUES(30, 20);
disconnect con_ins_1;
disconnect con_del_1;
disconnect con_del_2;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
26 changes: 26 additions & 0 deletions mysql-test/suite/innodb/r/cursor-restore-non-locking-read.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t1 (pk int PRIMARY KEY, c int UNIQUE) ENGINE=InnoDB;
INSERT INTO t1 VALUES (10,10),(20,20),(30,30);
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
UPDATE t1 SET c=300 WHERE pk = 30;
connection default;
DELETE FROM t1 WHERE pk = 10;
INSERT INTO t1 VALUES(5,10);
SET DEBUG_SYNC = "row_search_clust_unlatched SIGNAL unlatched WAIT_FOR cont";
SELECT pk FROM t1 FORCE INDEX (c);
connect con1,localhost,root,,;
SET DEBUG_SYNC = "now WAIT_FOR unlatched";
disconnect prevent_purge;
InnoDB 1 transactions not purged
SET DEBUG_SYNC = 'now SIGNAL cont';
disconnect con1;
connection default;
pk
5
20
30
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
79 changes: 79 additions & 0 deletions mysql-test/suite/innodb/t/cursor-restore-locking.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Regression test for persistent cursor restoration in a locking read:
# a DELETE that is woken up from a lock wait must restore its cursor onto a
# record that was inserted, with the same unique secondary key value, while
# the DELETE was suspended (see the detailed scenario below).
--source include/have_innodb.inc
--source include/count_sessions.inc
source include/have_debug.inc;
source include/have_debug_sync.inc;

CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB;

# This connection keeps a consistent snapshot open until it is disconnected
# below; as its name says, it is used to hold purge back.
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;

# First DELETE: it stops at the innodb_row_search_for_mysql_exit sync point
# and waits for first_del_cont.
--connect(con_del_1,localhost,root,,)
INSERT INTO t VALUES (20,20);
SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_mvcc_finished WAIT_FOR first_del_cont';
--send DELETE FROM t WHERE b = 20

--connect(con_ins_1,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished';
# The following INSERT is expected to be suspended just after the
# lock_wait_suspend_thread_enter sync point, and to be awakened
# after the previous DELETE commits. ib_after_row_insert will be executed
# after the INSERT is woken up. The previous DELETE will wait for the
# first_del_cont signal before commit, and this signal will be sent later.
# So it's safe to use two signals in a row here: it's guaranteed the first
# signal will be received before the second signal is sent.
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont';
--send INSERT INTO t VALUES(10, 20)

--connect(con_del_2,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
###############################################################################
# This DELETE is blocked by the previous DELETE; after that DELETE is
# committed, it will still be blocked by the next INSERT on the delete-marked
# heap_no 2 record. After that INSERT has inserted the record with heap_no 3,
# and after the heap_no 2 record is purged, this DELETE will be unblocked and
# must restore the persistent cursor position at the heap_no 3 record, as it
# has the same secondary key value as the former heap_no 2 record. Then it must
# be blocked by the previous INSERT, and after the INSERT is committed, it must
# delete the record inserted by the previous INSERT, and the last INSERT (see
# below) must finish without error. But with the bug, this DELETE restores
# the persistent cursor position to supremum; as a result, it does not delete
# the record inserted by the previous INSERT, and the last INSERT finishes with
# a duplicate key check error.
###############################################################################
--send DELETE FROM t WHERE b = 20

--connection default
# Once the second DELETE is waiting for a lock, let the first DELETE continue,
# then wait until the INSERT has inserted its row.
SET DEBUG_SYNC = 'now WAIT_FOR second_del_locked';
SET DEBUG_SYNC = 'now SIGNAL first_del_cont';
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_row_inserted';
--connection con_del_1
--reap

--connection default
# Close the snapshot-holding connection and wait for purge to finish before
# letting the INSERT continue.
--disconnect prevent_purge
--source include/wait_all_purged.inc
SET DEBUG_SYNC = 'now SIGNAL first_ins_cont';

--connection con_del_2
--reap

--connection con_ins_1
--reap

--connection default
###############################################################################
# Duplicate key error is expected if the bug is not fixed.
###############################################################################
INSERT INTO t VALUES(30, 20);

--disconnect con_ins_1
--disconnect con_del_1
--disconnect con_del_2
--connection default

SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/wait_until_count_sessions.inc
44 changes: 44 additions & 0 deletions mysql-test/suite/innodb/t/cursor-restore-non-locking-read.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Regression test for persistent cursor restoration in a non-locking read:
# a SELECT over the unique secondary index must not return a row twice when
# the record its cursor was stored on is purged while page latches are
# released (see the detailed scenario below).
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/count_sessions.inc

# Save the global setting so it can be restored at the end of the test.
SET @saved_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;

CREATE TABLE t1 (pk int PRIMARY KEY, c int UNIQUE) ENGINE=InnoDB;

INSERT INTO t1 VALUES (10,10),(20,20),(30,30);

# This connection keeps a consistent snapshot open until it is disconnected
# below; as its name says, it is used to hold purge back.
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;
# This UPDATE is needed to update the page's transaction id for the
# secondary index.
UPDATE t1 SET c=300 WHERE pk = 30;

--connection default
# Leave a delete-marked secondary index record for c=10 and insert a new
# row with the same unique value c=10 but a different primary key.
DELETE FROM t1 WHERE pk = 10;
INSERT INTO t1 VALUES(5,10);
SET DEBUG_SYNC = "row_search_clust_unlatched SIGNAL unlatched WAIT_FOR cont";
# With the above sync point row_search_mvcc() will be blocked on the
# delete-marked record (10,10) in the secondary index just after all page
# latches are released. After this record is purged, row_search_mvcc() will be
# unblocked, and the cursor will be restored to the secondary index record
# (10,5). As its unique field is the same as in the cursor's stored record,
# if the bug is not fixed, the value 5 will be duplicated in the result set.
--send SELECT pk FROM t1 FORCE INDEX (c)

--connect(con1,localhost,root,,)
SET DEBUG_SYNC = "now WAIT_FOR unlatched";
--disconnect prevent_purge
# NOTE(review): $wait_all_purged presumably tells wait_all_purged.inc how many
# unpurged transactions to tolerate (the recorded result reports
# "InnoDB 1 transactions not purged") - confirm against the include file.
let $wait_all_purged= 1;
--source include/wait_all_purged.inc
SET DEBUG_SYNC = 'now SIGNAL cont';
--disconnect con1

--connection default
--reap

SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
--source include/wait_until_count_sessions.inc
8 changes: 3 additions & 5 deletions storage/innobase/btr/btr0cur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7193,11 +7193,9 @@ struct btr_blob_log_check_t {
buf_block_buf_fix_dec(m_pcur->btr_cur.page_cur.block);
} else {
ut_ad(m_pcur->rel_pos == BTR_PCUR_ON);
bool ret = btr_pcur_restore_position(
BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL,
m_pcur, m_mtr);

ut_a(ret);
ut_a(btr_pcur_restore_position(
BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL, m_pcur,
m_mtr) == btr_pcur_t::SAME_ALL);
}

*m_block = btr_pcur_get_block(m_pcur);
Expand Down
Loading

0 comments on commit 20e9e80

Please sign in to comment.