Skip to content

Commit 5bbda97

Browse files
SongLibing and dr-m
authored and committed
MDEV-33853 Async rollback prepared transactions during binlog
crash recovery Summary ======= When doing server recovery, the active transactions will be rolled back by the InnoDB background rollback thread automatically. The prepared transactions will be committed or rolled back accordingly by binlog recovery. Binlog recovery is done in the main thread before the server can provide service to users. If there is a big transaction to roll back, the server will not be available for a long time. This patch provides a way to roll back the prepared transactions asynchronously. Thus the rollback will not block server startup. Design ====== - Handler::recover_rollback_by_xid() This patch provides a new handler interface to roll back transactions in the recovery phase. InnoDB just sets the transaction's state to active. Then the transaction will be rolled back by the background rollback thread. - Handler::signal_tc_log_recovery_done() This function is called after the tc log has been opened (typically after binlog recovery is done). When this function is called, all transactions that are to be rolled back have been reverted to the ACTIVE state. Thus it starts the rollback thread to roll back the transactions. - Background rollback thread With this patch, the background rollback thread is deferred until binlog recovery is finished. It is started by innobase_tc_log_recovery_done().
1 parent db5d1cd commit 5bbda97

File tree

14 files changed

+376
-13
lines changed

14 files changed

+376
-13
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
CREATE TABLE t1 (id int primary key, data int) ENGINE = InnoDB;
2+
INSERT INTO t1 VALUES (0, 1);
3+
#
4+
# 1. Check DML in prepared state can rollback correctly.
5+
#
6+
connect con1, localhost, root,,;
7+
SET debug_sync = "ha_commit_trans_after_prepare SIGNAL prepared1 WAIT_FOR continue";
8+
INSERT INTO t1 VALUES(1, 1);;
9+
connect con2, localhost, root,,;
10+
SET debug_sync = "now WAIT_FOR prepared1";
11+
SET debug_sync = "ha_commit_trans_after_prepare SIGNAL prepared2 WAIT_FOR continue";
12+
UPDATE t1 SET data = data + 1 WHERE id = 0;
13+
connection default;
14+
SET debug_sync = "now WAIT_FOR prepared2";
15+
# Kill the server
16+
disconnect con1;
17+
disconnect con2;
18+
# restart
19+
# Expect (0, 1)
20+
SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
21+
SELECT * FROM t1;
22+
id data
23+
0 1
24+
INSERT INTO t1 VALUES(1, 1);
25+
UPDATE t1 SET data = data + 1 WHERE id = 0;
26+
# Expect (0, 2), (1, 1)
27+
SELECT * FROM t1;
28+
id data
29+
0 2
30+
1 1
31+
#
32+
# 2. Test that innodb shutdown as expected if any error happens before
33+
# normal rollback task is started. In the situation, rollback task
34+
# should be started at preshutdown accordingly to rollback or
35+
# deregister all recovered active transactions.
36+
#
37+
INSERT INTO t1 SELECT seq + 2, 1 FROM seq_1_to_1024;
38+
BEGIN;
39+
UPDATE t1 SET data = 10;
40+
SET GLOBAL innodb_log_checkpoint_now = 1;
41+
# Kill the server
42+
# restart: --innodb-read-only
43+
SELECT count(*) FROM information_schema.innodb_trx;
44+
count(*)
45+
1
46+
# Kill the server
47+
# restart: --innodb-read-only
48+
SELECT count(*) FROM information_schema.innodb_trx;
49+
count(*)
50+
1
51+
# restart
52+
DROP TABLE t1;
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
################################################################################
2+
# Async rollback prepared transactions during binlog crash recovery
3+
#
4+
# It verifies that binlog recovery just sets the prepared transactions to
5+
# active and that the background recovery rollback thread will roll back the
6+
# transactions asynchronously.
7+
################################################################################
8+
--source include/have_debug.inc
9+
--source include/have_debug_sync.inc
10+
--source include/have_innodb.inc
11+
--source include/have_binlog_format_row.inc
12+
--source include/have_sequence.inc
13+
14+
CREATE TABLE t1 (id int primary key, data int) ENGINE = InnoDB;
15+
INSERT INTO t1 VALUES (0, 1);
16+
17+
--echo #
18+
--echo # 1. Check DML in prepared state can rollback correctly.
19+
--echo #
20+
21+
--connect(con1, localhost, root,,)
22+
SET debug_sync = "ha_commit_trans_after_prepare SIGNAL prepared1 WAIT_FOR continue";
23+
--send INSERT INTO t1 VALUES(1, 1);
24+
25+
--connect(con2, localhost, root,,)
26+
SET debug_sync = "now WAIT_FOR prepared1";
27+
SET debug_sync = "ha_commit_trans_after_prepare SIGNAL prepared2 WAIT_FOR continue";
28+
--send UPDATE t1 SET data = data + 1 WHERE id = 0
29+
30+
--connection default
31+
SET debug_sync = "now WAIT_FOR prepared2";
32+
--source include/kill_mysqld.inc
33+
34+
--disconnect con1
35+
--disconnect con2
36+
37+
# With the debug option, recovery rollback thread just rolls back the
38+
# first prepared transaction and then goes to sleep.
39+
--source include/start_mysqld.inc
40+
--let $wait_condition= SELECT count(*) = 0 FROM information_schema.innodb_trx
41+
--source include/wait_condition.inc
42+
43+
--echo # Expect (0, 1)
44+
SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
45+
SELECT * FROM t1;
46+
47+
# The previous INSERT is rolled back, so this INSERT will succeed.
48+
INSERT INTO t1 VALUES(1, 1);
49+
50+
# The previous UPDATE is rolled back, so this UPDATE will succeed.
51+
UPDATE t1 SET data = data + 1 WHERE id = 0;
52+
53+
--echo # Expect (0, 2), (1, 1)
54+
SELECT * FROM t1;
55+
56+
--echo #
57+
--echo # 2. Test that innodb shutdown as expected if any error happens before
58+
--echo # normal rollback task is started. In the situation, rollback task
59+
--echo # should be started at preshutdown accordingly to rollback or
60+
--echo # deregister all recovered active transactions.
61+
--echo #
62+
# Generate a large active transaction
63+
INSERT INTO t1 SELECT seq + 2, 1 FROM seq_1_to_1024;
64+
65+
BEGIN;
66+
UPDATE t1 SET data = 10;
67+
68+
# Make sure above update is persisted.
69+
SET GLOBAL innodb_log_checkpoint_now = 1;
70+
--source include/kill_mysqld.inc
71+
72+
# tc-heuristic-recover triggers an error before innodb rollback task start
73+
# Rollback task will not be started at preshutdown of read only mode. Active
74+
# transactions are not expected to rollback.
75+
--error 1
76+
--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=ROLLBACK --innodb-read-only --log-error=$MYSQLTEST_VARDIR/tmp/log.err
77+
78+
# Rollback task will not be started at preshutdown if recovery mode is greater
79+
# than 2. Active transactions are not expected to rollback.
80+
--error 1
81+
--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=ROLLBACK --innodb-force-recovery=3 --log-error=$MYSQLTEST_VARDIR/tmp/log.err
82+
83+
# Rollback task will be started at preshutdown of fast shutdown if force
84+
# recovery is 2. But the transaction is deregistered instead of rollback.
85+
--error 1
86+
--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=ROLLBACK --innodb-fast-shutdown=1 --innodb-force-recovery=2 --log-error=$MYSQLTEST_VARDIR/tmp/log.err
87+
88+
--let $restart_parameters= --innodb-read-only
89+
--source include/start_mysqld.inc
90+
91+
# Verify that the transaction is still there.
92+
SELECT count(*) FROM information_schema.innodb_trx;
93+
94+
--source include/kill_mysqld.inc
95+
96+
# Rollback task will be started at preshutdown of fast shutdown. The
97+
# active transaction is rolled back.
98+
--error 1
99+
--exec $MYSQLD_LAST_CMD --tc-heuristic-recover=ROLLBACK --innodb-fast-shutdown=1 --log-error=$MYSQLTEST_VARDIR/tmp/log.err
100+
101+
--let $restart_parameters= --innodb-read-only
102+
--source include/start_mysqld.inc
103+
# Verify that the transaction is still there.
104+
SELECT count(*) FROM information_schema.innodb_trx;
105+
106+
--remove_file $MYSQLTEST_VARDIR/tmp/log.err
107+
108+
--let $restart_parameters=
109+
--source include/restart_mysqld.inc
110+
111+
# There should not be any transactions
112+
--let $wait_condition= SELECT count(*) = 0 FROM information_schema.innodb_trx
113+
--source include/wait_condition.inc
114+
115+
# Cleanup.
116+
DROP TABLE t1;
117+

sql/handler.cc

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,8 +2682,12 @@ static void xarecover_do_commit_or_rollback(handlerton *hton,
26822682
else
26832683
x= *member->full_xid;
26842684

2685-
rc= xarecover_decide_to_commit(member, ptr_commit_max) ?
2686-
hton->commit_by_xid(hton, &x) : hton->rollback_by_xid(hton, &x);
2685+
if (xarecover_decide_to_commit(member, ptr_commit_max))
2686+
rc= hton->commit_by_xid(hton, &x);
2687+
else if (hton->recover_rollback_by_xid)
2688+
rc= hton->recover_rollback_by_xid(&x);
2689+
else
2690+
rc= hton->rollback_by_xid(hton, &x);
26872691

26882692
/*
26892693
It's fine to have non-zero rc which would be from transaction
@@ -2750,6 +2754,21 @@ static my_bool xarecover_complete_and_count(void *member_arg,
27502754
return false;
27512755
}
27522756

2757+
static my_bool tc_log_recover_done_handlerton(THD*, plugin_ref plugin, void *arg)
2758+
{
2759+
handlerton *hton= plugin_hton(plugin);
2760+
2761+
if (hton->signal_tc_log_recovery_done)
2762+
hton->signal_tc_log_recovery_done();
2763+
return false;
2764+
}
2765+
2766+
void ha_signal_tc_log_recovery_done()
2767+
{
2768+
std::ignore = plugin_foreach(nullptr, tc_log_recover_done_handlerton,
2769+
MYSQL_STORAGE_ENGINE_PLUGIN, 0);
2770+
}
2771+
27532772
/*
27542773
Completes binlog recovery to invoke decider functions for
27552774
each xid.

sql/handler.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,6 +1427,23 @@ struct handlerton
14271427
int (*recover)(handlerton *hton, XID *xid_list, uint len);
14281428
int (*commit_by_xid)(handlerton *hton, XID *xid);
14291429
int (*rollback_by_xid)(handlerton *hton, XID *xid);
1430+
/*
1431+
recover_rollback_by_xid is optional. If set, it will be called instead of
1432+
rollback_by_xid when transactions should be rolled back at server startup.
1433+
1434+
This function should just change the transaction's state from prepared to
1435+
active before returning. The actual rollback should then happen
1436+
asynchronously (e.g. in a background thread). This way, rollbacks that
1437+
take a long time to complete will not block server startup, and the
1438+
database becomes available sooner to serve user queries.
1439+
*/
1440+
int (*recover_rollback_by_xid)(const XID *xid);
1441+
/*
1442+
It is called after binlog recovery has done commit/rollback of
1443+
all transactions. It is used together with recover_rollback_by_xid()
1444+
to roll back prepared transactions asynchronously.
1445+
*/
1446+
void (*signal_tc_log_recovery_done)();
14301447
/*
14311448
The commit_checkpoint_request() handlerton method is used to checkpoint
14321449
the XA recovery process for storage engines that support two-phase
@@ -5622,6 +5639,7 @@ int ha_panic(enum ha_panic_function flag);
56225639
void ha_close_connection(THD* thd);
56235640
void ha_kill_query(THD* thd, enum thd_kill_levels level);
56245641
void ha_signal_ddl_recovery_done();
5642+
void ha_signal_tc_log_recovery_done();
56255643
bool ha_flush_logs();
56265644
void ha_drop_database(const char* path);
56275645
void ha_checkpoint_state(bool disable);

sql/mysqld.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5602,6 +5602,8 @@ static int init_server_components()
56025602
if (ha_recover(0))
56035603
unireg_abort(1);
56045604

5605+
ha_signal_tc_log_recovery_done();
5606+
56055607
if (opt_bin_log)
56065608
{
56075609
int error;

storage/innobase/handler/ha_innodb.cc

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4108,6 +4108,11 @@ static int innodb_init(void* p)
41084108
innobase_hton->recover = innobase_xa_recover;
41094109
innobase_hton->commit_by_xid = innobase_commit_by_xid;
41104110
innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
4111+
#ifndef EMBEDDED_LIBRARY
4112+
innobase_hton->recover_rollback_by_xid =
4113+
innobase_recover_rollback_by_xid;
4114+
innobase_hton->signal_tc_log_recovery_done = innobase_tc_log_recovery_done;
4115+
#endif
41114116
innobase_hton->commit_checkpoint_request = innodb_log_flush_request;
41124117
innobase_hton->create = innobase_create_handler;
41134118

@@ -17171,6 +17176,80 @@ int innobase_rollback_by_xid(handlerton* hton, XID* xid)
1717117176
}
1717217177
}
1717317178

17179+
#ifndef EMBEDDED_LIBRARY
17180+
/**
17181+
This function is used to rollback one X/Open XA distributed transaction
17182+
which is in the prepared state asynchronously.
17183+
17184+
It only sets the transaction's status to ACTIVE and persists the status.
17185+
The transaction will be rolled back by background rollback thread.
17186+
17187+
@param xid X/Open XA transaction identification
17188+
17189+
@return 0 or error number
17190+
*/
17191+
int innobase_recover_rollback_by_xid(const XID *xid)
17192+
{
17193+
DBUG_EXECUTE_IF("innobase_xa_fail", return XAER_RMFAIL;);
17194+
17195+
if (high_level_read_only)
17196+
return XAER_RMFAIL;
17197+
17198+
/*
17199+
trx_get_trx_by_xid() sets trx's xid to null. Thus only one call for any
17200+
given XID can find the transaction. Subsequent calls by other threads
17201+
would return nullptr. That is what guarantees that no other thread can be
17202+
modifying the state of the transaction at this point.
17203+
*/
17204+
trx_t *trx= trx_get_trx_by_xid(xid);
17205+
if (!trx)
17206+
return XAER_RMFAIL;
17207+
17208+
// ddl should not be rolled back through recovery
17209+
ut_ad(!trx->dict_operation);
17210+
ut_ad(trx->is_recovered);
17211+
ut_ad(trx->state == TRX_STATE_PREPARED);
17212+
17213+
#ifdef WITH_WSREP
17214+
ut_ad(!wsrep_is_wsrep_xid(&trx->xid));
17215+
#endif
17216+
17217+
if (trx->rsegs.m_redo.undo)
17218+
{
17219+
ut_ad(trx->rsegs.m_redo.undo->rseg == trx->rsegs.m_redo.rseg);
17220+
17221+
mtr_t mtr;
17222+
mtr.start();
17223+
trx_undo_set_state_at_prepare(trx, trx->rsegs.m_redo.undo, true, &mtr);
17224+
mtr.commit();
17225+
17226+
ut_ad(mtr.commit_lsn() > 0);
17227+
}
17228+
17229+
/* The above state change from XA PREPARE will be made durable in
17230+
innobase_tc_log_recovery_done(), which will also initiate
17231+
trx_rollback_recovered() to roll back this transaction. */
17232+
trx->state= TRX_STATE_ACTIVE;
17233+
return 0;
17234+
}
17235+
17236+
void innobase_tc_log_recovery_done()
17237+
{
17238+
if (high_level_read_only)
17239+
return;
17240+
17241+
/* Make durable any innobase_recover_rollback_by_xid(). */
17242+
log_buffer_flush_to_disk(true);
17243+
17244+
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO)
17245+
{
17246+
/* Rollback incomplete non-DDL transactions */
17247+
trx_rollback_is_active= true;
17248+
srv_thread_pool->submit_task(&rollback_all_recovered_task);
17249+
}
17250+
}
17251+
#endif // EMBEDDED_LIBRARY
17252+
1717417253
bool
1717517254
ha_innobase::check_if_incompatible_data(
1717617255
/*====================================*/

storage/innobase/handler/ha_innodb.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,3 +943,24 @@ which is in the prepared state
943943
944944
@return 0 or error number */
945945
int innobase_rollback_by_xid(handlerton* hton, XID* xid);
946+
947+
/**
948+
This function is used to rollback one X/Open XA distributed transaction
949+
which is in the prepared state asynchronously.
950+
951+
It only sets the transaction's status to ACTIVE and persists the status.
952+
The transaction will be rolled back by background rollback thread.
953+
954+
@param xid X/Open XA transaction identification
955+
956+
@return 0 or error number
957+
*/
958+
int innobase_recover_rollback_by_xid(const XID *xid);
959+
/**
960+
This function is called after the tc log has been opened (typically after
961+
binlog recovery is done). It starts the rollback thread to roll back the
962+
transactions that have been changed from PREPARED to ACTIVE.
963+
964+
This function does not return a value.
965+
*/
966+
void innobase_tc_log_recovery_done();

storage/innobase/include/trx0roll.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ Created 3/26/1996 Heikki Tuuri
3131
#include "mtr0mtr.h"
3232
#include "trx0sys.h"
3333

34-
extern bool trx_rollback_is_active;
34+
extern tpool::task_group rollback_all_recovered_group;
35+
extern tpool::waitable_task rollback_all_recovered_task;
3536
extern const trx_t* trx_roll_crash_recv_trx;
3637

3738
/** Report progress when rolling back a row of a recovered transaction. */

0 commit comments

Comments
 (0)