Skip to content

Commit 2c4844c

Browse files
committed
MDEV-17813 Crash in instant ALTER TABLE due to purge concurrently emptying table
Several race conditions between MDEV-15562 instant ALTER TABLE and purge were observed. The most obvious race condition resulted in a reported assertion failure in dict_index_t::instant_add_field(): instant.n_core_fields == n_core_fields would not hold if the table was emptied by purge after the time dict_table_t::prepare_instant() was called. During purge, it can turn out that the table is logically empty, only containing a metadata record. If the metadata record is of the type created by MDEV-11369 instant ADD COLUMN, it can be removed and dict_index_t::clear_instant_add() can be called. This will convert the table to the canonical non-instant format. (If the metadata record is of the MDEV-15562 type, then it can only be deleted if the table becomes empty as the result of rollback of an instant ALTER TABLE operation.) row_purge_remove_clust_if_poss_low(): Add a debug check that ensures that purge can never remove a MDEV-15562 metadata record. ha_innobase::open(): Add a comment about the necessity of rolling back any recovered instant ALTER TABLE transaction on the table. instant_metadata_lock(): An auxiliary function to acquire a page latch on the metadata record, to prevent race conditions. dict_table_t::prepare_instant(), dict_index_t::instant_add_field(), dict_table_t::rollback_instant(), innobase_instant_try(): Invoke instant_metadata_lock() in order to prevent race conditions. dict_index_t::instant_add_field(): Correct debug assertions. The == was guaranteed by code in dict_table_t::prepare_instant() that was introduced in MDEV-15562. Due to the race condition, we could occasionally have <=, but never >= like the code was after MDEV-11369. ha_innobase_inplace_ctx::instant_column(): Wrapper for dict_table_t::instant_column(). Add debug assertions.
1 parent 46a4110 commit 2c4844c

File tree

5 files changed

+148
-21
lines changed

5 files changed

+148
-21
lines changed

mysql-test/suite/innodb/r/instant_alter_purge.result

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra;
2121
disconnect prevent_purge;
2222
InnoDB 0 transactions not purged
2323
DROP TABLE t1;
24+
#
25+
# MDEV-17813 Crash in instant ALTER TABLE due to purge
26+
# concurrently emptying table
27+
#
28+
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
29+
INSERT INTO t1 SET f2=1;
30+
ALTER TABLE t1 ADD COLUMN f1 INT;
31+
connect purge_control,localhost,root;
32+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
33+
connection default;
34+
DELETE FROM t1;
35+
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
36+
ALTER TABLE t1 ADD COLUMN f3 INT;
37+
connection purge_control;
38+
SET DEBUG_SYNC='now WAIT_FOR go';
39+
COMMIT;
40+
InnoDB 0 transactions not purged
41+
SET DEBUG_SYNC='now SIGNAL do';
42+
disconnect purge_control;
43+
connection default;
44+
SET DEBUG_SYNC=RESET;
45+
DROP TABLE t1;
2446
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;

mysql-test/suite/innodb/t/instant_alter_purge.test

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,34 @@ disconnect prevent_purge;
3030
let $wait_all_purged= 0;
3131
--source include/wait_all_purged.inc
3232
DROP TABLE t1;
33+
34+
--echo #
35+
--echo # MDEV-17813 Crash in instant ALTER TABLE due to purge
36+
--echo # concurrently emptying table
37+
--echo #
38+
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
39+
INSERT INTO t1 SET f2=1;
40+
ALTER TABLE t1 ADD COLUMN f1 INT;
41+
42+
connect (purge_control,localhost,root);
43+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
44+
45+
connection default;
46+
DELETE FROM t1;
47+
48+
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
49+
send ALTER TABLE t1 ADD COLUMN f3 INT;
50+
51+
connection purge_control;
52+
SET DEBUG_SYNC='now WAIT_FOR go';
53+
COMMIT;
54+
--source include/wait_all_purged.inc
55+
SET DEBUG_SYNC='now SIGNAL do';
56+
disconnect purge_control;
57+
58+
connection default;
59+
reap;
60+
SET DEBUG_SYNC=RESET;
61+
DROP TABLE t1;
62+
3363
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;

storage/innobase/handler/ha_innodb.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
60466046
int
60476047
ha_innobase::open(const char* name, int, uint)
60486048
{
6049+
/* TODO: If trx_rollback_recovered(bool all=false) is ever
6050+
removed, the first-time open() must hold (or acquire and release)
6051+
a table lock that conflicts with trx_resurrect_table_locks(),
6052+
to ensure that any recovered incomplete ALTER TABLE will have been
6053+
rolled back. Otherwise, dict_table_t::instant could be cleared by
6054+
the rollback invoking dict_index_t::clear_instant_alter() while
6055+
open table handles exist in client connections. */
6056+
60496057
dict_table_t* ib_table;
60506058
char norm_name[FN_REFLEN];
60516059
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;

storage/innobase/handler/handler0alter.cc

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT
136136
| ALTER_COLUMN_UNVERSIONED
137137
| ALTER_DROP_VIRTUAL_COLUMN;
138138

139+
/** Acquire a page latch on the possible metadata record,
140+
to prevent concurrent invocation of dict_index_t::clear_instant_alter()
141+
by purge when the table turns out to be empty.
142+
@param[in,out] index clustered index
143+
@param[in,out] mtr mini-transaction */
144+
static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr)
145+
{
146+
DBUG_ASSERT(index.is_primary());
147+
148+
if (!index.is_instant()) {
149+
/* dict_index_t::clear_instant_alter() cannot be called.
150+
No need for a latch. */
151+
return;
152+
}
153+
154+
btr_cur_t btr_cur;
155+
btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF,
156+
&btr_cur, 0, &mtr);
157+
ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur)));
158+
ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur)));
159+
ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur)));
160+
ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur)));
161+
}
162+
139163
/** Set is_instant() before instant_column().
140164
@param[in] old previous table definition
141165
@param[in] col_map map from old.cols[] and old.v_cols[] to this
@@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
152176
DBUG_ASSERT(old.supports_instant());
153177
DBUG_ASSERT(supports_instant());
154178

155-
const dict_index_t& oindex = *old.indexes.start;
179+
dict_index_t& oindex = *old.indexes.start;
156180
dict_index_t& index = *indexes.start;
157181
first_alter_pos = 0;
158182

183+
mtr_t mtr;
184+
mtr.start();
185+
/* Prevent oindex.n_core_fields and others, so that
186+
purge cannot invoke dict_index_t::clear_instant_alter(). */
187+
instant_metadata_lock(oindex, mtr);
188+
159189
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
160190
i++) {
161191
if (col_map[i] != i) {
@@ -315,6 +345,7 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
315345
DBUG_ASSERT(n_dropped() >= old.n_dropped());
316346
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
317347
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
348+
mtr.commit();
318349
}
319350

320351

@@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant)
335366
DBUG_ASSERT(n_uniq == instant.n_uniq);
336367
DBUG_ASSERT(instant.n_fields >= n_fields);
337368
DBUG_ASSERT(instant.n_nullable >= n_nullable);
338-
DBUG_ASSERT(instant.n_core_fields == n_core_fields);
339-
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes);
369+
/* dict_table_t::prepare_instant() initialized n_core_fields
370+
to be equal. However, after that purge could have emptied the
371+
table and invoked dict_index_t::clear_instant_alter(). */
372+
DBUG_ASSERT(instant.n_core_fields <= n_core_fields);
373+
DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes);
374+
DBUG_ASSERT(instant.n_core_fields == n_core_fields
375+
|| (!is_instant() && instant.is_instant()));
376+
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes
377+
|| (!is_instant() && instant.is_instant()));
340378

341379
/* instant will have all fields (including ones for columns
342380
that have been or are being instantly dropped) in the same position
@@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant(
627665
const ulint* col_map)
628666
{
629667
ut_ad(mutex_own(&dict_sys->mutex));
668+
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
630669
dict_index_t* index = indexes.start;
670+
mtr_t mtr;
671+
mtr.start();
672+
/* Prevent concurrent execution of dict_index_t::clear_instant_alter()
673+
by acquiring a latch on the leftmost leaf page. */
674+
instant_metadata_lock(*index, mtr);
631675
/* index->is_instant() does not necessarily hold here, because
632676
the table may have been emptied */
633677
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
@@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant(
667711
n_t_def = n_t_cols = n_cols + n_v_cols;
668712

669713
index->fields = old_fields;
714+
mtr.commit();
670715

671716
while ((index = dict_table_get_next_index(index)) != NULL) {
672717
if (index->to_be_dropped) {
@@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
924969
first_alter_pos);
925970
}
926971

972+
/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. */
973+
void instant_column()
974+
{
975+
DBUG_ASSERT(is_instant());
976+
DBUG_ASSERT(old_n_fields
977+
== old_table->indexes.start->n_fields);
978+
old_table->instant_column(*instant_table, col_map);
979+
}
980+
927981
/** Revert prepare_instant() if the transaction is rolled back. */
928982
void rollback_instant()
929983
{
@@ -5287,13 +5341,24 @@ static bool innobase_instant_try(
52875341
dict_table_t* user_table = ctx->old_table;
52885342

52895343
dict_index_t* index = dict_table_get_first_index(user_table);
5290-
uint n_old_fields = index->n_fields;
5344+
mtr_t mtr;
5345+
mtr.start();
5346+
/* Prevent purge from calling dict_index_t::clear_instant_alter(),
5347+
to protect index->n_core_fields, index->table->instant and others
5348+
from changing during ctx->instant_column(). */
5349+
instant_metadata_lock(*index, mtr);
5350+
const unsigned n_old_fields = index->n_fields;
52915351
const dict_col_t* old_cols = user_table->cols;
52925352
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
52935353

5294-
user_table->instant_column(*ctx->instant_table, ctx->col_map);
5354+
ctx->instant_column();
52955355

52965356
DBUG_ASSERT(index->n_fields >= n_old_fields);
5357+
/* Release the page latch. Between this and the next
5358+
btr_pcur_open_at_index_side(), data fields such as
5359+
index->n_core_fields and index->table->instant could change,
5360+
but we would handle that in empty_table: below. */
5361+
mtr.commit();
52975362
/* The table may have been emptied and may have lost its
52985363
'instantness' during this ALTER TABLE. */
52995364

@@ -5441,7 +5506,6 @@ static bool innobase_instant_try(
54415506
memset(roll_ptr, 0, sizeof roll_ptr);
54425507

54435508
dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
5444-
mtr_t mtr;
54455509
mtr.start();
54465510
index->set_modified(mtr);
54475511
btr_pcur_t pcur;

storage/innobase/row/row0purge.cc

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low(
127127
purge_node_t* node, /*!< in/out: row purge node */
128128
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
129129
{
130-
dict_index_t* index;
131-
bool success = true;
132-
mtr_t mtr;
133-
rec_t* rec;
134-
mem_heap_t* heap = NULL;
135-
ulint* offsets;
136-
ulint offsets_[REC_OFFS_NORMAL_SIZE];
137-
rec_offs_init(offsets_);
138-
139130
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
140131
|| node->vcol_info.is_used());
141132

142-
index = dict_table_get_first_index(node->table);
133+
dict_index_t* index = dict_table_get_first_index(node->table);
143134

144135
log_free_check();
145-
mtr_start(&mtr);
146-
index->set_modified(mtr);
136+
137+
mtr_t mtr;
138+
mtr.start();
147139

148140
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
149141
/* The record was already removed. */
150-
goto func_exit;
142+
mtr.commit();
143+
return true;
151144
}
152145

153-
rec = btr_pcur_get_rec(&node->pcur);
146+
ut_d(const bool was_instant = !!index->table->instant);
147+
index->set_modified(mtr);
154148

155-
offsets = rec_get_offsets(
149+
rec_t* rec = btr_pcur_get_rec(&node->pcur);
150+
ulint offsets_[REC_OFFS_NORMAL_SIZE];
151+
rec_offs_init(offsets_);
152+
mem_heap_t* heap = NULL;
153+
ulint* offsets = rec_get_offsets(
156154
rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
155+
bool success = true;
157156

158157
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
159158
/* Someone else has modified the record later: do not remove */
@@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low(
186185
}
187186
}
188187

188+
/* Prove that dict_index_t::clear_instant_alter() was
189+
not called with index->table->instant != NULL. */
190+
ut_ad(!was_instant || index->table->instant);
191+
189192
func_exit:
190193
if (heap) {
191194
mem_heap_free(heap);

0 commit comments

Comments
 (0)