Skip to content

Commit 57ec42b

Browse files
committed
MDEV-23190 InnoDB data file extension is not crash-safe
When InnoDB is extending a data file, it is updating the FSP_SIZE field in the first page of the data file.

In commit 8451e09 (MDEV-11556) we removed a work-around for this bug and made recovery stricter, by making it track changes to FSP_SIZE via redo log records, and extend the data files before any changes are being applied to them.

It turns out that the function fsp_fill_free_list() is not crash-safe with respect to this when it is initializing the change buffer bitmap page (page 1, or generally, N*innodb_page_size+1). It uses a separate mini-transaction that is committed (and will be written to the redo log file) before the mini-transaction that actually extended the data file. Hence, recovery can observe a reference to a page that is beyond the current end of the data file.

fsp_fill_free_list(): Initialize the change buffer bitmap page in the same mini-transaction.

The rest of the changes are fixing a bug that the use of the separate mini-transaction was attempting to work around. Namely, we must ensure that no other thread will access the change buffer bitmap page before our mini-transaction has been committed and all page latches have been released.

That is, for read-ahead as well as neighbour flushing, we must avoid accessing pages that might not yet be durably part of the tablespace.

fil_space_t::committed_size: The size of the tablespace as persisted by mtr_commit().

fil_space_t::max_page_number_for_io(): Limit the highest page number for I/O batches to committed_size.

MTR_MEMO_SPACE_X_LOCK: Replaces MTR_MEMO_X_LOCK for fil_space_t::latch.

mtr_x_space_lock(): Replaces mtr_x_lock() for fil_space_t::latch.

mtr_memo_slot_release_func(): When releasing MTR_MEMO_SPACE_X_LOCK, copy space->size to space->committed_size. In this way, read-ahead or flushing will never be invoked on pages that do not yet exist according to FSP_SIZE.
1 parent 98e2c17 commit 57ec42b

File tree

24 files changed

+313
-197
lines changed

24 files changed

+313
-197
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2013, 2019, MariaDB Corporation.
4+
Copyright (c) 2013, 2020, MariaDB Corporation.
55
Copyright (c) 2013, 2014, Fusion-io
66
77
This program is free software; you can redistribute it and/or modify it under
@@ -1241,8 +1241,11 @@ buf_flush_try_neighbors(
12411241
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
12421242
#endif
12431243

1244-
if (high > fil_space_get_size(space)) {
1245-
high = fil_space_get_size(space);
1244+
if (fil_space_t *s = fil_space_acquire_for_io(space)) {
1245+
high = s->max_page_number_for_io(high);
1246+
fil_space_release_for_io(s);
1247+
} else {
1248+
return 0;
12461249
}
12471250

12481251
ulint count = 0;

storage/innobase/buf/buf0rea.cc

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2015, 2017, MariaDB Corporation.
4+
Copyright (c) 2015, 2020, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -292,19 +292,22 @@ buf_read_ahead_random(
292292
return(0);
293293
}
294294

295-
/* Remember the tablespace version before we ask te tablespace size
296-
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
297-
do not try to read outside the bounds of the tablespace! */
295+
if (fil_space_t *s = fil_space_acquire_for_io(space)) {
296+
/* Remember the tablespace version along with the
297+
tablespace size: if DISCARD + IMPORT changes the
298+
actual .ibd file meanwhile, we do not try to read
299+
outside the bounds of the tablespace! */
300+
tablespace_version = s->tablespace_version;
298301

299-
tablespace_version = fil_space_get_version(space);
300-
301-
low = (offset / buf_read_ahead_random_area)
302-
* buf_read_ahead_random_area;
303-
high = (offset / buf_read_ahead_random_area + 1)
304-
* buf_read_ahead_random_area;
305-
if (high > fil_space_get_size(space)) {
302+
low = (offset / buf_read_ahead_random_area)
303+
* buf_read_ahead_random_area;
304+
high = (offset / buf_read_ahead_random_area + 1)
305+
* buf_read_ahead_random_area;
306+
high = s->max_page_number_for_io(high);
306307

307-
high = fil_space_get_size(space);
308+
fil_space_release_for_io(s);
309+
} else {
310+
return 0;
308311
}
309312

310313
buf_pool_mutex_enter(buf_pool);
@@ -435,22 +438,16 @@ buf_read_page(
435438
ulint zip_size,
436439
ulint offset)
437440
{
438-
ib_int64_t tablespace_version;
439-
ulint count;
440441
dberr_t err = DB_SUCCESS;
441442

442-
tablespace_version = fil_space_get_version(space_id);
443-
444443
FilSpace space(space_id, true);
445444

446445
if (space()) {
447-
448-
/* We do the i/o in the synchronous aio mode to save thread
449-
switches: hence TRUE */
450-
count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space_id,
451-
zip_size, FALSE,
452-
tablespace_version, offset);
453-
446+
ulint count = buf_read_page_low(&err, /*sync=*/true,
447+
BUF_READ_ANY_PAGE,
448+
space_id, zip_size, FALSE,
449+
space()->tablespace_version,
450+
offset);
454451
srv_stats.buf_pool_reads.add(count);
455452
}
456453

@@ -619,21 +616,30 @@ buf_read_ahead_linear(
619616
return(0);
620617
}
621618

622-
/* Remember the tablespace version before we ask te tablespace size
623-
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
624-
do not try to read outside the bounds of the tablespace! */
619+
uint32_t space_high_limit = 0;
625620

626-
tablespace_version = fil_space_get_version(space);
621+
if (fil_space_t *s = fil_space_acquire_for_io(space)) {
622+
/* Remember the tablespace version along with the
623+
tablespace size: if DISCARD + IMPORT changes the
624+
actual .ibd file meanwhile, we do not try to read
625+
outside the bounds of the tablespace! */
626+
tablespace_version = s->tablespace_version;
627627

628-
buf_pool_mutex_enter(buf_pool);
628+
space_high_limit = s->max_page_number_for_io(ULINT_UNDEFINED);
629629

630-
if (high > fil_space_get_size(space)) {
631-
buf_pool_mutex_exit(buf_pool);
630+
fil_space_release_for_io(s);
631+
} else {
632+
return 0;
633+
}
634+
635+
if (high > space_high_limit) {
632636
/* The area is not whole, return */
633637

634638
return(0);
635639
}
636640

641+
buf_pool_mutex_enter(buf_pool);
642+
637643
if (buf_pool->n_pend_reads
638644
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
639645
buf_pool_mutex_exit(buf_pool);
@@ -754,7 +760,7 @@ buf_read_ahead_linear(
754760
return(0);
755761
}
756762

757-
if (high > fil_space_get_size(space)) {
763+
if (high > space_high_limit) {
758764
/* The area is not whole, return */
759765

760766
return(0);

storage/innobase/fil/fil0fil.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ fil_node_open_file(
676676
#ifdef UNIV_HOTBACKUP
677677
add_size:
678678
#endif /* UNIV_HOTBACKUP */
679-
space->size += node->size;
679+
space->committed_size = space->size += node->size;
680680
}
681681

682682
ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(space->flags);
@@ -1151,6 +1151,9 @@ fil_mutex_enter_and_prepare_for_io(
11511151
ut_a(success);
11521152
/* InnoDB data files cannot shrink. */
11531153
ut_a(space->size >= size);
1154+
if (size > space->committed_size) {
1155+
space->committed_size = size;
1156+
}
11541157

11551158
/* There could be multiple concurrent I/O requests for
11561159
this tablespace (multiple threads trying to extend

0 commit comments

Comments
 (0)