Skip to content

Commit

Permalink
MDEV-23651 InnoDB: Failing assertion: !space->referenced()
Browse files Browse the repository at this point in the history
commit de942c9 (MDEV-15983)
introduced a race condition that we inadequately fixed in
commit 93b6982 (MDEV-16169).

Because fil_space_t::release() or fil_space_t::acquire() are
not protected by fil_system.mutex like their predecessors,
it is possible that stop_new_ops was set between the time
a thread checked fil_space_t::is_stopping() and invoked
fil_space_t::acquire().

In an execution trace, this happened in fil_system_t::keyrotate_next(),
causing an assertion failure in fil_delete_tablespace()
in the other thread that seeked to stop new operations.

We fix this bug by merging the flag fil_space_t::stop_new_ops
and the reference count fil_space_t::n_pending_ops into a
single word that is only being accessed by atomic memory operations.

fil_space_t::set_stopping(): Accessor for changing the state of
the former stop_new_ops flag.

fil_space_t::acquire(): Return whether the acquisition succeeded.
It would fail between set_stopping(true) and set_stopping(false).
  • Loading branch information
dr-m committed Sep 3, 2020
1 parent 33ae161 commit a7dd7c8
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 118 deletions.
39 changes: 21 additions & 18 deletions storage/innobase/fil/fil0crypt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space,
}
}

if (it == end)
return NULL;
while (it != end)
{
space= &*it;
if (space->acquire())
return space;
while (++it != end && (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()));
}

space= &*it;
space->acquire();
return space;
return NULL;
}

/** Return the next tablespace.
Expand All @@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space= UT_LIST_GET_FIRST(fil_system.space_list);
/* We can trust that space is not NULL because at least the
system tablespace is always present and loaded first. */
space->acquire();
if (!space->acquire())
goto next;
}
else
{
/* Move on to the next fil_space_t */
space->release();
next:
space= UT_LIST_GET_NEXT(space_list, space);

/* Skip abnormal tablespaces or those that are being created by
Expand All @@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE))
space= UT_LIST_GET_NEXT(space_list, space);

if (space)
space->acquire();
if (space && !space->acquire())
goto next;
}

mutex_exit(&fil_system.mutex);
Expand Down Expand Up @@ -2330,51 +2335,49 @@ static void fil_crypt_rotation_list_fill()
space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose != FIL_TYPE_TABLESPACE
|| space->is_in_rotation_list
|| space->is_stopping()
|| UT_LIST_GET_LEN(space->chain) == 0) {
|| UT_LIST_GET_LEN(space->chain) == 0
|| !space->acquire()) {
continue;
}

/* Ensure that crypt_data has been initialized. */
if (!space->size) {
/* Protect the tablespace while we may
release fil_system.mutex. */
ut_d(space->acquire());
ut_d(const fil_space_t* s=)
fil_system.read_page0(space->id);
ut_ad(!s || s == space);
ut_d(space->release());
if (!space->size) {
/* Page 0 was not loaded.
Skip this tablespace. */
continue;
goto next;
}
}

/* Skip ENCRYPTION!=DEFAULT tablespaces. */
if (space->crypt_data
&& !space->crypt_data->is_default_encryption()) {
continue;
goto next;
}

if (srv_encrypt_tables) {
/* Skip encrypted tablespaces if
innodb_encrypt_tables!=OFF */
if (space->crypt_data
&& space->crypt_data->min_key_version) {
continue;
goto next;
}
} else {
/* Skip unencrypted tablespaces if
innodb_encrypt_tables=OFF */
if (!space->crypt_data
|| !space->crypt_data->min_key_version) {
continue;
goto next;
}
}

fil_system.rotation_list.push_back(*space);
space->is_in_rotation_list = true;
next:
space->release();
}
}

Expand Down
51 changes: 13 additions & 38 deletions storage/innobase/fil/fil0fil.cc
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU(
static void fil_flush_low(fil_space_t* space, bool metadata = false)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space);
ut_ad(!space->stop_new_ops);
ut_ad(!space->is_stopping());

if (fil_buffering_disabled(space)) {

Expand Down Expand Up @@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent)
ib::warn() << "Trying to access missing"
" tablespace " << id;
}
} else if (space->is_stopping()) {
} else if (!space->acquire()) {
space = NULL;
} else {
space->acquire();
}

mutex_exit(&fil_system.mutex);
Expand Down Expand Up @@ -2254,14 +2251,14 @@ fil_check_pending_ops(const fil_space_t* space, ulint count)
{
ut_ad(mutex_own(&fil_system.mutex));

if (space == NULL) {
if (!space) {
return 0;
}

if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) {
if (uint32_t n_pending_ops = space->referenced()) {

/* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) {
/* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) {
ib::warn() << "Trying to close/delete/truncate"
" tablespace '" << space->name
<< "' but there are " << n_pending_ops
Expand Down Expand Up @@ -2347,14 +2344,13 @@ fil_check_pending_operations(
fil_space_t* sp = fil_space_get_by_id(id);

if (sp) {
sp->stop_new_ops = true;
if (sp->crypt_data) {
sp->acquire();
if (sp->crypt_data && sp->acquire()) {
mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp);
mutex_enter(&fil_system.mutex);
sp->release();
}
sp->set_stopping(true);
}

/* Check for pending operations. */
Expand Down Expand Up @@ -2875,7 +2871,7 @@ fil_rename_tablespace(
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
space->n_pending_ops++;
ut_a(space->acquire());

mutex_exit(&fil_system.mutex);

Expand All @@ -2897,8 +2893,7 @@ fil_rename_tablespace(
/* log_sys.mutex is above fil_system.mutex in the latching order */
ut_ad(log_mutex_own());
mutex_enter(&fil_system.mutex);
ut_ad(space->n_pending_ops);
space->n_pending_ops--;
space->release();
ut_ad(space->name == old_space_name);
ut_ad(node->name == old_file_name);
bool success;
Expand Down Expand Up @@ -4204,7 +4199,7 @@ fil_io(
if (space == NULL
|| (req_type.is_read()
&& !sync
&& space->stop_new_ops
&& space->is_stopping()
&& !space->is_being_truncated)) {

mutex_exit(&fil_system.mutex);
Expand Down Expand Up @@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit(
fil_space_acquire() before mtr_start() and
fil_space_t::release() after mtr_commit(). This is why
n_pending_ops should not be zero if stop_new_ops is set. */
ut_ad(!space->stop_new_ops
ut_ad(!space->is_stopping()
|| space->is_being_truncated /* fil_truncate_prepare() */
|| space->referenced());
}
Expand Down Expand Up @@ -5070,7 +5065,7 @@ truncate_t::truncate(
err = DB_ERROR;
}

space->stop_new_ops = false;
space->set_stopping(false);

/* If we opened the file in this function, close it. */
if (!already_open) {
Expand Down Expand Up @@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset)
return block_size;
}

/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
fil_space_t*
fil_space_found_by_id(
/*==================*/
ulint id) /*!< in: space id */
{
fil_space_t* space = NULL;
mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);

/* Not found if space is being deleted */
if (space && space->stop_new_ops) {
space = NULL;
}

mutex_exit(&fil_system.mutex);
return space;
}

/**
Get should we punch hole to tablespace.
@param[in] node File node
Expand Down
6 changes: 2 additions & 4 deletions storage/innobase/handler/i_s.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) {
space->acquire();
&& space->acquire()) {
mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_encryption(
thd, space, tables->table)) {
Expand Down Expand Up @@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) {
space->acquire();
&& space->acquire()) {
mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_scrubbing(
thd, space, tables->table)) {
Expand Down
Loading

0 comments on commit a7dd7c8

Please sign in to comment.