From a7dd7c899356b2d3a7f79e6ebba5d854ed63ae9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 3 Sep 2020 14:49:11 +0300 Subject: [PATCH] MDEV-23651 InnoDB: Failing assertion: !space->referenced() commit de942c9f618b590a01a7960c171d7e50e435708f (MDEV-15983) introduced a race condition that we inadequately fixed in commit 93b69825adf5e55601ba44b4c89ec36ce62f8112 (MDEV-16169). Because fil_space_t::release() or fil_space_t::acquire() are not protected by fil_system.mutex like their predecessors, it is possible that stop_new_ops was set between the time a thread checked fil_space_t::is_stopping() and invoked fil_space_t::acquire(). In an execution trace, this happened in fil_system_t::keyrotate_next(), causing an assertion failure in fil_delete_tablespace() in the other thread that seeked to stop new operations. We fix this bug by merging the flag fil_space_t::stop_new_ops and the reference count fil_space_t::n_pending_ops into a single word that is only being accessed by atomic memory operations. fil_space_t::set_stopping(): Accessor for changing the state of the former stop_new_ops flag. fil_space_t::acquire(): Return whether the acquisition succeeded. It would fail between set_stopping(true) and set_stopping(false). --- storage/innobase/fil/fil0crypt.cc | 39 ++++---- storage/innobase/fil/fil0fil.cc | 51 +++------- storage/innobase/handler/i_s.cc | 6 +- storage/innobase/include/fil0fil.h | 144 ++++++++++++++++++----------- storage/innobase/trx/trx0purge.cc | 6 +- 5 files changed, 128 insertions(+), 118 deletions(-) diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 4f80b7e409f73..c9663837fa65f 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space, } } - if (it == end) - return NULL; + while (it != end) + { + space= &*it; + if (space->acquire()) + return space; + while (++it != end && (!UT_LIST_GET_LEN(it->chain) || it->is_stopping())); + } - space= &*it; - space->acquire(); - return space; + return NULL; } /** Return the next tablespace. @@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, space= UT_LIST_GET_FIRST(fil_system.space_list); /* We can trust that space is not NULL because at least the system tablespace is always present and loaded first. */ - space->acquire(); + if (!space->acquire()) + goto next; } else { /* Move on to the next fil_space_t */ space->release(); +next: space= UT_LIST_GET_NEXT(space_list, space); /* Skip abnormal tablespaces or those that are being created by @@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck, space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE)) space= UT_LIST_GET_NEXT(space_list, space); - if (space) - space->acquire(); + if (space && !space->acquire()) + goto next; } mutex_exit(&fil_system.mutex); @@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill() space = UT_LIST_GET_NEXT(space_list, space)) { if (space->purpose != FIL_TYPE_TABLESPACE || space->is_in_rotation_list - || space->is_stopping() - || UT_LIST_GET_LEN(space->chain) == 0) { + || UT_LIST_GET_LEN(space->chain) == 0 + || !space->acquire()) { continue; } /* Ensure that crypt_data has been initialized. */ if (!space->size) { - /* Protect the tablespace while we may - release fil_system.mutex. */ - ut_d(space->acquire()); ut_d(const fil_space_t* s=) fil_system.read_page0(space->id); ut_ad(!s || s == space); - ut_d(space->release()); if (!space->size) { /* Page 0 was not loaded. Skip this tablespace. */ - continue; + goto next; } } /* Skip ENCRYPTION!=DEFAULT tablespaces. */ if (space->crypt_data && !space->crypt_data->is_default_encryption()) { - continue; + goto next; } if (srv_encrypt_tables) { @@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill() innodb_encrypt_tables!=OFF */ if (space->crypt_data && space->crypt_data->min_key_version) { - continue; + goto next; } } else { /* Skip unencrypted tablespaces if innodb_encrypt_tables=OFF */ if (!space->crypt_data || !space->crypt_data->min_key_version) { - continue; + goto next; } } fil_system.rotation_list.push_back(*space); space->is_in_rotation_list = true; +next: + space->release(); } } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 73208d4dddc79..f354f7905bb65 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU( static void fil_flush_low(fil_space_t* space, bool metadata = false) { ut_ad(mutex_own(&fil_system.mutex)); - ut_ad(space); - ut_ad(!space->stop_new_ops); + ut_ad(!space->is_stopping()); if (fil_buffering_disabled(space)) { @@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent) ib::warn() << "Trying to access missing" " tablespace " << id; } - } else if (space->is_stopping()) { + } else if (!space->acquire()) { space = NULL; - } else { - space->acquire(); } mutex_exit(&fil_system.mutex); @@ -2254,14 +2251,14 @@ fil_check_pending_ops(const fil_space_t* space, ulint count) { ut_ad(mutex_own(&fil_system.mutex)); - if (space == NULL) { + if (!space) { return 0; } - if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) { + if (uint32_t n_pending_ops = space->referenced()) { - /* Give a warning every 10 second, starting after 1 second */ - if ((count % 500) == 50) { + /* Give a warning every 10 second, starting after 1 second */ + if ((count % 500) == 50) { ib::warn() << "Trying to close/delete/truncate" " tablespace '" << space->name << "' but there are " << n_pending_ops @@ -2347,14 +2344,13 @@ fil_check_pending_operations( fil_space_t* sp = fil_space_get_by_id(id); if (sp) { - sp->stop_new_ops = true; - if (sp->crypt_data) { - sp->acquire(); + if (sp->crypt_data && sp->acquire()) { mutex_exit(&fil_system.mutex); fil_space_crypt_close_tablespace(sp); mutex_enter(&fil_system.mutex); sp->release(); } + sp->set_stopping(true); } /* Check for pending operations. */ @@ -2875,7 +2871,7 @@ fil_rename_tablespace( multiple datafiles per tablespace. */ ut_a(UT_LIST_GET_LEN(space->chain) == 1); node = UT_LIST_GET_FIRST(space->chain); - space->n_pending_ops++; + ut_a(space->acquire()); mutex_exit(&fil_system.mutex); @@ -2897,8 +2893,7 @@ fil_rename_tablespace( /* log_sys.mutex is above fil_system.mutex in the latching order */ ut_ad(log_mutex_own()); mutex_enter(&fil_system.mutex); - ut_ad(space->n_pending_ops); - space->n_pending_ops--; + space->release(); ut_ad(space->name == old_space_name); ut_ad(node->name == old_file_name); bool success; @@ -4204,7 +4199,7 @@ fil_io( if (space == NULL || (req_type.is_read() && !sync - && space->stop_new_ops + && space->is_stopping() && !space->is_being_truncated)) { mutex_exit(&fil_system.mutex); @@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit( fil_space_acquire() before mtr_start() and fil_space_t::release() after mtr_commit(). This is why n_pending_ops should not be zero if stop_new_ops is set. */ - ut_ad(!space->stop_new_ops + ut_ad(!space->is_stopping() || space->is_being_truncated /* fil_truncate_prepare() */ || space->referenced()); } @@ -5070,7 +5065,7 @@ truncate_t::truncate( err = DB_ERROR; } - space->stop_new_ops = false; + space->set_stopping(false); /* If we opened the file in this function, close it. */ if (!already_open) { @@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset) return block_size; } -/*******************************************************************//** -Returns the table space by a given id, NULL if not found. */ -fil_space_t* -fil_space_found_by_id( -/*==================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space = NULL; - mutex_enter(&fil_system.mutex); - space = fil_space_get_by_id(id); - - /* Not found if space is being deleted */ - if (space && space->stop_new_ops) { - space = NULL; - } - - mutex_exit(&fil_system.mutex); - return space; -} - /** Get should we punch hole to tablespace. @param[in] node File node diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 9da81f96e8800..d3e951ce5a050 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table( for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); space; space = UT_LIST_GET_NEXT(space_list, space)) { if (space->purpose == FIL_TYPE_TABLESPACE - && !space->is_stopping()) { - space->acquire(); + && space->acquire()) { mutex_exit(&fil_system.mutex); if (int err = i_s_dict_fill_tablespaces_encryption( thd, space, tables->table)) { @@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table( for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); space; space = UT_LIST_GET_NEXT(space_list, space)) { if (space->purpose == FIL_TYPE_TABLESPACE - && !space->is_stopping()) { - space->acquire(); + && space->acquire()) { mutex_exit(&fil_system.mutex); if (int err = i_s_dict_fill_tablespaces_scrubbing( thd, space, tables->table)) { diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 0a1d78230c750..898caf1a81373 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -93,18 +93,6 @@ struct fil_space_t : ilist_node, /** Log sequence number of the latest MLOG_INDEX_LOAD record that was found while parsing the redo log */ lsn_t enable_lsn; - bool stop_new_ops; - /*!< we set this true when we start - deleting a single-table tablespace. - When this is set following new ops - are not allowed: - * read IO request - * ibuf merge - * file flush - Note that we can still possibly have - new write operations because we don't - check this flag when doing flush - batches. */ /** whether undo tablespace truncation is in progress */ bool is_being_truncated; #ifdef UNIV_DEBUG @@ -142,14 +130,24 @@ struct fil_space_t : ilist_node, ulint n_pending_flushes; /*!< this is positive when flushing the tablespace to disk; dropping of the tablespace is forbidden if this is positive */ - /** Number of pending buffer pool operations accessing the tablespace - without holding a table lock or dict_operation_lock S-latch - that would prevent the table (and tablespace) from being - dropped. An example is change buffer merge. - The tablespace cannot be dropped while this is nonzero, - or while fil_node_t::n_pending is nonzero. - Protected by fil_system.mutex and my_atomic_loadlint() and friends. */ - ulint n_pending_ops; +private: + /** Number of pending buffer pool operations accessing the + tablespace without holding a table lock or dict_operation_lock + S-latch that would prevent the table (and tablespace) from being + dropped. An example is change buffer merge. + + The tablespace cannot be dropped while this is nonzero, or while + fil_node_t::n_pending is nonzero. + + The most significant bit contains the STOP_NEW_OPS flag. + + Protected by my_atomic. */ + uint32_t n_pending_ops; + + /** Flag in n_pending_ops that indicates that the tablespace is being + deleted, and no further operations should be performed */ + static const uint32_t STOP_NEW_OPS= 1U << 31; +public: /** Number of pending block read or write operations (when a write is imminent or a read has recently completed). The tablespace object cannot be freed while this is nonzero, @@ -183,9 +181,6 @@ struct fil_space_t : ilist_node, ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ - /** @return whether the tablespace is about to be dropped */ - bool is_stopping() const { return stop_new_ops; } - /** Clamp a page number for batched I/O, such as read-ahead. @param offset page number limit @return offset clamped to the tablespace size */ @@ -267,39 +262,78 @@ struct fil_space_t : ilist_node, /** Close each file. Only invoked on fil_system.temp_space. */ void close(); - /** Acquire a tablespace reference. */ - void acquire() { my_atomic_addlint(&n_pending_ops, 1); } - /** Release a tablespace reference. - @return whether this was the last reference */ - bool release() - { - ulint n = my_atomic_addlint(&n_pending_ops, ulint(-1)); - ut_ad(n); - return n == 1; - } - /** @return whether references are being held */ - bool referenced() { return my_atomic_loadlint(&n_pending_ops); } - /** @return whether references are being held */ - bool referenced() const - { - return const_cast(this)->referenced(); - } + /** @return whether the tablespace is about to be dropped or is referenced */ + uint32_t is_stopping_or_referenced() + { + return my_atomic_load32(&n_pending_ops); + } - /** Acquire a tablespace reference for I/O. */ - void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); } - /** Release a tablespace reference for I/O. */ - void release_for_io() - { - ut_ad(pending_io()); - my_atomic_addlint(&n_pending_ios, ulint(-1)); - } - /** @return whether I/O is pending */ - bool pending_io() { return my_atomic_loadlint(&n_pending_ios); } - /** @return whether I/O is pending */ - bool pending_io() const - { - return const_cast(this)->pending_io(); - } + /** @return whether the tablespace is about to be dropped or is referenced */ + uint32_t is_stopping_or_referenced() const + { + return const_cast(this)->is_stopping_or_referenced(); + } + + /** @return whether the tablespace is about to be dropped */ + bool is_stopping() const + { + return is_stopping_or_referenced() & STOP_NEW_OPS; + } + + /** @return number of references being held */ + uint32_t referenced() const + { + return is_stopping_or_referenced() & ~STOP_NEW_OPS; + } + + /** Note that operations on the tablespace must stop or can resume */ + void set_stopping(bool stopping) + { + /* Note: starting with 10.4 this should be std::atomic::fetch_xor() */ + uint32_t n= stopping ? 0 : STOP_NEW_OPS; + while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n, + n ^ STOP_NEW_OPS, + MY_MEMORY_ORDER_ACQUIRE, + MY_MEMORY_ORDER_RELAXED)) + ut_ad(!(n & STOP_NEW_OPS) == stopping); + } + + MY_ATTRIBUTE((warn_unused_result)) + /** @return whether a tablespace reference was successfully acquired */ + bool acquire() + { + uint32_t n= 0; + while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n, n + 1, + MY_MEMORY_ORDER_ACQUIRE, + MY_MEMORY_ORDER_RELAXED)) + if (UNIV_UNLIKELY(n & STOP_NEW_OPS)) + return false; + return true; + } + /** Release a tablespace reference. + @return whether this was the last reference */ + bool release() + { + uint32_t n= my_atomic_add32(&n_pending_ops, uint32_t(-1)); + ut_ad(n & ~STOP_NEW_OPS); + return (n & ~STOP_NEW_OPS) == 1; + } + + /** Acquire a tablespace reference for I/O. */ + void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); } + /** Release a tablespace reference for I/O. */ + void release_for_io() + { + ut_d(ulint n=) my_atomic_addlint(&n_pending_ios, ulint(-1)); + ut_ad(n); + } + /** @return whether I/O is pending */ + bool pending_io() { return my_atomic_loadlint(&n_pending_ios); } + /** @return whether I/O is pending */ + bool pending_io() const + { + return const_cast(this)->pending_io(); + } }; /** Value of fil_space_t::magic_n */ diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 279ba21d4db2d..02a524d6850fd 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate( /* This is only executed by the srv_purge_coordinator_thread. */ export_vars.innodb_undo_truncations++; - /* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ + /* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage + (with write-ahead logging). */ mutex_enter(&fil_system.mutex); - ut_ad(space->stop_new_ops); ut_ad(space->is_being_truncated); - space->stop_new_ops = false; space->is_being_truncated = false; + space->set_stopping(false); mutex_exit(&fil_system.mutex); if (purge_sys.rseg != NULL