From 364970fb2bf1782a31113d3c8543b4bfa3eabf06 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Mon, 27 Apr 2026 21:27:34 +0530 Subject: [PATCH 1/2] MDEV-39261 MariaDB crash on startup in presence of indexed virtual columns Problem: ======== A single InnoDB purge worker thread can process undo logs from different tables within the same batch. But get_purge_table(), open_purge_table() incorrectly assumes that a 1:1 relationship between a purge worker thread and a table within a single batch. Based on this wrong assumtion, InnoDB attempts to reuse TABLE objects cached in thd->open_tables for virtual column computation. 1) Purge worker opens Table A and caches the TABLE pointer in thd->open_tables. 2) Same purge worker moves to Table B in the same batch, get_purge_table() retrieves the cached pointer for Table A instead of opening Table B. 3) Because innobase::open() is ignored for Table B, the virtual column template is never initialized. 4) virtual column computation for Table B aborts the server Solution: ======== - Introduced purge_table class which has the following purge_table: Stores either TABLE* (for tables with indexed virtual columns) or MDL_ticket* (for tables without) in a single union using LSB as a flag. For tables with indexed virtual columns: opens TABLE*, accesses MDL_ticket* via TABLE->mdl_ticket For tables without indexed virtual columns: stores only MDL_ticket*. trx_purge_attach_undo_recs(): Coordinator opens both dict_table_t* and TABLE* with proper MDL protection. Workers access cached table pointers from purge_node_t->tables without opening their own handles purge_sys.coordinator_thd: Distinguish coordinator from workers in cleanup logic. Skip innobase_reset_background_thd() for coordinator thread to prevent premature table closure during batch processing. Workers still call cleanup to release their thread-local resources trx_purge_close_tables(): Rewrite for purge coordinator thread 1) Close all dict_table_t* objects first 2) Call close_thread_tables() once for all TABLE* objects 3) Release MDL tickets last, after tables are closed Added table->lock_mutex protection when reading (or) writing vc_templ->mysql_table and mysql_table_query_id. Clear cached TABLE* pointers before closing tables to prevent stale pointer access Declared open_purge_table() and close_thread_tables() in trx0purge.cc Declared reset_thd() in row0purge.cc and dict0stats_bg.cc. Removed innobase_reset_background_thd() --- .../suite/gcol/r/innodb_virtual_basic.result | 14 +- .../suite/gcol/t/innodb_virtual_basic.test | 5 + mysql-test/suite/vcol/r/races.result | 14 ++ mysql-test/suite/vcol/t/races.test | 56 +++++ sql/sql_base.cc | 2 +- sql/sql_base.h | 2 +- sql/sql_class.cc | 14 +- storage/innobase/dict/dict0dict.cc | 4 +- storage/innobase/dict/dict0stats_bg.cc | 3 +- storage/innobase/handler/ha_innodb.cc | 57 ++--- storage/innobase/include/dict0mem.h | 11 + storage/innobase/include/ha_prototypes.h | 5 - storage/innobase/include/row0purge.h | 51 +++- storage/innobase/include/row0vers.h | 10 +- storage/innobase/include/trx0purge.h | 27 ++- storage/innobase/row/row0purge.cc | 76 +++--- storage/innobase/row/row0vers.cc | 9 +- storage/innobase/trx/trx0purge.cc | 221 ++++++++++++------ storage/innobase/trx/trx0trx.cc | 2 +- 19 files changed, 398 insertions(+), 185 deletions(-) diff --git a/mysql-test/suite/gcol/r/innodb_virtual_basic.result b/mysql-test/suite/gcol/r/innodb_virtual_basic.result index 35534d68e632a..2e3de6045bf27 100644 --- a/mysql-test/suite/gcol/r/innodb_virtual_basic.result +++ b/mysql-test/suite/gcol/r/innodb_virtual_basic.result @@ -48,9 +48,9 @@ INSERT INTO t VALUES (1290, 212, DEFAULT, "xmx"); ROLLBACK; SELECT c FROM t; c -NULL 13 29 +NULL SELECT * FROM t; a b c h 10 3 13 mm @@ -303,23 +303,23 @@ END| CALL UPDATE_t(); SELECT c FROM t; c -NULL 19 -29 2103 +29 +NULL CALL DELETE_insert_t(); SELECT c FROM t; c -NULL 19 -29 2103 +29 +NULL DROP INDEX idx ON t; CALL UPDATE_t(); SELECT c FROM t; c -2103 19 +2103 29 NULL DROP PROCEDURE DELETE_insert_t; @@ -523,10 +523,10 @@ UPDATE t SET h = "e" WHERE h="a"; ROLLBACK; SELECT a, c, h FROM t; a c h -NULL NULL d 11 14 a 18 19 b 28 29 c +NULL NULL d DROP TABLE t; CREATE TABLE `t1` ( `col1` int(11) NOT NULL, diff --git a/mysql-test/suite/gcol/t/innodb_virtual_basic.test b/mysql-test/suite/gcol/t/innodb_virtual_basic.test index dd0409a373c27..280b0100bb37c 100644 --- a/mysql-test/suite/gcol/t/innodb_virtual_basic.test +++ b/mysql-test/suite/gcol/t/innodb_virtual_basic.test @@ -42,6 +42,7 @@ INSERT INTO t VALUES (128, 22, DEFAULT, "xx"); INSERT INTO t VALUES (1290, 212, DEFAULT, "xmx"); ROLLBACK; +--sorted_result SELECT c FROM t; SELECT * FROM t; @@ -356,13 +357,16 @@ END| delimiter ;| CALL UPDATE_t(); +--sorted_result SELECT c FROM t; CALL DELETE_insert_t(); +--sorted_result SELECT c FROM t; DROP INDEX idx ON t; CALL UPDATE_t(); +--sorted_result SELECT c FROM t; DROP PROCEDURE DELETE_insert_t; @@ -537,6 +541,7 @@ START TRANSACTION; UPDATE t SET m =10 WHERE m = 1; UPDATE t SET h = "e" WHERE h="a"; ROLLBACK; +--sorted_result SELECT a, c, h FROM t; DROP TABLE t; diff --git a/mysql-test/suite/vcol/r/races.result b/mysql-test/suite/vcol/r/races.result index c46ed5ba2ef53..c93c8b01e2eeb 100644 --- a/mysql-test/suite/vcol/r/races.result +++ b/mysql-test/suite/vcol/r/races.result @@ -14,3 +14,17 @@ disconnect con1; connection default; drop table t1; set debug_sync='reset'; +# +# MDEV-39261 MariaDB crash on startup in presence of +# indexed virtual columns +# +# Create 33 tables with virtual index +InnoDB 0 transactions not purged +connect purge_control,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +# Do update on all 33 tables +# restart: --innodb_purge_threads=1 --debug_dbug=d,ib_purge_virtual_index_callback +InnoDB 0 transactions not purged +# Drop all 33 tables +# restart diff --git a/mysql-test/suite/vcol/t/races.test b/mysql-test/suite/vcol/t/races.test index 1bf4e43dec919..b6b42b1771da9 100644 --- a/mysql-test/suite/vcol/t/races.test +++ b/mysql-test/suite/vcol/t/races.test @@ -20,3 +20,59 @@ disconnect con1; connection default; drop table t1; set debug_sync='reset'; + +--echo # +--echo # MDEV-39261 MariaDB crash on startup in presence of +--echo # indexed virtual columns +--echo # +# To make purge thread to work on multiple tables on the same batch, +# we need 33 tables because there are 32 pre-existing purge_node exists. + +--echo # Create 33 tables with virtual index +--disable_query_log +let $i = 33; +while ($i) +{ + eval CREATE TABLE t$i( + a INT PRIMARY KEY, + b INT DEFAULT 1, INDEX(b), + c INT GENERATED ALWAYS AS (a + b) VIRTUAL, + INDEX(c) + ) ENGINE=InnoDB; + eval INSERT INTO t$i(a) VALUES(1); + dec $i; +} +--enable_query_log +--source ../../innodb/include/wait_all_purged.inc +--connect purge_control,localhost,root +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default +--echo # Do update on all 33 tables +--disable_query_log +let $i = 33; +while ($i) +{ + eval UPDATE t$i SET b = 11 WHERE a = 1; + dec $i; +} +--enable_query_log + +let $shutdown_timeout=0; +let $restart_parameters=--innodb_purge_threads=1 --debug_dbug=d,ib_purge_virtual_index_callback; +--source include/restart_mysqld.inc +--source ../../innodb/include/wait_all_purged.inc + +--echo # Drop all 33 tables +--disable_query_log +let $i = 33; +while ($i) +{ + eval DROP TABLE t$i; + dec $i; +} +--enable_query_log + +let $restart_parameters=; +let $shutdown_timeout=; +--source include/restart_mysqld.inc diff --git a/sql/sql_base.cc b/sql/sql_base.cc index e3f900e370dcd..ed442ffe7ab3f 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -832,7 +832,7 @@ int close_thread_tables_for_query(THD *thd) leave prelocked mode if needed. */ -int close_thread_tables(THD *thd) +int close_thread_tables(THD *thd) noexcept { TABLE *table; int error= 0; diff --git a/sql/sql_base.h b/sql/sql_base.h index 5c75a3f8ca388..f05a94afe719e 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -164,7 +164,7 @@ TABLE_LIST *find_table_in_list(TABLE_LIST *table, TABLE_LIST *TABLE_LIST::*link, const LEX_CSTRING *db_name, const LEX_CSTRING *table_name); -int close_thread_tables(THD *thd); +int close_thread_tables(THD *thd) noexcept; int close_thread_tables_for_query(THD *thd); void switch_to_nullable_trigger_fields(List &items, TABLE *); void switch_defaults_to_nullable_trigger_fields(TABLE *table); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 73d41382e8549..39a58a8f3365e 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5023,10 +5023,10 @@ extern "C" const char *thd_priv_user(MYSQL_THD thd, size_t *length) have only one table open at any given time. */ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, - const char *tb, size_t tblen) + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept { DBUG_ENTER("open_purge_table"); - DBUG_ASSERT(thd->open_tables == NULL); DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED); /* Purge already hold the MDL for the table */ @@ -5037,6 +5037,7 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, tl->init_one_table(&db_name, &table_name, 0, TL_READ); tl->i_s_requested_object= OPEN_TABLE_ONLY; + tl->mdl_request.ticket= mdl_ticket; bool error= open_table(thd, tl, &ot_ctx); @@ -5049,11 +5050,9 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, DBUG_RETURN(error ? NULL : tl->table); } -TABLE *get_purge_table(THD *thd) +MDL_ticket *get_mdl_ticket(TABLE *table) { - /* see above, at most one table can be opened */ - DBUG_ASSERT(thd->open_tables == NULL || thd->open_tables->next == NULL); - return thd->open_tables; + return table->mdl_ticket; } /** Find an open table in the list of prelocked tabled @@ -5216,10 +5215,13 @@ void destroy_background_thd(MYSQL_THD thd) void reset_thd(MYSQL_THD thd) { + const char *proc_info= thd->proc_info; + thd->proc_info="reset"; close_thread_tables(thd); thd->release_transactional_locks(); thd->free_items(); free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + thd->proc_info= proc_info; } /** diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 76b5b86fea646..194f6cd18d5f2 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -567,13 +567,13 @@ bool dict_table_t::parse_name(char (&db_name)[NAME_LEN + 1], dict_sys.unfreeze(); *db_name_len= filename_to_tablename(db_buf, db_name, - MAX_DATABASE_NAME_LEN + 1, true); + NAME_LEN + 1, true); if (is_temp) return false; *tbl_name_len= filename_to_tablename(tbl_buf, tbl_name, - MAX_TABLE_NAME_LEN + 1, true); + NAME_LEN + 1, true); return true; } diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index c5dca24fcfca3..47789529303d6 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -71,6 +71,7 @@ static bool stats_initialised; static THD *dict_stats_thd; +void reset_thd(MYSQL_THD thd); /*****************************************************************//** Free the resources occupied by the recalc pool, called once during thread de-initialization. */ @@ -391,7 +392,7 @@ static void dict_stats_func(void*) while (dict_stats_process_entry_from_recalc_pool(dict_stats_thd)) {} dict_defrag_process_entries_from_defrag_pool(dict_stats_thd); - innobase_reset_background_thd(dict_stats_thd); + reset_thd(dict_stats_thd); set_current_thd(nullptr); if (!is_recalc_pool_empty()) dict_stats_schedule(MIN_RECALC_INTERVAL * 1000); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6410f7d5966e6..40be0c838ff56 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -137,10 +137,11 @@ TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len, const char *table, size_t table_len); MYSQL_THD create_background_thd(); void reset_thd(MYSQL_THD thd); -TABLE *get_purge_table(THD *thd); TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, - const char *tb, size_t tblen); -void close_thread_tables(THD* thd); + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept; +int close_thread_tables(THD* thd) noexcept; +MDL_ticket *get_mdl_ticket(TABLE *table) noexcept; #ifdef MYSQL_DYNAMIC_PLUGIN #define tc_size 400 @@ -1711,25 +1712,6 @@ MYSQL_THD innobase_create_background_thd(const char* name) return thd; } - -/** Close opened tables, free memory, delete items for a MYSQL_THD. -@param[in] thd MYSQL_THD to reset */ -void -innobase_reset_background_thd(MYSQL_THD thd) -{ - if (!thd) { - thd = current_thd; - } - - ut_ad(thd); - ut_ad(THDVAR(thd, background_thread)); - - /* background purge thread */ - const char *proc_info= thd_proc_info(thd, "reset"); - reset_thd(thd); - thd_proc_info(thd, proc_info); -} - /******************************************************************//** Returns the NUL terminated value of glob_hostname. @return pointer to glob_hostname. */ @@ -8484,7 +8466,7 @@ ATTRIBUTE_COLD bool wsrep_append_table_key(MYSQL_THD thd, { char db_buf[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1]; - ulint db_buf_len, tbl_buf_len; + size_t db_buf_len, tbl_buf_len; if (!table.parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { @@ -20334,37 +20316,28 @@ ha_innobase::multi_range_read_explain_info( for purge thread */ static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table) { - TABLE *mysql_table; - const bool bg_thread = THDVAR(thd, background_thread); - - if (bg_thread) { - if ((mysql_table = get_purge_table(thd))) { - return mysql_table; - } - } else { - if (table->vc_templ->mysql_table_query_id - == thd_get_query_id(thd)) { - return table->vc_templ->mysql_table; - } + table->lock_mutex_lock(); + TABLE *maria_table = table->vc_templ->mysql_table; + const uint64_t cached_id = table->vc_templ->mysql_table_query_id; + table->lock_mutex_unlock(); + if (cached_id == thd_get_query_id(thd)) { + return maria_table; } + TABLE *mysql_table; char db_buf[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1]; - ulint db_buf_len, tbl_buf_len; + size_t db_buf_len, tbl_buf_len; if (!table->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { return NULL; } - - if (bg_thread) { - return open_purge_table(thd, db_buf, db_buf_len, - tbl_buf, tbl_buf_len); - } - mysql_table = find_fk_open_table(thd, db_buf, db_buf_len, tbl_buf, tbl_buf_len); + table->lock_mutex_lock(); table->vc_templ->mysql_table = mysql_table; table->vc_templ->mysql_table_query_id = thd_get_query_id(thd); + table->lock_mutex_unlock(); return mysql_table; } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 91d1d7e21fe40..463a85e1fc9d7 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2580,6 +2580,17 @@ struct dict_table_t { return true; return false; } + + /** @return whether the table has any indexed virtual column */ + bool has_virtual_index() const noexcept + { + if (UNIV_UNLIKELY(n_v_cols != 0)) + for (dict_index_t *index = indexes.start; + index; index = UT_LIST_GET_NEXT(indexes, index)) + if (index->has_virtual()) + return true; + return false; + } }; inline void dict_index_t::set_modified(mtr_t& mtr) const diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 26a498e749c13..c6276b86585fb 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -393,11 +393,6 @@ MYSQL_THD innobase_create_background_thd(const char* name); @param[in] thd MYSQL_THD to destroy */ void destroy_background_thd(MYSQL_THD thd); -/** Close opened tables, free memory, delete items for a MYSQL_THD. -@param[in] thd MYSQL_THD to reset */ -void -innobase_reset_background_thd(MYSQL_THD); - /** Open a table based on a database and table name. @param db schema name @param name table name within the schema diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h index baa7777e6c81e..70214977d535b 100644 --- a/storage/innobase/include/row0purge.h +++ b/storage/innobase/include/row0purge.h @@ -67,6 +67,48 @@ row_purge_step( que_thr_t* thr) /*!< in: query thread */ MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Table context for purge operations. Uses pointer to store +either TABLE* or MDL_ticket* in a single union +For tables WITH indexed virtual columns: +- Opens TABLE* via open_purge_table() +- Stores TABLE* with LSB=1 flag in mariadb_table +- MDL_ticket* is accessed via TABLE->mdl_ticket +- get_ticket() returns TABLE->mdl_ticket + +For tables WITHOUT indexed virtual columns: +- Only acquires MDL_ticket* (no TABLE* needed) +- Stores MDL_ticket* with LSB=0 in the union +- get_ticket() returns the stored ticket directly */ +class purge_table +{ + union + { + /** TABLE* with the least signficant bit set */ + uintptr_t mariadb_table; + /** metadata lock when !get_mariadb_table() */ + MDL_ticket *ticket; + }; +public: + dict_table_t *table; + + purge_table() : ticket(nullptr), table(nullptr) {} + + inline TABLE *get_maria_table() const noexcept + { + return UNIV_UNLIKELY(mariadb_table & 1) + ? reinterpret_cast(mariadb_table & ~uintptr_t{1}) + : nullptr; + } + + /** @return whether we must wait for MDL on the table */ + bool must_wait() const noexcept + { return table == reinterpret_cast(-1); } + + inline MDL_ticket *get_ticket() const noexcept; + inline void set_mariadb_table(TABLE *t) noexcept; + inline void set_ticket(MDL_ticket *t) noexcept; +}; + /** Purge worker context */ struct purge_node_t { @@ -93,8 +135,8 @@ struct purge_node_t /** whether the operation is in progress */ bool in_progress= false; #endif - /** table where purge is done */ - dict_table_t *table= nullptr; + /** purge table handle */ + purge_table pt; /** update vector for a clustered index record */ upd_t *update; /** row reference to the next row to handle, or nullptr */ @@ -111,8 +153,9 @@ struct purge_node_t /** Undo recs to purge */ std::queue undo_recs; - /** map of table identifiers to table handles and meta-data locks */ - std::unordered_map> tables; + /** map of table identifiers to table handles and TABLE* object, + which is set by purge co-ordinator thread */ + std::unordered_map tables; /** Constructor */ explicit purge_node_t(que_thr_t *parent) : diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h index 2ddffa41af195..e4284bbd10026 100644 --- a/storage/innobase/include/row0vers.h +++ b/storage/innobase/include/row0vers.h @@ -65,13 +65,15 @@ bool dtuple_vcol_data_missing(const dtuple_t &tuple, @param[in,out] row the cluster index row in dtuple form @param[in] clust_index clustered index @param[in] index the secondary index -@param[in] heap heap used to build virtual dtuple. */ +@param[in] heap heap used to build virtual dtuple. +@param[in] maria_table MariaDB table object */ bool row_vers_build_clust_v_col( dtuple_t* row, dict_index_t* clust_index, dict_index_t* index, - mem_heap_t* heap); + mem_heap_t* heap, + TABLE* maria_table= nullptr); /** Build a dtuple contains virtual column data for current cluster index @param[in] rec cluster index rec @param[in] clust_index cluster index @@ -83,6 +85,7 @@ row_vers_build_clust_v_col( @param[in,out] heap heap memory @param[in,out] v_heap heap memory to keep virtual column tuple @param[in,out] mtr mini-transaction +@param[in] maria_table MariaDB table object @return dtuple contains virtual column data */ dtuple_t* row_vers_build_cur_vrow( @@ -94,7 +97,8 @@ row_vers_build_cur_vrow( roll_ptr_t roll_ptr, mem_heap_t* heap, mem_heap_t* v_heap, - mtr_t* mtr); + mtr_t* mtr, + TABLE* maria_table= nullptr); /*****************************************************************//** Constructs the version of a clustered index record which a consistent diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 19062268762c4..28aa3c830d879 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -29,6 +29,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.h" #include "que0types.h" #include "srw_lock.h" +#include "row0purge.h" #include #include @@ -230,6 +231,15 @@ class purge_sys_t trx_rseg_t* rseg; /*!< Rollback segment for the next undo record to purge */ private: + /** Coordinator thread's THD during batch processing. + The coordinator thread sets this at the start of trx_purge() + and clears it at the end. This is being used in + purge_node_t::end() to determine whether InnoDB should + call reset_thd(). The coordinator skips this call + because it manages table cleanup centrally in + trx_purge() after all workers complete. */ + THD* coordinator_thd= nullptr; + uint32_t page_no; /*!< Page number for the next undo record to purge, page number of the log header, if dummy record */ @@ -316,8 +326,12 @@ class purge_sys_t /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */ void resume(); - /** Close and reopen all tables in case of a MDL conflict with DDL */ - dict_table_t *close_and_reopen(table_id_t id, THD *thd, MDL_ticket **mdl); + /** Close and reopen all tables in case of a MDL conflict with DDL + @param id table identifier that triggered reopen + @param thd coordinator thread + @return purge_table for the reopened table, or empty on error */ + purge_table close_and_reopen(table_id_t id, THD *thd) noexcept; + private: /** Suspend purge during a DDL operation on FULLTEXT INDEX tables */ void wait_FTS(bool also_sys); @@ -417,7 +431,7 @@ class purge_sys_t /** A wrapper around trx_sys_t::clone_oldest_view(). */ template - void clone_oldest_view() + void clone_oldest_view(THD *thd) { if (!also_end_view) wait_FTS(true); @@ -427,6 +441,7 @@ class purge_sys_t (end_view= view). clamp_low_limit_id(head.trx_no ? head.trx_no : tail.trx_no); latch.wr_unlock(); + coordinator_thd= thd; } /** Wake up the purge threads if there is work to do. */ @@ -476,6 +491,12 @@ class purge_sys_t marked for truncate. @param space undo tablespace being truncated */ void cleanse_purge_queue(const fil_space_t &space); + + /** Reset the state of a purge_worker_task at the end of a batch */ + inline void reset_worker_thd(THD *thd) const noexcept; + + /** Set coordinator thread */ + inline void set_coordinator_thd(THD *thd) noexcept; }; /** The global data structure coordinating a purge */ diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 59d8bd8409955..ca2db547efe38 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -50,6 +50,7 @@ Created 3/14/1997 Heikki Tuuri #include "debug_sync.h" #include +void reset_thd(MYSQL_THD thd); /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there is enough space in the redo log before for that operation. This is @@ -81,7 +82,7 @@ row_purge_reposition_pcur( } else { node->found_clust = row_search_on_row_ref( - &node->pcur, mode, node->table, node->ref, mtr); + &node->pcur, mode, node->pt.table, node->ref, mtr); if (node->found_clust) { btr_pcur_store_position(&node->pcur, mtr); @@ -107,7 +108,7 @@ row_purge_remove_clust_if_poss_low( purge_node_t* node, /*!< in/out: row purge node */ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ { - dict_index_t* index = dict_table_get_first_index(node->table); + dict_index_t* index = dict_table_get_first_index(node->pt.table); table_id_t table_id = 0; index_id_t index_id = 0; dict_table_t *table = nullptr; @@ -146,7 +147,7 @@ row_purge_remove_clust_if_poss_low( return success; } - if (node->table->id == DICT_INDEXES_ID) { + if (node->pt.table->id == DICT_INDEXES_ID) { /* If this is a record of the SYS_INDEXES table, then we have to free the file segments of the index tree associated with the index */ @@ -504,15 +505,12 @@ static bool row_purge_is_unsafe(const purge_node_t &node, dtuple_t* cur_vrow = NULL; ut_ad(index->table == clust_index->table); + ut_ad(node.pt.table == index->table); heap = mem_heap_create(1024); clust_offsets = rec_get_offsets(rec, clust_index, NULL, clust_index->n_core_fields, ULINT_UNDEFINED, &heap); - if (dict_index_has_virtual(index)) { - v_heap = mem_heap_create(100); - } - if (!rec_get_deleted_flag(rec, rec_offs_comp(clust_offsets))) { row_ext_t* ext; @@ -549,7 +547,8 @@ static bool row_purge_is_unsafe(const purge_node_t &node, || dbug_v_purge) { if (!row_vers_build_clust_v_col( - row, clust_index, index, heap)) { + row, clust_index, index, heap, + node.pt.get_maria_table())) { goto unsafe_to_purge; } @@ -625,10 +624,12 @@ static bool row_purge_is_unsafe(const purge_node_t &node, deleted, but the previous version of it might not. We will need to get the virtual column data from undo record associated with current cluster index */ + v_heap = mem_heap_create(100); cur_vrow = row_vers_build_cur_vrow( rec, clust_index, &clust_offsets, - index, trx_id, roll_ptr, heap, v_heap, mtr); + index, trx_id, roll_ptr, heap, v_heap, mtr, + node.pt.get_maria_table()); } version = rec; @@ -670,7 +671,11 @@ static bool row_purge_is_unsafe(const purge_node_t &node, } /* Keep the virtual row info for the next version, unless it is changed */ - mem_heap_empty(v_heap); + if (v_heap) { + mem_heap_empty(v_heap); + } else { + v_heap = mem_heap_create(100); + } cur_vrow = dtuple_copy(vrow, v_heap); dtuple_dup_v_fld(cur_vrow, v_heap); } @@ -916,7 +921,7 @@ static trx_id_t row_purge_remove_sec_if_poss_leaf(purge_node_t *node, trx_id_t page_max_trx_id = 0; log_free_check(); - ut_ad(index->table == node->table); + ut_ad(index->table == node->pt.table); ut_ad(!index->table->is_temporary()); mtr.start(); index->set_modified(mtr); @@ -1095,7 +1100,7 @@ row_purge_upd_exist_or_extern_func( { mem_heap_t* heap; - ut_ad(!node->table->skip_alter_undo); + ut_ad(!node->pt.table->skip_alter_undo); if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) @@ -1123,7 +1128,7 @@ row_purge_upd_exist_or_extern_func( heap, ROW_BUILD_FOR_PURGE); row_purge_remove_sec_if_poss(node, node->index, entry); - ut_ad(node->table); + ut_ad(node->pt.table); mem_heap_empty(heap); } @@ -1133,7 +1138,7 @@ row_purge_upd_exist_or_extern_func( skip_secondaries: mtr_t mtr; - dict_index_t* index = dict_table_get_first_index(node->table); + dict_index_t* index = dict_table_get_first_index(node->pt.table); /* Free possible externally stored fields */ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { @@ -1402,27 +1407,20 @@ row_purge_parse_undo_rec( break; } - auto &tables_entry= node->tables[table_id]; - node->table = tables_entry.first; - if (!node->table) { + node->pt = node->tables[table_id]; + if (!node->pt.table) { return false; } -#ifndef DBUG_OFF - if (MDL_ticket* mdl = tables_entry.second) { - static_cast(thd_mdl_context(current_thd)) - ->lock_warrant = mdl->get_ctx(); - } -#endif - ut_ad(!node->table->is_temporary()); + ut_ad(!node->pt.table->is_temporary()); - clust_index = dict_table_get_first_index(node->table); + clust_index = dict_table_get_first_index(node->pt.table); if (clust_index->is_corrupted()) { /* The table was corrupt in the data dictionary. dict_set_corrupted() works on an index, and we do not have an index to call it with. */ - DBUG_ASSERT(table_id == node->table->id); + DBUG_ASSERT(table_id == node->pt.table->id); return false; } @@ -1476,11 +1474,11 @@ row_purge_record_func( bool updated_extern) { ut_ad(!node->found_clust); - ut_ad(!node->table->skip_alter_undo); + ut_ad(!node->pt.table->skip_alter_undo); ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); + dict_table_get_first_index(node->pt.table)); bool purged = true; @@ -1490,10 +1488,10 @@ row_purge_record_func( case TRX_UNDO_DEL_MARK_REC: purged = row_purge_del_mark(node); if (purged) { - if (node->table->stat_initialized() + if (node->pt.table->stat_initialized() && srv_stats_include_delete_marked) { dict_stats_update_if_needed( - node->table, *thr->graph->trx); + node->pt.table, *thr->graph->trx); } MONITOR_INC(MONITOR_N_DEL_ROW_PURGE); } @@ -1575,6 +1573,22 @@ inline void purge_node_t::start() cmpl_info= 0; } +inline void purge_sys_t::reset_worker_thd(THD *thd) const noexcept +{ + /* Only reset THD for worker threads, not the coordinator. + The coordinator thread opens TABLE* objects in + trx_purge_attach_undo_recs() and stores them in + purge_node_t->tables. These TABLE* objects must remain open until + the entire purge batch completes. Coordinator thread could + close the tables prematurely if it calls reset_thd() + The coordinator handles cleanup centrally in trx_purge() after all + purge_node_t entries are processed. Worker threads have their own + THD lifecycle and must call reset_thd() to clean up their + thread-local resources. */ + if (thd != coordinator_thd) + reset_thd(thd); +} + /** Reset the state at end @return the query graph parent */ inline que_node_t *purge_node_t::end(THD *thd) @@ -1582,7 +1596,7 @@ inline que_node_t *purge_node_t::end(THD *thd) DBUG_ASSERT(common.type == QUE_NODE_PURGE); ut_ad(undo_recs.empty()); ut_d(in_progress= false); - innobase_reset_background_thd(thd); + purge_sys.reset_worker_thd(thd); #ifndef DBUG_OFF static_cast(thd_mdl_context(thd))->lock_warrant= nullptr; #endif diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index 29dbd48c3ddc2..30b859625c7ee 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -459,10 +459,10 @@ row_vers_build_clust_v_col( dtuple_t* row, dict_index_t* clust_index, dict_index_t* index, - mem_heap_t* heap) + mem_heap_t* heap, + TABLE* maria_table) { THD* thd= current_thd; - TABLE* maria_table= 0; ut_ad(dict_index_has_virtual(index)); ut_ad(index->table == clust_index->table); @@ -632,7 +632,8 @@ row_vers_build_cur_vrow( roll_ptr_t roll_ptr, mem_heap_t* heap, mem_heap_t* v_heap, - mtr_t* mtr) + mtr_t* mtr, + TABLE* maria_table) { dtuple_t* cur_vrow = NULL; @@ -653,7 +654,7 @@ row_vers_build_cur_vrow( NULL, NULL, NULL, NULL, heap); if (!row_vers_build_clust_v_col(row, clust_index, index, - heap)) { + heap, maria_table)) { return nullptr; } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index ab36a73220a14..c2516498569f3 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -45,6 +45,12 @@ Created 3/26/1996 Heikki Tuuri #include "log.h" #include "sql_class.h" +TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept; +int close_thread_tables(THD* thd) noexcept; +MDL_ticket *get_mdl_ticket(TABLE *table) noexcept; + /** Maximum allowable purge history length. <=0 means 'infinite'. */ ulong srv_max_purge_lag = 0; @@ -1050,27 +1056,65 @@ inline trx_purge_rec_t purge_sys_t::fetch_next_rec() return get_next_rec(roll_ptr); } +inline MDL_ticket *purge_table::get_ticket() const noexcept +{ + if (TABLE* t= get_maria_table()) + return get_mdl_ticket(t); + return ticket; +} + +inline void purge_table::set_mariadb_table(TABLE *t) noexcept +{ + mariadb_table= reinterpret_cast(t) | 1; +} + +inline void purge_table::set_ticket(MDL_ticket *t) noexcept +{ + ticket= t; +} + /** Close all tables that were opened in a purge batch for a worker. -@param node purge task context -@param thd purge coordinator thread handle */ -static void trx_purge_close_tables(purge_node_t *node, THD *thd) noexcept +@param thd purge coordinator thread handle +@param batch_cleanup if true, clears the list of opened tables + in the purge node. */ +static void trx_purge_close_tables(THD *thd, bool batch_cleanup=false) noexcept { - for (auto &t : node->tables) + MDL_context *mdl_context = static_cast(thd_mdl_context(thd)); + std::vector mdl_tickets; + for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; + thr= UT_LIST_GET_NEXT(thrs, thr)) { - dict_table_t *table= t.second.first; - if (table != nullptr && table != reinterpret_cast(-1)) - table->release(); + purge_node_t* node = static_cast(thr->child); + for (auto &t : node->tables) + { + purge_table& pt = t.second; + if (pt.table && !pt.must_wait()) + { + if (pt.get_maria_table()) + { + pt.table->lock_mutex_lock(); + pt.table->vc_templ->mysql_table= nullptr; + pt.table->vc_templ->mysql_table_query_id= 0; + pt.table->lock_mutex_unlock(); + } + + pt.table->release(); + pt.table= nullptr; + } + MDL_ticket *ticket= pt.get_ticket(); + if (ticket) + mdl_tickets.push_back(ticket); + } + if (batch_cleanup) + node->tables.clear(); } - for (auto &t : node->tables) + close_thread_tables(thd); + + for (auto mdl : mdl_tickets) { - dict_table_t *table= t.second.first; - if (table != nullptr && table != reinterpret_cast(-1)) - { - t.second.first= reinterpret_cast(-1); - if (t.second.second != nullptr) - thd->mdl_context.release_lock(t.second.second); - } + if (mdl && mdl_context) + mdl_context->release_lock(mdl); } } @@ -1137,80 +1181,104 @@ static dict_table_t *trx_purge_table_acquire(dict_table_t *table, /** Open a table handle for the purge of committed transaction history @param table_id InnoDB table identifier @param mdl_context metadata lock acquisition context -@param mdl metadata lock -@return table handle -@retval nullptr if the table is not found or accessible -@retval -1 if the purge of history must be suspended due to DDL */ -static dict_table_t *trx_purge_table_open(table_id_t table_id, - MDL_context *mdl_context, - MDL_ticket **mdl) noexcept +@return purge_table with dict_table_t* and TABLE* (or)MDL_ticket, +possibly with must_wait() || table == nullptr */ +static purge_table trx_purge_table_open(table_id_t table_id, + MDL_context *mdl_context) noexcept { - dict_table_t *table; + purge_table result; + MDL_ticket *mdl= nullptr; for (;;) { dict_sys.freeze(SRW_LOCK_CALL); - table= dict_sys.find_table(table_id); - if (table) + result.table= dict_sys.find_table(table_id); + if (result.table) break; dict_sys.unfreeze(); dict_sys.lock(SRW_LOCK_CALL); - table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY); + result.table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY); dict_sys.unlock(); - if (!table) - return nullptr; + if (!result.table) + return result; /* At this point, the freshly loaded table may already have been evicted. We must look it up again while holding a shared dict_sys.latch. We keep trying this until the table is found in the cache or it cannot be found in the dictionary (because the table has been dropped or rebuilt). */ } - table= trx_purge_table_acquire(table, mdl_context, mdl); + result.table= trx_purge_table_acquire(result.table, mdl_context, &mdl); dict_sys.unfreeze(); - return table; + + if (mdl && result.table->has_virtual_index()) + { + char db_buf[NAME_LEN + 1]; + char tbl_buf[NAME_LEN + 1]; + size_t db_len, tbl_len; + + if (result.table->parse_name(db_buf, tbl_buf, &db_len, &tbl_len)) + { + THD *thd= current_thd; + TABLE *maria_table= open_purge_table(thd, db_buf, db_len, + tbl_buf, tbl_len, mdl); + if (maria_table) + { + if (result.table->vc_templ) + { + ut_ad(thd->query_id == 0); + result.table->lock_mutex_lock(); + result.table->vc_templ->mysql_table= maria_table; + result.table->vc_templ->mysql_table_query_id= 0; + result.table->lock_mutex_unlock(); + } + result.set_mariadb_table(maria_table); + return result; + } + } + } + result.set_ticket(mdl); + return result; } ATTRIBUTE_COLD -dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd, - MDL_ticket **mdl) +purge_table purge_sys_t::close_and_reopen(table_id_t id, THD *thd) noexcept { MDL_context *mdl_context= &thd->mdl_context; retry: ut_ad(m_active); - - for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; - thr= UT_LIST_GET_NEXT(thrs, thr)) - trx_purge_close_tables(static_cast(thr->child), thd); + trx_purge_close_tables(thd); m_active= false; wait_FTS(false); m_active= true; - dict_table_t *table= trx_purge_table_open(id, mdl_context, mdl); - if (table == reinterpret_cast(-1)) + purge_table pt= trx_purge_table_open(id, mdl_context); + if (pt.must_wait()) goto retry; - for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; + /* Reopen all other tables from all nodes */ + for (que_thr_t *thr= UT_LIST_GET_FIRST(query->thrs); thr; thr= UT_LIST_GET_NEXT(thrs, thr)) { purge_node_t *node= static_cast(thr->child); - for (auto &t : node->tables) + for (auto it = node->tables.begin(); it != node->tables.end(); ) { - if (t.second.first) + it->second= trx_purge_table_open(it->first, mdl_context); + if (it->second.must_wait()) { - t.second.first= trx_purge_table_open(t.first, mdl_context, - &t.second.second); - if (t.second.first == reinterpret_cast(-1)) - { - if (table) - dict_table_close(table, thd, *mdl); - goto retry; - } + if (pt.table) + pt.table->release(); + goto retry; } +#ifndef DBUG_OFF + if (MDL_ticket *mdl= it->second.get_ticket()) + static_cast(thd_mdl_context(thd))->lock_warrant= + mdl->get_ctx(); +#endif + ++it; } } - - return table; + return pt; } /** Run a purge batch. @@ -1252,29 +1320,33 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, } table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec); - purge_node_t *&table_node= table_id_map[table_id]; if (table_node) ut_ad(!table_node->in_progress); if (!table_node) { - std::pair p; - p.first= trx_purge_table_open(table_id, &thd->mdl_context, &p.second); - if (p.first == reinterpret_cast(-1)) - p.first= purge_sys.close_and_reopen(table_id, thd, &p.second); + purge_table pt= trx_purge_table_open(table_id, &thd->mdl_context); + if (pt.must_wait()) + pt= purge_sys.close_and_reopen(table_id, thd); if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr))) thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); ++*n_work_items; table_node= static_cast(thr->child); - ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE); - ut_d(auto pair=) table_node->tables.emplace(table_id, p); - ut_ad(pair.second); - if (p.first) + + table_node->tables.emplace(table_id, pt); + if (pt.table) + { +#ifndef DBUG_OFF + if (MDL_ticket *mdl= pt.get_ticket()) + static_cast(thd_mdl_context(thd))->lock_warrant= + mdl->get_ctx(); +#endif goto enqueue; + } } - else if (table_node->tables[table_id].first) + else if (table_node->tables[table_id].table) { enqueue: table_node->undo_recs.push(purge_rec); @@ -1360,6 +1432,13 @@ void purge_sys_t::batch_cleanup(const purge_sys_t::iterator &head) #ifdef SUX_LOCK_GENERIC end_latch.wr_unlock(); #endif + coordinator_thd= nullptr; +} + + +inline void purge_sys_t::set_coordinator_thd(THD *thd) noexcept +{ + coordinator_thd= thd; } /** @@ -1371,12 +1450,12 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) { ut_ad(n_tasks > 0); - purge_sys.clone_oldest_view(); + THD *const thd= current_thd; + + purge_sys.clone_oldest_view(thd); ut_d(if (srv_purge_view_update_only_debug) return 0); - THD *const thd= current_thd; - /* Fetch the UNDO recs that need to be purged. */ ulint n_work= 0; const purge_sys_t::iterator head= trx_purge_attach_undo_recs(thd, &n_work); @@ -1405,12 +1484,12 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) for (auto i= n_work; i--; ) { if (!thr) - thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); + thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); else - thr= UT_LIST_GET_NEXT(thrs, thr); + thr= UT_LIST_GET_NEXT(thrs, thr); if (!thr) - break; + break; ut_ad(thr->state == QUE_THR_COMPLETED); thr->state= QUE_THR_RUNNING; @@ -1439,13 +1518,7 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) if (workers) trx_purge_wait_for_workers_to_complete(); - for (thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr && n_work--; - thr= UT_LIST_GET_NEXT(thrs, thr)) - { - purge_node_t *node= static_cast(thr->child); - trx_purge_close_tables(node, thd); - node->tables.clear(); - } + trx_purge_close_tables(thd, true); } purge_sys.batch_cleanup(head); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 430eb3c33f26a..e622b7f78f323 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -723,7 +723,7 @@ dberr_t trx_lists_init_at_db_start() if (trx_sys.is_undo_empty()) { func_exit: - purge_sys.clone_oldest_view(); + purge_sys.clone_oldest_view(nullptr); return DB_SUCCESS; } From 7a65c7ea939d5a405eeaf7cf6825a0e3c13ef32d Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Mon, 27 Apr 2026 21:57:31 +0530 Subject: [PATCH 2/2] MDEV-39261 MariaDB crash on startup in presence of indexed virtual columns Problem: ======== Purge threads computing virtual columns could crash due to: 1. Stale TABLE* pointers when tables are flushed/rebuilt during purge 2. open_purge_table() called close_thread_tables() on failure, making MDL tickets invalid before purge could release them 3. Purge coordinator opened TABLE* but workers accessed it with wrong TABLE->in_use 4. No retry mechanism when open_purge_table() failed due to concurrent FLUSH TABLES, BACKUP STAGE, or ALTER TABLE operations Solution: ======== 1. Removed close_thread_tables() from open_purge_table(). Purge coordinator thread should close explicitly in close_and_reopen() 2. Added retry logic: when open_purge_table() returns NULL due to table flush/rebuild, set must_wait() flag and retry in close_and_reopen() 3. Update close_and_reopen() with purge_table parameter to close the failed table. Pass it to trx_purge_close_tables() for proper cleanup 4. Properly set and reset TABLE::in_use during purge operations: - Set to coordinator_thd in row_purge_parse_undo_rec() when opening - Reset in trx_purge_close_table() when closing 5. The auto_increment initialization now happens unconditionally for purge threads , ensuring the auto_increment counter is always properly initialized when purge opens tables with virtual columns --- sql/sql_class.cc | 8 +- storage/innobase/handler/ha_innodb.cc | 6 +- storage/innobase/include/row0purge.h | 13 ++- storage/innobase/include/trx0purge.h | 33 ++++--- storage/innobase/row/row0purge.cc | 18 ++-- storage/innobase/trx/trx0purge.cc | 120 +++++++++++++++----------- 6 files changed, 118 insertions(+), 80 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 39a58a8f3365e..7b54c1ddc2d7d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5041,11 +5041,9 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, bool error= open_table(thd, tl, &ot_ctx); - /* we don't recover here */ - DBUG_ASSERT(!error || !ot_ctx.can_recover_from_failed_open()); - - if (unlikely(error)) - close_thread_tables(thd); + /* FLUSH TABLES is executed concurrently - the TABLE_SHARE is marked + as flushed and open_table() requests OT_REOPEN_TABLES backoff. + So removed the assert */ DBUG_RETURN(error ? NULL : tl->table); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 40be0c838ff56..3a53231cbb459 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6015,9 +6015,7 @@ ha_innobase::open(const char* name, int, uint) /* Index block size in InnoDB: used by MySQL in query optimization */ stats.block_size = static_cast(srv_page_size); - const my_bool for_vc_purge = THDVAR(thd, background_thread); - - if (for_vc_purge || !m_prebuilt->table + if (!m_prebuilt->table || m_prebuilt->table->is_temporary() || m_prebuilt->table->persistent_autoinc || !m_prebuilt->table->is_readable()) { @@ -6044,7 +6042,7 @@ ha_innobase::open(const char* name, int, uint) ut_ad(!m_prebuilt->table || table->versioned() == m_prebuilt->table->versioned()); - if (!for_vc_purge) { + if (!THDVAR(thd, background_thread)) { info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN); } diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h index 70214977d535b..b1f812935fc0d 100644 --- a/storage/innobase/include/row0purge.h +++ b/storage/innobase/include/row0purge.h @@ -89,11 +89,15 @@ class purge_table MDL_ticket *ticket; }; public: + /** Pointer to the InnoDB table, or nullptr if the table + no longer exists, or -1 if we failed to acquire a metadata lock */ dict_table_t *table; purge_table() : ticket(nullptr), table(nullptr) {} - inline TABLE *get_maria_table() const noexcept + /** Get the TABLE pointer for virtual column computation. + @return TABLE* if the LSB flag is set, nullptr otherwise */ + TABLE *get_maria_table() const noexcept { return UNIV_UNLIKELY(mariadb_table & 1) ? reinterpret_cast(mariadb_table & ~uintptr_t{1}) @@ -105,8 +109,11 @@ class purge_table { return table == reinterpret_cast(-1); } inline MDL_ticket *get_ticket() const noexcept; - inline void set_mariadb_table(TABLE *t) noexcept; - inline void set_ticket(MDL_ticket *t) noexcept; + inline void set_mariadb_table(TABLE *t) noexcept + { mariadb_table= reinterpret_cast(t) | 1; } + + inline void set_ticket(MDL_ticket *t) noexcept + { ticket= t; } }; /** Purge worker context */ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 28aa3c830d879..25b76c0ecccaf 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -1,3 +1,4 @@ + /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. @@ -34,6 +35,7 @@ Created 3/26/1996 Heikki Tuuri #include #include +struct TABLE; /** Prepend the history list with an undo log. Remove the undo log segment from the rseg slot if it is too big for reuse. @param[in] trx transaction @@ -232,13 +234,16 @@ class purge_sys_t record to purge */ private: /** Coordinator thread's THD during batch processing. - The coordinator thread sets this at the start of trx_purge() - and clears it at the end. This is being used in - purge_node_t::end() to determine whether InnoDB should - call reset_thd(). The coordinator skips this call - because it manages table cleanup centrally in - trx_purge() after all workers complete. */ - THD* coordinator_thd= nullptr; + Set by the coordinator at the start of trx_purge() and cleared + at the end. This serves two purposes: + 1. Identifies the coordinator thread in reset_worker_thd() + which skips THD reset for the coordinator since it manages + cleanup centrally in trx_purge() after all workers complete. + + 2. Used to set TABLE::in_use when opening tables for purge + operations with virtual columns, ensuring proper table + ownership tracking during the purge batch. */ + THD *coordinator_thd= nullptr; uint32_t page_no; /*!< Page number for the next undo record to purge, page number of the @@ -328,9 +333,11 @@ class purge_sys_t /** Close and reopen all tables in case of a MDL conflict with DDL @param id table identifier that triggered reopen + @param pt last purge_table entry processed @param thd coordinator thread @return purge_table for the reopened table, or empty on error */ - purge_table close_and_reopen(table_id_t id, THD *thd) noexcept; + purge_table close_and_reopen(table_id_t id, purge_table pt, + THD *thd) noexcept; private: /** Suspend purge during a DDL operation on FULLTEXT INDEX tables */ @@ -352,6 +359,8 @@ class purge_sys_t { ut_d(const auto p=) m_FTS_paused.fetch_sub(1); ut_ad(p & ~PAUSED_SYS); } /** @return whether stop_SYS() is in effect */ bool must_wait_FTS() const { return m_FTS_paused & ~PAUSED_SYS; } + /** Reset coordinator thread back to table->in_use */ + inline void reset_in_use(TABLE *table) const noexcept; private: /** @@ -433,6 +442,10 @@ class purge_sys_t template void clone_oldest_view(THD *thd) { + ut_ad(also_end_view == !thd); + ut_ad(!thd || !coordinator_thd); + coordinator_thd= thd; + if (!also_end_view) wait_FTS(true); latch.wr_lock(SRW_LOCK_CALL); @@ -441,7 +454,6 @@ class purge_sys_t (end_view= view). clamp_low_limit_id(head.trx_no ? head.trx_no : tail.trx_no); latch.wr_unlock(); - coordinator_thd= thd; } /** Wake up the purge threads if there is work to do. */ @@ -494,9 +506,6 @@ class purge_sys_t /** Reset the state of a purge_worker_task at the end of a batch */ inline void reset_worker_thd(THD *thd) const noexcept; - - /** Set coordinator thread */ - inline void set_coordinator_thd(THD *thd) noexcept; }; /** The global data structure coordinating a purge */ diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index ca2db547efe38..837e1b60edd1c 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1361,6 +1361,7 @@ MY_ATTRIBUTE((nonnull,warn_unused_result)) @param[in] node row undo node @param[in] undo_rec record to purge @param[in] thr query thread +@param[in] thd worker thread @param[out] updated_extern true if an externally stored field was updated @return true if purge operation required */ @@ -1370,6 +1371,7 @@ row_purge_parse_undo_rec( purge_node_t* node, const trx_undo_rec_t* undo_rec, que_thr_t* thr, + const THD* thd, bool* updated_extern) { dict_index_t* clust_index; @@ -1414,6 +1416,10 @@ row_purge_parse_undo_rec( ut_ad(!node->pt.table->is_temporary()); + if (TABLE *maria_table = node->pt.get_maria_table()) { + maria_table->in_use = (THD*)thd; + } + clust_index = dict_table_get_first_index(node->pt.table); if (clust_index->is_corrupted()) { @@ -1537,13 +1543,14 @@ row_purge( /*======*/ purge_node_t* node, /*!< in: row purge node */ const trx_undo_rec_t* undo_rec, /*!< in: record to purge */ - que_thr_t* thr) /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread */ + const THD* thd) /*!< in: Worker thread */ { if (undo_rec != reinterpret_cast(-1)) { bool updated_extern; - while (row_purge_parse_undo_rec( - node, undo_rec, thr, &updated_extern)) { + while (row_purge_parse_undo_rec(node, undo_rec, thr, + thd, &updated_extern)) { bool purged = row_purge_record( node, undo_rec, thr, updated_extern); @@ -1613,6 +1620,7 @@ row_purge_step( /*===========*/ que_thr_t* thr) /*!< in: query thread */ { + const THD *thd = current_thd; purge_node_t* node; node = static_cast(thr->run_node); @@ -1623,8 +1631,7 @@ row_purge_step( trx_purge_rec_t purge_rec = node->undo_recs.front(); node->undo_recs.pop(); node->roll_ptr = purge_rec.roll_ptr; - - row_purge(node, purge_rec.undo_rec, thr); + row_purge(node, purge_rec.undo_rec, thr, thd); } thr->run_node = node->end(current_thd); @@ -1679,4 +1686,5 @@ purge_node_t::validate_pcur() return(true); } + #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index c2516498569f3..213243d528697 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1063,48 +1063,58 @@ inline MDL_ticket *purge_table::get_ticket() const noexcept return ticket; } -inline void purge_table::set_mariadb_table(TABLE *t) noexcept +inline void purge_sys_t::reset_in_use(TABLE *table) const noexcept { - mariadb_table= reinterpret_cast(t) | 1; + if (table) + table->in_use= coordinator_thd; } -inline void purge_table::set_ticket(MDL_ticket *t) noexcept +/** Close a single purge table entry. +Clears the cached TABLE* pointer from vc_templ, closes the +dict_table_t, and collects the MDL_ticket forlater release. +@param pt purge_table entry to close +@param mdl_tickets vector to collect standalone MDL tickets */ +static void trx_purge_close_table(purge_table &pt, + std::vector &mdl_tickets) { - ticket= t; + if (pt.table && !pt.must_wait()) + { + if (TABLE *maria_table= pt.get_maria_table()) + purge_sys.reset_in_use(maria_table); + pt.table->release(); + pt.table= nullptr; + } + + MDL_ticket *ticket= pt.get_ticket(); + if (ticket) + { + mdl_tickets.push_back(ticket); + pt.set_ticket(nullptr); + } } /** Close all tables that were opened in a purge batch for a worker. @param thd purge coordinator thread handle +@param last_entry additional table to close (not in node->tables), + typically the table that triggered + close_and_reopen(); nullptr if no additional + table to close @param batch_cleanup if true, clears the list of opened tables in the purge node. */ -static void trx_purge_close_tables(THD *thd, bool batch_cleanup=false) noexcept +static void trx_purge_close_tables(THD *thd, purge_table *last_entry, + bool batch_cleanup=false) noexcept { MDL_context *mdl_context = static_cast(thd_mdl_context(thd)); std::vector mdl_tickets; + if (last_entry) + trx_purge_close_table(*last_entry, mdl_tickets); + for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; thr= UT_LIST_GET_NEXT(thrs, thr)) { purge_node_t* node = static_cast(thr->child); for (auto &t : node->tables) - { - purge_table& pt = t.second; - if (pt.table && !pt.must_wait()) - { - if (pt.get_maria_table()) - { - pt.table->lock_mutex_lock(); - pt.table->vc_templ->mysql_table= nullptr; - pt.table->vc_templ->mysql_table_query_id= 0; - pt.table->lock_mutex_unlock(); - } - - pt.table->release(); - pt.table= nullptr; - } - MDL_ticket *ticket= pt.get_ticket(); - if (ticket) - mdl_tickets.push_back(ticket); - } + trx_purge_close_table(t.second, mdl_tickets); if (batch_cleanup) node->tables.clear(); } @@ -1234,6 +1244,21 @@ static purge_table trx_purge_table_open(table_id_t table_id, result.set_mariadb_table(maria_table); return result; } + else + { + /* open_table() could be failed and return nullptr. + This can happen when: + 1. FLUSH TABLES is executed concurrently: TABLE_SHARE is + marked as flushed and open_table() requests + OT_REOPEN_TABLES backoff + 2. BACKUP STAGE operations trigger table cache flushes + 3. Table is being rebuilt by ALTER TABLE and the old version + needs to be closed before opening the new version + The purge coordinator should close all open tables and retry + opening. */ + result.table->release(); + result.table= reinterpret_cast(-1); + } } } result.set_ticket(mdl); @@ -1241,18 +1266,20 @@ static purge_table trx_purge_table_open(table_id_t table_id, } ATTRIBUTE_COLD -purge_table purge_sys_t::close_and_reopen(table_id_t id, THD *thd) noexcept +purge_table purge_sys_t::close_and_reopen(table_id_t id, + purge_table pt, + THD *thd) noexcept { MDL_context *mdl_context= &thd->mdl_context; retry: ut_ad(m_active); - trx_purge_close_tables(thd); + trx_purge_close_tables(thd, &pt); m_active= false; wait_FTS(false); m_active= true; - purge_table pt= trx_purge_table_open(id, mdl_context); + pt= trx_purge_table_open(id, mdl_context); if (pt.must_wait()) goto retry; @@ -1261,21 +1288,16 @@ purge_table purge_sys_t::close_and_reopen(table_id_t id, THD *thd) noexcept thr= UT_LIST_GET_NEXT(thrs, thr)) { purge_node_t *node= static_cast(thr->child); - for (auto it = node->tables.begin(); it != node->tables.end(); ) + for (auto &t : node->tables) { - it->second= trx_purge_table_open(it->first, mdl_context); - if (it->second.must_wait()) - { - if (pt.table) - pt.table->release(); + t.second= trx_purge_table_open(t.first, mdl_context); + if (t.second.must_wait()) goto retry; - } #ifndef DBUG_OFF - if (MDL_ticket *mdl= it->second.get_ticket()) + if (MDL_ticket *mdl= t.second.get_ticket()) static_cast(thd_mdl_context(thd))->lock_warrant= mdl->get_ctx(); #endif - ++it; } } return pt; @@ -1322,12 +1344,20 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec); purge_node_t *&table_node= table_id_map[table_id]; if (table_node) + { ut_ad(!table_node->in_progress); - if (!table_node) + if (table_node->tables[table_id].table) + { +enqueue: + table_node->undo_recs.push(purge_rec); + ut_ad(!table_node->in_progress); + } + } + else { purge_table pt= trx_purge_table_open(table_id, &thd->mdl_context); if (pt.must_wait()) - pt= purge_sys.close_and_reopen(table_id, thd); + pt= purge_sys.close_and_reopen(table_id, pt, thd); if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr))) thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); @@ -1346,12 +1376,6 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, goto enqueue; } } - else if (table_node->tables[table_id].table) - { - enqueue: - table_node->undo_recs.push(purge_rec); - ut_ad(!table_node->in_progress); - } const size_t size{purge_sys.n_pages_handled()}; if (size >= size_t{srv_purge_batch_size} || @@ -1435,12 +1459,6 @@ void purge_sys_t::batch_cleanup(const purge_sys_t::iterator &head) coordinator_thd= nullptr; } - -inline void purge_sys_t::set_coordinator_thd(THD *thd) noexcept -{ - coordinator_thd= thd; -} - /** Run a purge batch. @param n_tasks number of purge tasks to submit to the queue @@ -1518,7 +1536,7 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) if (workers) trx_purge_wait_for_workers_to_complete(); - trx_purge_close_tables(thd, true); + trx_purge_close_tables(thd, nullptr, true); } purge_sys.batch_cleanup(head);