From dc680d2119d3cc4ad642e5c1f11f38ab072b3032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 11 Mar 2022 16:14:06 +0200 Subject: [PATCH 1/7] Avoid shutdown timeout in innodb.undo_truncate Let us explicitly wait for purge before invoking a slow shutdown, so that instrumented builds (such as ASAN or UBSAN) will not exceed the 60-second timeout during shutdown. --- mysql-test/suite/innodb/r/undo_truncate.result | 2 ++ mysql-test/suite/innodb/t/undo_truncate.test | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mysql-test/suite/innodb/r/undo_truncate.result b/mysql-test/suite/innodb/r/undo_truncate.result index 4b185a58d2c3f..1cc4fb2857c55 100644 --- a/mysql-test/suite/innodb/r/undo_truncate.result +++ b/mysql-test/suite/innodb/r/undo_truncate.result @@ -31,5 +31,7 @@ connection con2; commit; disconnect con2; connection default; +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +SET GLOBAL innodb_max_purge_lag_wait=0; set global innodb_fast_shutdown=0; drop table t1, t2; diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test index 51bb4f4b9fce5..2ca4f67d0bfa8 100644 --- a/mysql-test/suite/innodb/t/undo_truncate.test +++ b/mysql-test/suite/innodb/t/undo_truncate.test @@ -45,6 +45,8 @@ connection default; let $trx_before= `SHOW ENGINE INNODB STATUS`; let $trx_before= `select substr('$trx_before',9)+2`; +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +SET GLOBAL innodb_max_purge_lag_wait=0; set global innodb_fast_shutdown=0; --source include/restart_mysqld.inc --replace_regex /.*Trx id counter ([0-9]+).*/\1/ From 6789f2cfabb6a92b17ce822921cb522098ef3ff7 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 12 Oct 2021 18:34:51 +0200 Subject: [PATCH 2/7] MDEV-18304 sql_safe_updates does not work with OR clauses not every index-using plan sets bits in table->quick_keys. QUICK_ROR_INTERSECT_SELECT, for example, doesn't. Use the fact that select->quick is set instead. 
Also allow EXPLAIN to work. --- mysql-test/main/sql_safe_updates.result | 24 ++++++++++++++++++++++++ mysql-test/main/sql_safe_updates.test | 25 ++++++++++++++++++++++--- sql/sql_delete.cc | 5 +++-- sql/sql_update.cc | 5 +++-- 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/mysql-test/main/sql_safe_updates.result b/mysql-test/main/sql_safe_updates.result index 356cd36bad930..f2944e6048976 100644 --- a/mysql-test/main/sql_safe_updates.result +++ b/mysql-test/main/sql_safe_updates.result @@ -1,3 +1,27 @@ +# +# MDEV-14429 sql_safe_updates in my.cnf not work +# select @@sql_safe_updates; @@sql_safe_updates 1 +# +# MDEV-18304 sql_safe_updates does not work with OR clauses +# +create table t1 (a int, b int, primary key (a), key (b)); +update t1 set b=2 where a=1 or b=2; +ERROR HY000: You are using safe update mode and you tried to update a table without a WHERE that uses a KEY column +explain update t1 set b=2 where a=1 or b=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 Using where +delete from t1 where a=1 or b=2; +ERROR HY000: You are using safe update mode and you tried to update a table without a WHERE that uses a KEY column +explain delete from t1 where a=1 or b=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1 Using where +insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8); +update t1 set b=2 where a=1 or b=2; +delete from t1 where a=1 or b=2; +drop table t1; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/sql_safe_updates.test b/mysql-test/main/sql_safe_updates.test index 18decd0ff2cd2..25fe4a15ca26a 100644 --- a/mysql-test/main/sql_safe_updates.test +++ b/mysql-test/main/sql_safe_updates.test @@ -1,4 +1,23 @@ -# -# MDEV-14429 sql_safe_updates in my.cnf not work -# +--echo # +--echo # MDEV-14429 sql_safe_updates in my.cnf not work +--echo # select @@sql_safe_updates; + +--echo # +--echo # MDEV-18304 
sql_safe_updates does not work with OR clauses +--echo # +create table t1 (a int, b int, primary key (a), key (b)); +--error ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE +update t1 set b=2 where a=1 or b=2; +explain update t1 set b=2 where a=1 or b=2; +--error ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE +delete from t1 where a=1 or b=2; +explain delete from t1 where a=1 or b=2; +insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8); +update t1 set b=2 where a=1 or b=2; +delete from t1 where a=1 or b=2; +drop table t1; + +--echo # +--echo # End of 10.3 tests +--echo # diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index f6c05da944576..a0d8feb61e873 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -371,7 +371,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, DBUG_RETURN(TRUE); const_cond= (!conds || conds->const_item()); - safe_update= MY_TEST(thd->variables.option_bits & OPTION_SAFE_UPDATES); + safe_update= (thd->variables.option_bits & OPTION_SAFE_UPDATES) && + !thd->lex->describe; if (safe_update && const_cond) { my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, @@ -497,7 +498,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, } /* If running in safe sql mode, don't allow updates without keys */ - if (table->quick_keys.is_clear_all()) + if (!select || !select->quick) { thd->set_status_no_index_used(); if (safe_update && !using_limit) diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 6065d03402ffa..1e997b75c7dac 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -314,7 +314,8 @@ int mysql_update(THD *thd, ha_rows *found_return, ha_rows *updated_return) { bool using_limit= limit != HA_POS_ERROR; - bool safe_update= thd->variables.option_bits & OPTION_SAFE_UPDATES; + bool safe_update= (thd->variables.option_bits & OPTION_SAFE_UPDATES) + && !thd->lex->describe; bool used_key_is_modified= FALSE, transactional_table; bool will_batch= FALSE; bool can_compare_record; @@ -517,7 +518,7 @@ int mysql_update(THD *thd, 
} /* If running in safe sql mode, don't allow updates without keys */ - if (table->quick_keys.is_clear_all()) + if (!select || !select->quick) { thd->set_status_no_index_used(); if (safe_update && !using_limit) From f217c761892683403b65da75b2f2abb8ebd295b2 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 21 Feb 2022 18:16:17 +0100 Subject: [PATCH 3/7] mtr: fix --source lines detection mysqltest allows leading spaces before `--`, so mtr should too --- mysql-test/lib/mtr_cases.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm index 52872fa610f0a..dc385cb6c78e5 100644 --- a/mysql-test/lib/mtr_cases.pm +++ b/mysql-test/lib/mtr_cases.pm @@ -1036,7 +1036,7 @@ sub get_tags_from_file($$) { } # Check for a sourced include file. - if ($line =~ /^(--)?[[:space:]]*source[[:space:]]+([^;[:space:]]+)/) + if ($line =~ /^[[:space:]]*(--)?[[:space:]]*source[[:space:]]+([^;[:space:]]+)/) { my $include= $2; # The rules below must match open_file() function of mysqltest.cc From 1c43660aea349a92c13e04994941ded1b91714ff Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Mon, 14 Mar 2022 22:35:11 +0530 Subject: [PATCH 4/7] MDEV-28060 Online DDL fails while checking for instant alter condition - InnoDB fails to skip newly created column while checking for change column when table is in redundant row format. 
This issue is caused by MDEV-18035 (ccb1acbd3c15f0d99d1ea3cd1b206da38fa1c17f) --- mysql-test/suite/innodb/r/instant_alter_bugs.result | 9 +++++++++ mysql-test/suite/innodb/t/instant_alter_bugs.test | 9 +++++++++ storage/innobase/handler/handler0alter.cc | 7 ++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/innodb/r/instant_alter_bugs.result b/mysql-test/suite/innodb/r/instant_alter_bugs.result index e0457155d6a48..0c82ee8f0705d 100644 --- a/mysql-test/suite/innodb/r/instant_alter_bugs.result +++ b/mysql-test/suite/innodb/r/instant_alter_bugs.result @@ -448,4 +448,13 @@ ALTER TABLE t ADD d INT; affected rows: 0 info: Records: 0 Duplicates: 0 Warnings: 0 DROP TABLE t; +# +# MDEV-28060 Online DDL fails while checking for instant +# alter condition +# +CREATE TABLE t1(f1 CHAR(10) NOT NULL)ROW_FORMAT=REDUNDANT,ENGINE=InnoDB; +ALTER TABLE t1 ADD COLUMN(f2 INT NOT NULL, f3 INT NOT NULL, +f4 INT NOT NULL, f5 INT NOT NULL), +CHANGE COLUMN f1 f1 CHAR(10) DEFAULT NULL; +DROP TABLE t1; SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; diff --git a/mysql-test/suite/innodb/t/instant_alter_bugs.test b/mysql-test/suite/innodb/t/instant_alter_bugs.test index 1ba6442c8605c..b22d4bbbae1dd 100644 --- a/mysql-test/suite/innodb/t/instant_alter_bugs.test +++ b/mysql-test/suite/innodb/t/instant_alter_bugs.test @@ -466,4 +466,13 @@ ALTER TABLE t ADD d INT; --disable_info DROP TABLE t; +--echo # +--echo # MDEV-28060 Online DDL fails while checking for instant +--echo # alter condition +--echo # +CREATE TABLE t1(f1 CHAR(10) NOT NULL)ROW_FORMAT=REDUNDANT,ENGINE=InnoDB; +ALTER TABLE t1 ADD COLUMN(f2 INT NOT NULL, f3 INT NOT NULL, + f4 INT NOT NULL, f5 INT NOT NULL), + CHANGE COLUMN f1 f1 CHAR(10) DEFAULT NULL; +DROP TABLE t1; SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index e812bfc49284f..e79d9d67dbf30 100644 --- 
a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1795,8 +1795,13 @@ instant_alter_column_possible( Field** af = altered_table->field; Field** const end = altered_table->field + altered_table->s->fields; + List_iterator_fast<Create_field> cf_it( + ha_alter_info->alter_info->create_list); for (unsigned c = 0; af < end; af++) { - if (!(*af)->stored_in_db()) { + const Create_field* cf = cf_it++; + if (!cf->field || !(*af)->stored_in_db()) { + /* Ignore virtual or newly created + column */ continue; } From dafc5fb9c17637c6233e8a73c890f158d645a37c Mon Sep 17 00:00:00 2001 From: Hugo Wen Date: Fri, 4 Feb 2022 03:56:08 +0000 Subject: [PATCH 5/7] MDEV-27342: Fix issue of recovery failure using new server id Commit 6c39eaeb1 made the crash recovery dependent on server_id. The crash recovery could fail when restoring a new instance from the original crashed data directory USING A NEW SERVER ID. The issue doesn't exist in previous major versions before 10.6. The root cause is that, when generating the input XID to be searched in the hash, the server id is populated with the current server id. So if the server id changed when recovering, the XID couldn't be found in the hash because the server id doesn't match. This fix is to use the original server id when creating the input XID object in function `xarecover_do_commit_or_rollback`. All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. 
--- ...nlog_xa_recover_using_new_server_id.result | 17 +++++++ ...binlog_xa_recover_using_new_server_id.test | 44 +++++++++++++++++++ sql/handler.cc | 19 +++++--- sql/handler.h | 16 +++++-- 4 files changed, 85 insertions(+), 11 deletions(-) create mode 100644 mysql-test/suite/binlog/r/binlog_xa_recover_using_new_server_id.result create mode 100644 mysql-test/suite/binlog/t/binlog_xa_recover_using_new_server_id.test diff --git a/mysql-test/suite/binlog/r/binlog_xa_recover_using_new_server_id.result b/mysql-test/suite/binlog/r/binlog_xa_recover_using_new_server_id.result new file mode 100644 index 0000000000000..2495df1845f7f --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_xa_recover_using_new_server_id.result @@ -0,0 +1,17 @@ +========= Set server_id to 99 and prepare test table. +SET GLOBAL server_id= 99; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +========= Crash the server. +SET SESSION debug_dbug="+d,crash_commit_after_log"; +INSERT INTO t1 VALUES (1, NULL); +Got one of the listed errors +========= Restart the server with default config file in which server_id= 1. +========= Check that recover succeeds and server is up. +connection default; +========= Check that all transactions are recovered. +SELECT a FROM t1 ORDER BY a; +a +1 +========= Cleanup. +connection default; +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/t/binlog_xa_recover_using_new_server_id.test b/mysql-test/suite/binlog/t/binlog_xa_recover_using_new_server_id.test new file mode 100644 index 0000000000000..a7f2a206bae68 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_xa_recover_using_new_server_id.test @@ -0,0 +1,44 @@ +# This test verifies attempt to xa recover using a new server id that +# different from the transaction's original server_id. 
+# + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc +# Valgrind does not work well with test that crashes the server +--source include/not_valgrind.inc + + +--echo ========= Set server_id to 99 and prepare test table. +SET GLOBAL server_id= 99; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; + + +--echo ========= Crash the server. +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait-binlog_xa_recover_using_new_server_id.test +EOF +SET SESSION debug_dbug="+d,crash_commit_after_log"; +--error 2006,2013 +INSERT INTO t1 VALUES (1, NULL); + + +--echo ========= Restart the server with default config file in which server_id= 1. +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart-binlog_xa_recover_using_new_server_id.test +EOF + + +--echo ========= Check that recover succeeds and server is up. +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + + +--echo ========= Check that all transactions are recovered. +SELECT a FROM t1 ORDER BY a; + + +--echo ========= Cleanup. +connection default; +DROP TABLE t1; diff --git a/sql/handler.cc b/sql/handler.cc index 678284f087be6..d30c91f219563 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2414,7 +2414,7 @@ struct xarecover_st */ static xid_recovery_member* xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root, - XID *full_xid_arg) + XID *full_xid_arg, decltype(::server_id) server_id_arg) { xid_recovery_member *member= (xid_recovery_member *) alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); @@ -2428,7 +2428,7 @@ xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root, if (full_xid_arg) *xid_full= *full_xid_arg; - *member= xid_recovery_member(xid_arg, 1, false, xid_full); + *member= xid_recovery_member(xid_arg, 1, false, xid_full, server_id_arg); return my_hash_insert(hash_arg, (uchar*) member) ? 
NULL : member; @@ -2443,14 +2443,15 @@ xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root, */ static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root, - XID *full_xid_arg) + XID *full_xid_arg, + decltype(::server_id) server_id_arg) { xid_recovery_member* member; if ((member= (xid_recovery_member *) my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) member->in_engine_prepare++; else - member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root, full_xid_arg); + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root, full_xid_arg, server_id_arg); return member == NULL; } @@ -2502,7 +2503,8 @@ static void xarecover_do_commit_or_rollback(handlerton *hton, Binlog_offset *ptr_commit_max= arg->binlog_coord; if (!member->full_xid) - x.set(member->xid); + // Populate xid using the server_id from original transaction + x.set(member->xid, member->server_id); else x= *member->full_xid; @@ -2658,9 +2660,12 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, */ if (info->mem_root) { - // remember "full" xid too when it's not in mysql format + // remember "full" xid too when it's not in mysql format. + // Also record the transaction's original server_id. It will be used for + // populating the input XID to be searched in hash. if (xid_member_replace(info->commit_list, x, info->mem_root, - is_server_xid? NULL : &info->list[i])) + is_server_xid? NULL : &info->list[i], + is_server_xid? 
info->list[i].get_trx_server_id() : server_id)) { info->error= true; sql_print_error("Error in memory allocation at xarecover_handlerton"); diff --git a/sql/handler.h b/sql/handler.h index 43a99a5d80fe2..0434d88aa9895 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -900,12 +900,13 @@ struct xid_t { if ((bqual_length= bl)) memcpy(data+gl, b, bl); } - void set(ulonglong xid) + // Populate server_id if it's specified, otherwise use the current server_id + void set(ulonglong xid, decltype(::server_id) trx_server_id= server_id) { my_xid tmp; formatID= 1; set(MYSQL_XID_PREFIX_LEN, 0, MYSQL_XID_PREFIX); - memcpy(data+MYSQL_XID_PREFIX_LEN, &server_id, sizeof(server_id)); + memcpy(data+MYSQL_XID_PREFIX_LEN, &trx_server_id, sizeof(trx_server_id)); tmp= xid; memcpy(data+MYSQL_XID_OFFSET, &tmp, sizeof(tmp)); gtrid_length=MYSQL_XID_GTRID_LEN; @@ -931,6 +932,12 @@ struct xid_t { !memcmp(data, MYSQL_XID_PREFIX, MYSQL_XID_PREFIX_LEN) ? quick_get_my_xid() : 0; } + decltype(::server_id) get_trx_server_id() + { + decltype(::server_id) trx_server_id; + memcpy(&trx_server_id, data+MYSQL_XID_PREFIX_LEN, sizeof(trx_server_id)); + return trx_server_id; + } uint length() { return static_cast(sizeof(formatID)) + key_length(); @@ -972,11 +979,12 @@ struct xid_recovery_member bool decided_to_commit; Binlog_offset binlog_coord; // semisync recovery binlog offset XID *full_xid; // needed by wsrep or past it recovery + decltype(::server_id) server_id; // server id of original server xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg, - XID *full_xid_arg) + XID *full_xid_arg, decltype(::server_id) server_id_arg) : xid(xid_arg), in_engine_prepare(prepare_arg), - decided_to_commit(decided_arg), full_xid(full_xid_arg) {}; + decided_to_commit(decided_arg), full_xid(full_xid_arg) , server_id(server_id_arg) {}; }; /* for recover() handlerton call */ From 00896db1c513ca8d71cbe0e8b56b699b43df1467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Mar 
2022 10:37:13 +0200 Subject: [PATCH 6/7] MDEV-25214 Crash in fil_space_t::try_to_close fil_space_t::try_to_close(): Tolerate a tablespace that has no data files attached. The function fil_ibd_create() initially creates and attaches a tablespace with no files, and invokes fil_space_t::add() later. fil_node_open_file(): After releasing and reacquiring fil_system.mutex, check if the file was already opened by another thread. This avoids an assertion failure !node->is_open() in fil_node_open_file_low(). These failures were reproduced with the test innodb.table_definition_cache_debug and the fix of MDEV-27985. --- storage/innobase/fil/fil0fil.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 39049f237e15c..64b4d6e7ea32f 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2021, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -89,7 +89,9 @@ bool fil_space_t::try_to_close(bool print_info) of fil_system.space_list, so that they would be less likely to be closed here. */ fil_node_t *node= UT_LIST_GET_FIRST(space->chain); - ut_ad(node); + if (!node) + /* fil_ibd_create() did not invoke fil_space_t::add() yet */ + continue; ut_ad(!UT_LIST_GET_NEXT(chain, node)); if (!node->is_open()) @@ -454,6 +456,8 @@ static bool fil_node_open_file(fil_node_t *node) /* Flush tablespaces so that we can close modified files. 
*/ fil_flush_file_spaces(); mutex_enter(&fil_system.mutex); + if (node->is_open()) + return true; } } From 73fee39ea62037780c59161507e89dd76c10b7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Mar 2022 14:44:22 +0200 Subject: [PATCH 7/7] MDEV-27985 buf_flush_freed_pages() causes InnoDB to hang buf_flush_freed_pages(): Assert that neither buf_pool.mutex nor buf_pool.flush_list_mutex are held. Simplify the loops. Return the tablespace and the number of pages written or punched. buf_flush_LRU_list_batch(), buf_do_flush_list_batch(): Release buf_pool.mutex before invoking buf_flush_space(). buf_flush_list_space(): Acquire the mutexes only after invoking buf_flush_freed_pages(). Reviewed by: Thirunarayanan Balathandayuthapani --- storage/innobase/buf/buf0flu.cc | 103 ++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 60108af6cf6d0..81183d42f7c1d 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1046,42 +1046,52 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, return i; } -MY_ATTRIBUTE((nonnull)) +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Write punch-hole or zeroes of the freed ranges when innodb_immediate_scrub_data_uncompressed from the freed ranges. 
-@param space tablespace which may contain ranges of freed pages */ -static void buf_flush_freed_pages(fil_space_t *space) +@param space tablespace which may contain ranges of freed pages +@param writable whether the tablespace is writable +@return number of pages written or hole-punched */ +static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable) { const bool punch_hole= space->punch_hole; - if (!srv_immediate_scrub_data_uncompressed && !punch_hole) - return; - lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn(); + if (!punch_hole && !srv_immediate_scrub_data_uncompressed) + return 0; + + mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); + mysql_mutex_assert_not_owner(&buf_pool.mutex); - std::unique_lock freed_lock(space->freed_range_mutex); - if (space->freed_ranges.empty() - || flush_to_disk_lsn < space->get_last_freed_lsn()) + space->freed_range_mutex.lock(); + if (space->freed_ranges.empty() || + log_sys.get_flushed_lsn() < space->get_last_freed_lsn()) { - freed_lock.unlock(); - return; + space->freed_range_mutex.unlock(); + return 0; } + const unsigned physical_size{space->physical_size()}; + range_set freed_ranges= std::move(space->freed_ranges); - freed_lock.unlock(); + uint32_t written= 0; - for (const auto &range : freed_ranges) + if (!writable); + else if (punch_hole) { - const ulint physical_size= space->physical_size(); - - if (punch_hole) + for (const auto &range : freed_ranges) { + written+= range.last - range.first + 1; space->reacquire(); space->io(IORequest(IORequest::PUNCH_RANGE), os_offset_t{range.first} * physical_size, (range.last - range.first + 1) * physical_size, nullptr); } - else if (srv_immediate_scrub_data_uncompressed) + } + else + { + for (const auto &range : freed_ranges) { + written+= range.last - range.first + 1; for (os_offset_t i= range.first; i <= range.last; i++) { space->reacquire(); @@ -1090,8 +1100,10 @@ static void buf_flush_freed_pages(fil_space_t *space) const_cast(field_ref_zero)); } } - 
buf_pool.stat.n_pages_written+= (range.last - range.first + 1); } + + space->freed_range_mutex.unlock(); + return written; } /** Flushes to disk all flushable pages within the flush area @@ -1213,14 +1225,12 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max) /** Start writing out pages for a tablespace. @param id tablespace identifier -@return tablespace -@retval nullptr if the pages for this tablespace should be discarded */ -static fil_space_t *buf_flush_space(const uint32_t id) +@return tablespace and number of pages written */ +static std::pair buf_flush_space(const uint32_t id) { - fil_space_t *space= fil_space_t::get(id); - if (space) - buf_flush_freed_pages(space); - return space; + if (fil_space_t *space= fil_space_t::get(id)) + return {space, buf_flush_freed_pages(space, true)}; + return {nullptr, 0}; } struct flush_counters_t @@ -1288,6 +1298,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) n->flushed + n->evicted < max) || recv_recovery_is_on()); ++scanned) { + retry: buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage); const lsn_t oldest_modification= bpage->oldest_modification(); buf_pool.lru_hp.set(prev); @@ -1309,10 +1320,18 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) { if (last_space_id != space_id) { + buf_pool.lru_hp.set(bpage); + mysql_mutex_unlock(&buf_pool.mutex); if (space) space->release(); - space= buf_flush_space(space_id); + auto p= buf_flush_space(space_id); + space= p.first; last_space_id= space_id; + mysql_mutex_lock(&buf_pool.mutex); + if (p.second) + buf_pool.stat.n_pages_written+= p.second; + bpage= buf_pool.lru_hp.get(); + goto retry; } else ut_ad(!space); @@ -1455,10 +1474,28 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) { if (last_space_id != space_id) { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + buf_pool.flush_hp.set(bpage); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + mysql_mutex_unlock(&buf_pool.mutex); if (space) space->release(); - 
space= buf_flush_space(space_id); + auto p= buf_flush_space(space_id); + space= p.first; last_space_id= space_id; + mysql_mutex_lock(&buf_pool.mutex); + if (p.second) + buf_pool.stat.n_pages_written+= p.second; + mysql_mutex_lock(&buf_pool.flush_list_mutex); + bpage= buf_pool.flush_hp.get(); + if (!bpage) + break; + if (bpage->id() != page_id) + continue; + buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage)); + if (bpage->oldest_modification() <= 1 || !bpage->ready_for_flush()) + goto next; + mysql_mutex_unlock(&buf_pool.flush_list_mutex); } else ut_ad(!space); @@ -1486,6 +1523,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) } mysql_mutex_lock(&buf_pool.flush_list_mutex); + next: bpage= buf_pool.flush_hp.get(); } @@ -1582,11 +1620,14 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) bool may_have_skipped= false; ulint max_n_flush= srv_io_capacity; - mysql_mutex_lock(&buf_pool.mutex); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - bool acquired= space->acquire(); - buf_flush_freed_pages(space); + { + const uint32_t written{buf_flush_freed_pages(space, acquired)}; + mysql_mutex_lock(&buf_pool.mutex); + if (written) + buf_pool.stat.n_pages_written+= written; + } + mysql_mutex_lock(&buf_pool.flush_list_mutex); for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) {