Skip to content

Commit 6aa50ba

Browse files
committed
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491 InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0 by replacing the flawed wait logic in dict_index_remove_from_cache_low(). On DISCARD TABLESPACE, there is no need to drop the adaptive hash index. We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or DROP INDEX. As long as the dict_index_t object remains in the cache and the table remains inaccessible, the adaptive hash index entries to orphaned pages would not do any harm. They would be dropped when buffer pool pages are reused for something else. btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch(): Remove the parameter zip_size, and pass 0 to buf_page_get_gen(). buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL. buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index even if the tablespace is inaccessible. buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop the adaptive hash index. buf_LRU_flush_or_remove_pages(), fil_delete_tablespace(): Remove the parameter drop_ahi. dict_index_remove_from_cache_low(): Actively drop the adaptive hash index if entries exist. This should prevent InnoDB hangs on DROP TABLE or DROP INDEX. row_import_for_mysql(): Drop any adaptive hash index entries for the table. row_drop_table_for_mysql(): Drop any adaptive hash index for the table, except if the table resides in the system tablespace. (DISCARD TABLESPACE does not apply to the system tablespace, and we do not want to drop the adaptive hash index for other tables than the one that is being dropped.) row_truncate_table_for_mysql(): Drop any adaptive hash index entries for the table, except if the table resides in the system tablespace.
1 parent b7985a4 commit 6aa50ba

File tree

20 files changed

+178
-174
lines changed

20 files changed

+178
-174
lines changed

storage/innobase/btr/btr0sea.cc

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2008, Google Inc.
5+
Copyright (c) 2018, MariaDB Corporation.
56
67
Portions of this file contain modifications contributed and copyrighted by
78
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -1250,17 +1251,11 @@ btr_search_drop_page_hash_index(
12501251
mem_free(folds);
12511252
}
12521253

1253-
/********************************************************************//**
1254-
Drops a possible page hash index when a page is evicted from the buffer pool
1255-
or freed in a file segment. */
1254+
/** Drop possible adaptive hash index entries when a page is evicted
1255+
from the buffer pool or freed in a file, or the index is being dropped. */
12561256
UNIV_INTERN
12571257
void
1258-
btr_search_drop_page_hash_when_freed(
1259-
/*=================================*/
1260-
ulint space, /*!< in: space id */
1261-
ulint zip_size, /*!< in: compressed page size in bytes
1262-
or 0 for uncompressed pages */
1263-
ulint page_no) /*!< in: page number */
1258+
btr_search_drop_page_hash_when_freed(ulint space, ulint page_no)
12641259
{
12651260
buf_block_t* block;
12661261
mtr_t mtr;
@@ -1273,7 +1268,7 @@ btr_search_drop_page_hash_when_freed(
12731268
are possibly holding, we cannot s-latch the page, but must
12741269
(recursively) x-latch it, even though we are only reading. */
12751270

1276-
block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL,
1271+
block = buf_page_get_gen(space, 0, page_no, RW_X_LATCH, NULL,
12771272
BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
12781273
&mtr);
12791274

storage/innobase/buf/buf0buf.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,17 +3075,18 @@ buf_page_get_gen(
30753075
#ifdef UNIV_DEBUG
30763076
switch (mode) {
30773077
case BUF_EVICT_IF_IN_POOL:
3078+
case BUF_PEEK_IF_IN_POOL:
30783079
/* After DISCARD TABLESPACE, the tablespace would not exist,
30793080
but in IMPORT TABLESPACE, PageConverter::operator() must
30803081
replace any old pages, which were not evicted during DISCARD.
3081-
Skip the assertion on zip_size. */
3082+
Similarly, btr_search_drop_page_hash_when_freed() must
3083+
remove any old pages. Skip the assertion on zip_size. */
30823084
break;
30833085
case BUF_GET_NO_LATCH:
30843086
ut_ad(rw_latch == RW_NO_LATCH);
30853087
/* fall through */
30863088
case BUF_GET:
30873089
case BUF_GET_IF_IN_POOL:
3088-
case BUF_PEEK_IF_IN_POOL:
30893090
case BUF_GET_IF_IN_POOL_OR_WATCH:
30903091
case BUF_GET_POSSIBLY_FREED:
30913092
ut_ad(zip_size == fil_space_get_zip_size(space));
@@ -3257,7 +3258,8 @@ buf_page_get_gen(
32573258

32583259
fix_mutex = buf_page_get_mutex(&fix_block->page);
32593260

3260-
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
3261+
ut_ad(page_zip_get_size(&block->page.zip) == zip_size
3262+
|| mode == BUF_PEEK_IF_IN_POOL);
32613263

32623264
switch (mode) {
32633265
case BUF_GET_IF_IN_POOL:

storage/innobase/buf/buf0lru.cc

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,6 @@ void
241241
buf_LRU_drop_page_hash_batch(
242242
/*=========================*/
243243
ulint space_id, /*!< in: space id */
244-
ulint zip_size, /*!< in: compressed page size in bytes
245-
or 0 for uncompressed pages */
246244
const ulint* arr, /*!< in: array of page_no */
247245
ulint count) /*!< in: number of entries in array */
248246
{
@@ -252,8 +250,7 @@ buf_LRU_drop_page_hash_batch(
252250
ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE);
253251

254252
for (i = 0; i < count; ++i) {
255-
btr_search_drop_page_hash_when_freed(space_id, zip_size,
256-
arr[i]);
253+
btr_search_drop_page_hash_when_freed(space_id, arr[i]);
257254
}
258255
}
259256

@@ -272,15 +269,6 @@ buf_LRU_drop_page_hash_for_tablespace(
272269
buf_page_t* bpage;
273270
ulint* page_arr;
274271
ulint num_entries;
275-
ulint zip_size;
276-
277-
zip_size = fil_space_get_zip_size(id);
278-
279-
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
280-
/* Somehow, the tablespace does not exist. Nothing to drop. */
281-
ut_ad(0);
282-
return;
283-
}
284272

285273
page_arr = static_cast<ulint*>(ut_malloc(
286274
sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
@@ -333,8 +321,7 @@ buf_LRU_drop_page_hash_for_tablespace(
333321
the latching order. */
334322
buf_pool_mutex_exit(buf_pool);
335323

336-
buf_LRU_drop_page_hash_batch(
337-
id, zip_size, page_arr, num_entries);
324+
buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
338325

339326
num_entries = 0;
340327

@@ -365,10 +352,32 @@ buf_LRU_drop_page_hash_for_tablespace(
365352
buf_pool_mutex_exit(buf_pool);
366353

367354
/* Drop any remaining batch of search hashed pages. */
368-
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
355+
buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
369356
ut_free(page_arr);
370357
}
371358

359+
/** Drop the adaptive hash index for a tablespace.
360+
@param[in,out] table table */
361+
UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
362+
{
363+
for (dict_index_t* index = dict_table_get_first_index(table);
364+
index != NULL;
365+
index = dict_table_get_next_index(index)) {
366+
if (btr_search_info_get_ref_count(
367+
btr_search_get_info(index))) {
368+
goto drop_ahi;
369+
}
370+
}
371+
372+
return;
373+
drop_ahi:
374+
ulint id = table->space;
375+
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
376+
buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i),
377+
id);
378+
}
379+
}
380+
372381
/******************************************************************//**
373382
While flushing (or removing dirty) pages from a tablespace we don't
374383
want to hog the CPU and resources. Release the buffer pool and block
@@ -675,18 +684,11 @@ buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx)
675684
/** Empty the flush list for all pages belonging to a tablespace.
676685
@param[in] id tablespace identifier
677686
@param[in] trx transaction, for checking for user interrupt;
678-
or NULL if nothing is to be written
679-
@param[in] drop_ahi whether to drop the adaptive hash index */
680-
UNIV_INTERN
681-
void
682-
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi)
687+
or NULL if nothing is to be written */
688+
UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx)
683689
{
684690
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
685-
buf_pool_t* buf_pool = buf_pool_from_array(i);
686-
if (drop_ahi) {
687-
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
688-
}
689-
buf_flush_dirty_pages(buf_pool, id, trx);
691+
buf_flush_dirty_pages(buf_pool_from_array(i), id, trx);
690692
}
691693

692694
if (trx && !trx_is_interrupted(trx)) {

storage/innobase/dict/dict0dict.cc

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2013, 2017, MariaDB Corporation.
5+
Copyright (c) 2013, 2018, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -1674,7 +1674,7 @@ dict_table_rename_in_cache(
16741674
filepath = fil_make_ibd_name(table->name, false);
16751675
}
16761676

1677-
fil_delete_tablespace(table->space, true);
1677+
fil_delete_tablespace(table->space);
16781678

16791679
/* Delete any temp file hanging around. */
16801680
if (os_file_status(filepath, &exists, &ftype)
@@ -2719,35 +2719,13 @@ dict_index_remove_from_cache_low(
27192719
zero. See also: dict_table_can_be_evicted() */
27202720

27212721
do {
2722-
ulint ref_count = btr_search_info_get_ref_count(info);
2723-
2724-
if (ref_count == 0) {
2722+
if (!btr_search_info_get_ref_count(info)) {
27252723
break;
27262724
}
27272725

2728-
/* Sleep for 10ms before trying again. */
2729-
os_thread_sleep(10000);
2730-
++retries;
2731-
2732-
if (retries % 500 == 0) {
2733-
/* No luck after 5 seconds of wait. */
2734-
fprintf(stderr, "InnoDB: Error: Waited for"
2735-
" %lu secs for hash index"
2736-
" ref_count (%lu) to drop"
2737-
" to 0.\n"
2738-
"index: \"%s\""
2739-
" table: \"%s\"\n",
2740-
retries/100,
2741-
ref_count,
2742-
index->name,
2743-
table->name);
2744-
}
2726+
buf_LRU_drop_page_hash_for_tablespace(table);
27452727

2746-
/* To avoid a hang here we commit suicide if the
2747-
ref_count doesn't drop to zero in 600 seconds. */
2748-
if (retries >= 60000) {
2749-
ut_error;
2750-
}
2728+
ut_a(++retries < 10000);
27512729
} while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict);
27522730

27532731
rw_lock_free(&index->lock);

storage/innobase/fil/fil0fil.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2891,7 +2891,7 @@ fil_delete_tablespace(ulint id, bool drop_ahi)
28912891
To deal with potential read requests by checking the
28922892
::stop_new_ops flag in fil_io() */
28932893

2894-
buf_LRU_flush_or_remove_pages(id, NULL, drop_ahi);
2894+
buf_LRU_flush_or_remove_pages(id, NULL);
28952895

28962896
#endif /* !UNIV_HOTBACKUP */
28972897

@@ -3002,7 +3002,7 @@ fil_discard_tablespace(
30023002
{
30033003
dberr_t err;
30043004

3005-
switch (err = fil_delete_tablespace(id, true)) {
3005+
switch (err = fil_delete_tablespace(id)) {
30063006
case DB_SUCCESS:
30073007
break;
30083008

storage/innobase/fsp/fsp0fsp.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, MariaDB Corporation.
4+
Copyright (c) 2017, 2018, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -3027,7 +3027,7 @@ fseg_free_page_low(
30273027
/* Drop search system page hash index if the page is found in
30283028
the pool and is hashed */
30293029

3030-
btr_search_drop_page_hash_when_freed(space, zip_size, page);
3030+
btr_search_drop_page_hash_when_freed(space, page);
30313031

30323032
descr = xdes_get_descriptor(space, zip_size, page, mtr);
30333033

@@ -3247,7 +3247,7 @@ fseg_free_extent(
32473247
found in the pool and is hashed */
32483248

32493249
btr_search_drop_page_hash_when_freed(
3250-
space, zip_size, first_page_in_extent + i);
3250+
space, first_page_in_extent + i);
32513251
}
32523252
}
32533253

storage/innobase/include/btr0sea.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4+
Copyright (c) 2018, MariaDB Corporation.
45
56
This program is free software; you can redistribute it and/or modify it under
67
the terms of the GNU General Public License as published by the Free Software
@@ -141,17 +142,11 @@ btr_search_drop_page_hash_index(
141142
s- or x-latched, or an index page
142143
for which we know that
143144
block->buf_fix_count == 0 */
144-
/********************************************************************//**
145-
Drops a possible page hash index when a page is evicted from the buffer pool
146-
or freed in a file segment. */
145+
/** Drop possible adaptive hash index entries when a page is evicted
146+
from the buffer pool or freed in a file, or the index is being dropped. */
147147
UNIV_INTERN
148148
void
149-
btr_search_drop_page_hash_when_freed(
150-
/*=================================*/
151-
ulint space, /*!< in: space id */
152-
ulint zip_size, /*!< in: compressed page size in bytes
153-
or 0 for uncompressed pages */
154-
ulint page_no); /*!< in: page number */
149+
btr_search_drop_page_hash_when_freed(ulint space, ulint page_no);
155150
/********************************************************************//**
156151
Updates the page hash index when a single record is inserted on a page. */
157152
UNIV_INTERN

storage/innobase/include/buf0lru.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, MariaDB Corporation.
4+
Copyright (c) 2017, 2018, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri
3434

3535
// Forward declaration
3636
struct trx_t;
37+
struct dict_table_t;
3738

3839
/******************************************************************//**
3940
Returns TRUE if less than 25 % of the buffer pool is available. This can be
@@ -52,14 +53,15 @@ These are low-level functions
5253
/** Minimum LRU list length for which the LRU_old pointer is defined */
5354
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
5455

56+
/** Drop the adaptive hash index for a tablespace.
57+
@param[in,out] table table */
58+
UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table);
59+
5560
/** Empty the flush list for all pages belonging to a tablespace.
5661
@param[in] id tablespace identifier
5762
@param[in] trx transaction, for checking for user interrupt;
58-
or NULL if nothing is to be written
59-
@param[in] drop_ahi whether to drop the adaptive hash index */
60-
UNIV_INTERN
61-
void
62-
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false);
63+
or NULL if nothing is to be written */
64+
UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx);
6365

6466
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
6567
/********************************************************************//**

storage/innobase/row/row0import.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Created 2012-02-08 by Sunny Bains.
3131
#endif
3232

3333
#include "btr0pcur.h"
34+
#include "btr0sea.h"
3435
#include "que0que.h"
3536
#include "dict0boot.h"
3637
#include "ibuf0ibuf.h"
@@ -3983,6 +3984,17 @@ row_import_for_mysql(
39833984
return(row_import_cleanup(prebuilt, trx, err));
39843985
}
39853986

3987+
/* On DISCARD TABLESPACE, we did not drop any adaptive hash
3988+
index entries. If we replaced the discarded tablespace with a
3989+
smaller one here, there could still be some adaptive hash
3990+
index entries that point to cached garbage pages in the buffer
3991+
pool, because PageConverter::operator() only evicted those
3992+
pages that were replaced by the imported pages. We must
3993+
discard all remaining adaptive hash index entries, because the
3994+
adaptive hash index must be a subset of the table contents;
3995+
false positives are not tolerated. */
3996+
buf_LRU_drop_page_hash_for_tablespace(table);
3997+
39863998
row_mysql_lock_data_dictionary(trx);
39873999

39884000
/* If the table is stored in a remote tablespace, we need to

storage/innobase/row/row0mysql.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3516,6 +3516,8 @@ row_truncate_table_for_mysql(
35163516
fil_space_release(space);
35173517
}
35183518

3519+
buf_LRU_drop_page_hash_for_tablespace(table);
3520+
35193521
if (flags != ULINT_UNDEFINED
35203522
&& fil_discard_tablespace(space_id) == DB_SUCCESS) {
35213523

@@ -4209,6 +4211,21 @@ row_drop_table_for_mysql(
42094211
rw_lock_x_unlock(dict_index_get_lock(index));
42104212
}
42114213

4214+
if (table->space != TRX_SYS_SPACE) {
4215+
/* On DISCARD TABLESPACE, we would not drop the
4216+
adaptive hash index entries. If the tablespace is
4217+
missing here, delete-marking the record in SYS_INDEXES
4218+
would not free any pages in the buffer pool. Thus,
4219+
dict_index_remove_from_cache() would hang due to
4220+
adaptive hash index entries existing in the buffer
4221+
pool. To prevent this hang, and also to guarantee
4222+
that btr_search_drop_page_hash_when_freed() will avoid
4223+
calling btr_search_drop_page_hash_index() while we
4224+
hold the InnoDB dictionary lock, we will drop any
4225+
adaptive hash index entries upfront. */
4226+
buf_LRU_drop_page_hash_for_tablespace(table);
4227+
}
4228+
42124229
/* We use the private SQL parser of Innobase to generate the
42134230
query graphs needed in deleting the dictionary data from system
42144231
tables in Innobase. Deleting a row from SYS_INDEXES table also

0 commit comments

Comments
 (0)