Commit bd5a640 (parent: 7792628)

MDEV-26033: Race condition between buf_pool.page_hash and resize()
The replacement of buf_pool.page_hash with a different type of hash table
in commit 5155a30 (MDEV-22871) introduced a race condition with buffer
pool resizing.

We have an execution trace where buf_pool.page_hash.array is changed to
point to something else while page_hash_latch::read_lock() is executing.
The same should also affect page_hash_latch::write_lock().

We fix the race condition by never resizing (and reallocating) the
buf_pool.page_hash. We assume that resizing the buffer pool is a rare
operation. Yes, there might be a performance regression if a server is
first started up with a tiny buffer pool, which is later enlarged. In
that case, the tiny buf_pool.page_hash.array could cause increased use
of the hash bucket lists. That problem can be worked around by initially
starting up the server with a larger buffer pool and then shrinking
that, until changing to a larger size again.

buf_pool_t::resize_hash(): Remove.

buf_pool_t::page_hash_table::lock(): Do not attempt to deal with hash
table resizing. If we really wanted that in a safe manner, we would
probably have to introduce a global rw-lock around the operation, or at
the very least, poll buf_pool.resizing, both of which would be
detrimental to performance.
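To make the reported race concrete, here is a minimal self-contained
C++ sketch of the unsafe pattern. It is a toy model with illustrative
names (toy_page_hash, cell), not the actual MariaDB declarations: the
reader must dereference the array pointer in order to reach a latch,
so the n_cells recheck can only detect a concurrent resize after the
use-after-free window has already been crossed.

#include <atomic>
#include <cstddef>
#include <shared_mutex>

struct toy_page_hash
{
  struct cell { std::shared_mutex latch; /* bucket chain omitted */ };

  std::atomic<std::size_t> n_cells{1024};
  std::atomic<cell*> array{new cell[1024]};

  std::shared_mutex *read_lock(std::size_t fold)
  {
    for (;;)
    {
      const std::size_t n= n_cells.load(std::memory_order_relaxed);
      /* UNSAFE: a concurrent resize may swap the array pointer and
      free the old array between this load and lock_shared() below. */
      cell *a= array.load(std::memory_order_relaxed);
      std::shared_mutex *latch= &a[fold % n].latch;
      latch->lock_shared();            /* may touch freed memory */
      if (n == n_cells.load(std::memory_order_relaxed))
        return latch;                  /* "validated", but too late */
      latch->unlock_shared();          /* a resize happened; retry */
    }
  }
};

The retry loop detects a resize only after the fact; the latch
acquisition itself has already dereferenced memory that may have been
freed. Keeping the array fixed for the lifetime of the buffer pool
removes the window entirely, which is what this commit does.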

2 files changed: 6 additions, 113 deletions


storage/innobase/buf/buf0buf.cc

Lines changed: 2 additions & 88 deletions
@@ -1536,13 +1536,6 @@ void buf_pool_t::close()
   ut_free(chunks);
   chunks= nullptr;
   page_hash.free();
-  while (page_hash_table *old_page_hash= freed_page_hash)
-  {
-    freed_page_hash= static_cast<page_hash_table*>
-      (old_page_hash->array[1].node);
-    old_page_hash->free();
-    UT_DELETE(old_page_hash);
-  }
   zip_hash.free();
 
   io_buf.close();
@@ -1829,57 +1822,6 @@ inline bool buf_pool_t::withdraw_blocks()
   return(false);
 }
 
-/** resize page_hash and zip_hash */
-inline void buf_pool_t::resize_hash()
-{
-  page_hash_table *new_page_hash= UT_NEW_NOKEY(page_hash_table());
-  new_page_hash->create(2 * buf_pool.curr_size);
-  new_page_hash->write_lock_all();
-
-  for (auto i= page_hash.pad(page_hash.n_cells); i--; )
-  {
-    static_assert(!((page_hash_table::ELEMENTS_PER_LATCH + 1) &
-                    page_hash_table::ELEMENTS_PER_LATCH),
-                  "must be one less than a power of 2");
-    if (!(i & page_hash_table::ELEMENTS_PER_LATCH))
-    {
-      ut_ad(reinterpret_cast<page_hash_latch*>
-            (&page_hash.array[i])->is_write_locked());
-      continue;
-    }
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (page_hash.array[i].node))
-    {
-      ut_ad(bpage->in_page_hash);
-      const ulint fold= bpage->id().fold();
-      HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, new_page_hash, fold, bpage);
-    }
-  }
-
-  buf_pool.page_hash.array[1].node= freed_page_hash;
-  std::swap(buf_pool.page_hash, *new_page_hash);
-  freed_page_hash= new_page_hash;
-
-  /* recreate zip_hash */
-  hash_table_t new_hash;
-  new_hash.create(2 * buf_pool.curr_size);
-
-  for (ulint i= 0; i < buf_pool.zip_hash.n_cells; i++)
-  {
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (HASH_GET_FIRST(&buf_pool.zip_hash, i)))
-    {
-      const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(bpage);
-      HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, &new_hash, fold, bpage);
-    }
-  }
-
-  std::swap(buf_pool.zip_hash.array, new_hash.array);
-  buf_pool.zip_hash.n_cells= new_hash.n_cells;
-  new_hash.free();
-}
 
 
 inline void buf_pool_t::page_hash_table::write_lock_all()
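A note on the deleted resize_hash() above: in this table the latches
are embedded in the hash array itself, one latch slot followed by
ELEMENTS_PER_LATCH bucket slots, which is why the loop skips (and only
asserts on) every slot whose index is a multiple of
ELEMENTS_PER_LATCH + 1. The static_assert requires ELEMENTS_PER_LATCH
to be one less than a power of two so that both the skip test and the
latch lookup are single bitwise operations. A sketch of that index
arithmetic follows; the constant 7 and the helper names are
illustrative assumptions (the real value derives from the cache line
size):

#include <cstddef>

constexpr std::size_t ELEMENTS_PER_LATCH= 7; /* one less than a power of 2 */

/* A slot holds a latch rather than a bucket chain iff its index is a
multiple of ELEMENTS_PER_LATCH + 1, i.e. the deleted loop's test
!(i & ELEMENTS_PER_LATCH). */
constexpr bool is_latch_slot(std::size_t i)
{ return !(i & ELEMENTS_PER_LATCH); }

/* Round a bucket index down to the latch slot guarding its group,
mirroring what lock_get() computes from a page-id fold. */
constexpr std::size_t latch_slot(std::size_t i)
{ return i & ~ELEMENTS_PER_LATCH; }

static_assert(is_latch_slot(8) && !is_latch_slot(9), "layout check");
static_assert(latch_slot(13) == 8, "slot 13 is guarded by slot 8");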
@@ -1904,26 +1846,6 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
 }
 
 
-inline void buf_pool_t::write_lock_all_page_hash()
-{
-  mysql_mutex_assert_owner(&mutex);
-  page_hash.write_lock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_lock_all();
-}
-
-
-inline void buf_pool_t::write_unlock_all_page_hash()
-{
-  page_hash.write_unlock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_unlock_all();
-}
-
 namespace
 {
 
@@ -2097,7 +2019,7 @@ inline void buf_pool_t::resize()
 	resizing.store(true, std::memory_order_relaxed);
 
 	mysql_mutex_lock(&mutex);
-	write_lock_all_page_hash();
+	page_hash.write_lock_all();
 
 	chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
 
@@ -2252,16 +2174,8 @@ inline void buf_pool_t::resize()
 		= srv_buf_pool_base_size > srv_buf_pool_size * 2
 		|| srv_buf_pool_base_size * 2 < srv_buf_pool_size;
 
-	/* Normalize page_hash and zip_hash,
-	if the new size is too different */
-	if (!warning && new_size_too_diff) {
-		buf_resize_status("Resizing hash table");
-		resize_hash();
-		ib::info() << "hash tables were resized";
-	}
-
 	mysql_mutex_unlock(&mutex);
-	write_unlock_all_page_hash();
+	page_hash.write_unlock_all();
 
 	UT_DELETE(chunk_map_old);
 

storage/innobase/include/buf0buf.h

Lines changed: 4 additions & 25 deletions
@@ -1895,22 +1895,14 @@ class buf_pool_t
     page_hash_latch *lock_get(ulint fold) const
     { return lock_get(fold, n_cells); }
 
-    /** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
+    /** Acquire an array latch.
     @tparam exclusive whether the latch is to be acquired exclusively
     @param fold hash bucket key */
     template<bool exclusive> page_hash_latch *lock(ulint fold)
    {
-      for (;;)
-      {
-        auto n= n_cells;
-        page_hash_latch *latch= lock_get(fold, n);
-        latch->acquire<exclusive>();
-        /* Our latch prevents n_cells from changing. */
-        if (UNIV_LIKELY(n == n_cells))
-          return latch;
-        /* Retry, because buf_pool_t::resize_hash() affected us. */
-        latch->release<exclusive>();
-      }
+      page_hash_latch *latch= lock_get(fold, n_cells);
+      latch->acquire<exclusive>();
+      return latch;
     }
 
     /** Exclusively aqcuire all latches */
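With resize_hash() gone, n_cells and the array are set once in
create() and never change afterwards, which is what makes the
simplified three-line lock() body above sufficient. In terms of the
toy model from the commit message (again a sketch under the same
assumed names, not the real code):

std::shared_mutex *read_lock_fixed(toy_page_hash &t, std::size_t fold)
{
  /* Safe: t.array and t.n_cells never change after initialization,
  so this dereference cannot race with a reallocation. */
  toy_page_hash::cell *a= t.array.load(std::memory_order_relaxed);
  std::shared_mutex *latch=
    &a[fold % t.n_cells.load(std::memory_order_relaxed)].latch;
  latch->lock_shared();
  return latch;
}

In the real code the latch is reached the same way, via
lock_get(fold, n_cells), and the acquisition no longer needs any
validation or retry.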
@@ -1920,19 +1912,6 @@ class buf_pool_t
     inline void write_unlock_all();
   };
 
-private:
-  /** Former page_hash that has been deleted during resize();
-  singly-linked list via freed_page_hash->array[1] */
-  page_hash_table *freed_page_hash;
-
-  /** Lock all page_hash, also freed_page_hash. */
-  inline void write_lock_all_page_hash();
-  /** Release all page_hash, also freed_page_hash. */
-  inline void write_unlock_all_page_hash();
-  /** Resize page_hash and zip_hash. */
-  inline void resize_hash();
-
-public:
   /** Hash table of file pages (buf_page_t::in_file() holds),
   indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
   page_hash_table page_hash;
