Skip to content

Commit 73fee39

Browse files
committed
MDEV-27985 buf_flush_freed_pages() causes InnoDB to hang
buf_flush_freed_pages(): Assert that neither buf_pool.mutex nor buf_pool.flush_list_mutex are held. Simplify the loops. Return the tablespace and the number of pages written or punched. buf_flush_LRU_list_batch(), buf_do_flush_list_batch(): Release buf_pool.mutex before invoking buf_flush_space(). buf_flush_list_space(): Acquire the mutexes only after invoking buf_flush_freed_pages(). Reviewed by: Thirunarayanan Balathandayuthapani
1 parent 00896db commit 73fee39

File tree

1 file changed

+72
-31
lines changed

1 file changed

+72
-31
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 72 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,42 +1046,52 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
10461046
return i;
10471047
}
10481048

1049-
MY_ATTRIBUTE((nonnull))
1049+
MY_ATTRIBUTE((nonnull, warn_unused_result))
10501050
/** Write punch-hole or zeroes of the freed ranges when
10511051
innodb_immediate_scrub_data_uncompressed from the freed ranges.
1052-
@param space tablespace which may contain ranges of freed pages */
1053-
static void buf_flush_freed_pages(fil_space_t *space)
1052+
@param space tablespace which may contain ranges of freed pages
1053+
@param writable whether the tablespace is writable
1054+
@return number of pages written or hole-punched */
1055+
static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable)
10541056
{
10551057
const bool punch_hole= space->punch_hole;
1056-
if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
1057-
return;
1058-
lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn();
1058+
if (!punch_hole && !srv_immediate_scrub_data_uncompressed)
1059+
return 0;
1060+
1061+
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
1062+
mysql_mutex_assert_not_owner(&buf_pool.mutex);
10591063

1060-
std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex);
1061-
if (space->freed_ranges.empty()
1062-
|| flush_to_disk_lsn < space->get_last_freed_lsn())
1064+
space->freed_range_mutex.lock();
1065+
if (space->freed_ranges.empty() ||
1066+
log_sys.get_flushed_lsn() < space->get_last_freed_lsn())
10631067
{
1064-
freed_lock.unlock();
1065-
return;
1068+
space->freed_range_mutex.unlock();
1069+
return 0;
10661070
}
10671071

1072+
const unsigned physical_size{space->physical_size()};
1073+
10681074
range_set freed_ranges= std::move(space->freed_ranges);
1069-
freed_lock.unlock();
1075+
uint32_t written= 0;
10701076

1071-
for (const auto &range : freed_ranges)
1077+
if (!writable);
1078+
else if (punch_hole)
10721079
{
1073-
const ulint physical_size= space->physical_size();
1074-
1075-
if (punch_hole)
1080+
for (const auto &range : freed_ranges)
10761081
{
1082+
written+= range.last - range.first + 1;
10771083
space->reacquire();
10781084
space->io(IORequest(IORequest::PUNCH_RANGE),
10791085
os_offset_t{range.first} * physical_size,
10801086
(range.last - range.first + 1) * physical_size,
10811087
nullptr);
10821088
}
1083-
else if (srv_immediate_scrub_data_uncompressed)
1089+
}
1090+
else
1091+
{
1092+
for (const auto &range : freed_ranges)
10841093
{
1094+
written+= range.last - range.first + 1;
10851095
for (os_offset_t i= range.first; i <= range.last; i++)
10861096
{
10871097
space->reacquire();
@@ -1090,8 +1100,10 @@ static void buf_flush_freed_pages(fil_space_t *space)
10901100
const_cast<byte*>(field_ref_zero));
10911101
}
10921102
}
1093-
buf_pool.stat.n_pages_written+= (range.last - range.first + 1);
10941103
}
1104+
1105+
space->freed_range_mutex.unlock();
1106+
return written;
10951107
}
10961108

10971109
/** Flushes to disk all flushable pages within the flush area
@@ -1213,14 +1225,12 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
12131225

12141226
/** Start writing out pages for a tablespace.
12151227
@param id tablespace identifier
1216-
@return tablespace
1217-
@retval nullptr if the pages for this tablespace should be discarded */
1218-
static fil_space_t *buf_flush_space(const uint32_t id)
1228+
@return tablespace and number of pages written */
1229+
static std::pair<fil_space_t*, uint32_t> buf_flush_space(const uint32_t id)
12191230
{
1220-
fil_space_t *space= fil_space_t::get(id);
1221-
if (space)
1222-
buf_flush_freed_pages(space);
1223-
return space;
1231+
if (fil_space_t *space= fil_space_t::get(id))
1232+
return {space, buf_flush_freed_pages(space, true)};
1233+
return {nullptr, 0};
12241234
}
12251235

12261236
struct flush_counters_t
@@ -1288,6 +1298,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
12881298
n->flushed + n->evicted < max) ||
12891299
recv_recovery_is_on()); ++scanned)
12901300
{
1301+
retry:
12911302
buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
12921303
const lsn_t oldest_modification= bpage->oldest_modification();
12931304
buf_pool.lru_hp.set(prev);
@@ -1309,10 +1320,18 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
13091320
{
13101321
if (last_space_id != space_id)
13111322
{
1323+
buf_pool.lru_hp.set(bpage);
1324+
mysql_mutex_unlock(&buf_pool.mutex);
13121325
if (space)
13131326
space->release();
1314-
space= buf_flush_space(space_id);
1327+
auto p= buf_flush_space(space_id);
1328+
space= p.first;
13151329
last_space_id= space_id;
1330+
mysql_mutex_lock(&buf_pool.mutex);
1331+
if (p.second)
1332+
buf_pool.stat.n_pages_written+= p.second;
1333+
bpage= buf_pool.lru_hp.get();
1334+
goto retry;
13161335
}
13171336
else
13181337
ut_ad(!space);
@@ -1455,10 +1474,28 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
14551474
{
14561475
if (last_space_id != space_id)
14571476
{
1477+
mysql_mutex_lock(&buf_pool.flush_list_mutex);
1478+
buf_pool.flush_hp.set(bpage);
1479+
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
1480+
mysql_mutex_unlock(&buf_pool.mutex);
14581481
if (space)
14591482
space->release();
1460-
space= buf_flush_space(space_id);
1483+
auto p= buf_flush_space(space_id);
1484+
space= p.first;
14611485
last_space_id= space_id;
1486+
mysql_mutex_lock(&buf_pool.mutex);
1487+
if (p.second)
1488+
buf_pool.stat.n_pages_written+= p.second;
1489+
mysql_mutex_lock(&buf_pool.flush_list_mutex);
1490+
bpage= buf_pool.flush_hp.get();
1491+
if (!bpage)
1492+
break;
1493+
if (bpage->id() != page_id)
1494+
continue;
1495+
buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage));
1496+
if (bpage->oldest_modification() <= 1 || !bpage->ready_for_flush())
1497+
goto next;
1498+
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
14621499
}
14631500
else
14641501
ut_ad(!space);
@@ -1486,6 +1523,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
14861523
}
14871524

14881525
mysql_mutex_lock(&buf_pool.flush_list_mutex);
1526+
next:
14891527
bpage= buf_pool.flush_hp.get();
14901528
}
14911529

@@ -1582,11 +1620,14 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
15821620
bool may_have_skipped= false;
15831621
ulint max_n_flush= srv_io_capacity;
15841622

1585-
mysql_mutex_lock(&buf_pool.mutex);
1586-
mysql_mutex_lock(&buf_pool.flush_list_mutex);
1587-
15881623
bool acquired= space->acquire();
1589-
buf_flush_freed_pages(space);
1624+
{
1625+
const uint32_t written{buf_flush_freed_pages(space, acquired)};
1626+
mysql_mutex_lock(&buf_pool.mutex);
1627+
if (written)
1628+
buf_pool.stat.n_pages_written+= written;
1629+
}
1630+
mysql_mutex_lock(&buf_pool.flush_list_mutex);
15901631

15911632
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
15921633
{

0 commit comments

Comments
 (0)