Skip to content

Commit

Permalink
MDEV-33585 follow-up optimization
Browse files Browse the repository at this point in the history
log_t: Define buf_size, max_buf_free as 32-bit and next_checkpoint_no
as byte (we only need a bit) and rearrange some data members,
so that on AMD64 we can fit log_sys.latch and log_sys.log in
the same 64-byte cache line.

mtr_t::commit_log(), mtr_t::commit_logger: A part of mtr_t::commit()
was split into a separate function, so that we will not unnecessarily
invoke log_sys.get_write_target() when running on a memory-mapped
log file, that is, when log_sys.is_pmem() holds.

Reviewed by: Vladislav Vaintroub
Tested by: Matthias Leich
  • Loading branch information
dr-m committed Apr 9, 2024
1 parent 0892e6d commit 42bda68
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 131 deletions.
9 changes: 5 additions & 4 deletions storage/innobase/include/log0log.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,6 @@ struct log_t

/** Last written LSN */
lsn_t write_lsn;
/** recommended maximum buf_free size, after which the buffer is flushed */
size_t max_buf_free;

/** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */
Expand All @@ -241,6 +239,10 @@ struct log_t
std::atomic<bool> need_checkpoint;
/** whether a checkpoint is pending; protected by latch.wr_lock() */
Atomic_relaxed<bool> checkpoint_pending;
/** next checkpoint number (protected by latch.wr_lock()) */
byte next_checkpoint_no;
/** recommended maximum buf_free size, after which the buffer is flushed */
unsigned max_buf_free;
/** Log sequence number when a log file overwrite (broken crash recovery)
was noticed. Protected by latch.wr_lock(). */
lsn_t overwrite_warned;
Expand All @@ -249,8 +251,6 @@ struct log_t
Atomic_relaxed<lsn_t> last_checkpoint_lsn;
/** next checkpoint LSN (protected by latch.wr_lock()) */
lsn_t next_checkpoint_lsn;
/** next checkpoint number (protected by latch.wr_lock()) */
ulint next_checkpoint_no;

/** Log file */
log_file_t log;
Expand Down Expand Up @@ -323,6 +323,7 @@ struct log_t
/** Check whether the log buffer still has room.
Must not be invoked when is_pmem() holds (debug assertion).
@return whether buf_free, with the buf_free_LOCK bits masked out,
is below max_buf_free */
bool buf_free_ok() const noexcept
{
  ut_ad(!is_pmem());
  /* A relaxed load suffices here: only the value is compared */
  const auto used= buf_free.load(std::memory_order_relaxed) & ~buf_free_LOCK;
  return used < max_buf_free;
}
Expand Down
9 changes: 9 additions & 0 deletions storage/innobase/include/mtr0mtr.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,13 @@ struct mtr_t {
/** Encrypt the log */
ATTRIBUTE_NOINLINE void encrypt();

/** Commit the mini-transaction log.
This is the part of commit() that follows do_write(); it is a separate
function template so that the pmem=true variant does not need to invoke
log_sys.get_write_target().
@tparam pmem value of log_sys.is_pmem()
@param mtr mini-transaction
@param lsns {start_lsn,flush_ahead}, as returned by do_write() */
template<bool pmem>
static void commit_log(mtr_t *mtr, std::pair<lsn_t,page_flush_ahead> lsns);

/** Append the redo log records to the redo log buffer.
@return {start_lsn,flush_ahead} */
std::pair<lsn_t,page_flush_ahead> do_write();
Expand All @@ -708,6 +715,8 @@ struct mtr_t {
template<bool spin,bool pmem> static
std::pair<lsn_t,page_flush_ahead> finish_writer(mtr_t *mtr, size_t len);

/** The applicable variant of commit_log(); assigned by finisher_update().
Only defined and invoked when HAVE_PMEM is set; otherwise commit()
calls commit_log<false>() directly. */
static void (*commit_logger)(mtr_t *, std::pair<lsn_t,page_flush_ahead>);
/** The applicable variant of finish_writer(); assigned by finisher_update() */
static std::pair<lsn_t,page_flush_ahead> (*finisher)(mtr_t *, size_t);

Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/log/log0log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ void log_t::attach_low(log_file_t file, os_offset_t size)
log.close();
mprotect(ptr, size_t(size), PROT_READ);
buf= static_cast<byte*>(ptr);
max_buf_free= size;
max_buf_free= 1;
# if defined __linux__ || defined _WIN32
set_block_size(CPU_LEVEL1_DCACHE_LINESIZE);
# endif
Expand Down
268 changes: 142 additions & 126 deletions storage/innobase/mtr/mtr0mtr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,26 @@ Created 11/26/1995 Heikki Tuuri
#include "mariadb_stats.h"
#include "my_cpu.h"

#ifdef HAVE_PMEM
/** The applicable variant of commit_log(); set by finisher_update() */
void (*mtr_t::commit_logger)(mtr_t *, std::pair<lsn_t,page_flush_ahead>);
#endif
/** The applicable variant of finish_writer(); set by finisher_update() */
std::pair<lsn_t,mtr_t::page_flush_ahead> (*mtr_t::finisher)(mtr_t *, size_t);
/** When nonzero, finisher_update() selects the spin=true variant of
finish_writer() */
unsigned mtr_t::spin_wait_delay;

void mtr_t::finisher_update()
{
ut_ad(log_sys.latch_have_wr());
finisher=
#ifdef HAVE_PMEM
log_sys.is_pmem()
? (spin_wait_delay
? mtr_t::finish_writer<true,true> : mtr_t::finish_writer<false,true>)
:
if (log_sys.is_pmem())
{
commit_logger= mtr_t::commit_log<true>;
finisher= spin_wait_delay
? mtr_t::finish_writer<true,true> : mtr_t::finish_writer<false,true>;
return;
}
commit_logger= mtr_t::commit_log<false>;
#endif
finisher=
(spin_wait_delay
? mtr_t::finish_writer<true,false> : mtr_t::finish_writer<false,false>);
}
Expand Down Expand Up @@ -336,7 +343,6 @@ inline lsn_t log_t::get_write_target() const
ut_ad(latch_have_any());
if (UNIV_LIKELY(buf_free_ok()))
return 0;
ut_ad(!is_pmem());
/* The LSN corresponding to the end of buf is
write_lsn - (first_lsn & 4095) + buf_free,
but we use simpler arithmetics to return a smaller write target in
Expand All @@ -345,151 +351,161 @@ inline lsn_t log_t::get_write_target() const
return write_lsn + max_buf_free / 2;
}

/** Commit a mini-transaction. */
void mtr_t::commit()
template<bool pmem>
void mtr_t::commit_log(mtr_t *mtr, std::pair<lsn_t,page_flush_ahead> lsns)
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());

/* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);
ut_ad(!m_latch_ex);
size_t modified= 0;
const lsn_t write_lsn= pmem ? 0 : log_sys.get_write_target();

if (m_modifications && (m_log_mode == MTR_LOG_NO_REDO || !m_log.empty()))
if (mtr->m_made_dirty)
{
if (UNIV_UNLIKELY(!is_logged()))
auto it= mtr->m_memo.rbegin();

mysql_mutex_lock(&buf_pool.flush_list_mutex);

buf_page_t *const prev=
buf_pool.prepare_insert_into_flush_list(lsns.first);

while (it != mtr->m_memo.rend())
{
release_unlogged();
goto func_exit;
const mtr_memo_slot_t &slot= *it++;
if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
modified++;
buf_block_t *b= static_cast<buf_block_t*>(slot.object);
ut_ad(b->page.id() < end_page_id);
ut_d(const auto s= b->page.state());
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <=
mtr->m_commit_lsn);
mach_write_to_8(b->page.frame + FIL_PAGE_LSN, mtr->m_commit_lsn);
if (UNIV_LIKELY_NULL(b->page.zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data,
FIL_PAGE_LSN + b->page.frame, 8);
buf_pool.insert_into_flush_list(prev, b, lsns.first);
}
}

ut_ad(!srv_read_only_mode);
std::pair<lsn_t,page_flush_ahead> lsns{do_write()};
process_freed_pages();
size_t modified= 0;
const lsn_t write_lsn= log_sys.get_write_target();
ut_ad(modified);
buf_pool.flush_list_requests+= modified;
buf_pool.page_cleaner_wakeup();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

if (m_made_dirty)
if (mtr->m_latch_ex)
{
auto it= m_memo.rbegin();

mysql_mutex_lock(&buf_pool.flush_list_mutex);
log_sys.latch.wr_unlock();
mtr->m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();

buf_page_t *const prev=
buf_pool.prepare_insert_into_flush_list(lsns.first);
mtr->release();
}
else
{
if (mtr->m_latch_ex)
{
log_sys.latch.wr_unlock();
mtr->m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();

while (it != m_memo.rend())
{
const mtr_memo_slot_t &slot= *it++;
for (auto it= mtr->m_memo.rbegin(); it != mtr->m_memo.rend(); )
{
const mtr_memo_slot_t &slot= *it++;
ut_ad(slot.object);
switch (slot.type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(slot.object)->s_unlock();
break;
case MTR_MEMO_SPACE_X_LOCK:
static_cast<fil_space_t*>(slot.object)->set_committed_size();
static_cast<fil_space_t*>(slot.object)->x_unlock();
break;
case MTR_MEMO_X_LOCK:
case MTR_MEMO_SX_LOCK:
static_cast<index_lock*>(slot.object)->
u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
break;
default:
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
ut_d(const auto s=)
bpage->unfix();
if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
modified++;
buf_block_t *b= static_cast<buf_block_t*>(slot.object);
ut_ad(b->page.id() < end_page_id);
ut_d(const auto s= b->page.state());
ut_ad(s > buf_page_t::FREED);
ut_ad(bpage->oldest_modification() > 1);
ut_ad(bpage->oldest_modification() < mtr->m_commit_lsn);
ut_ad(bpage->id() < end_page_id);
ut_ad(s >= buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <=
m_commit_lsn);
mach_write_to_8(b->page.frame + FIL_PAGE_LSN, m_commit_lsn);
if (UNIV_LIKELY_NULL(b->page.zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data,
FIL_PAGE_LSN + b->page.frame, 8);
buf_pool.insert_into_flush_list(prev, b, lsns.first);
ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
mtr->m_commit_lsn);
mach_write_to_8(bpage->frame + FIL_PAGE_LSN, mtr->m_commit_lsn);
if (UNIV_LIKELY_NULL(bpage->zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
FIL_PAGE_LSN + bpage->frame, 8);
modified++;
}
switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
case MTR_MEMO_PAGE_S_FIX:
bpage->lock.s_unlock();
continue;
case MTR_MEMO_PAGE_SX_FIX:
case MTR_MEMO_PAGE_X_FIX:
bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
continue;
default:
ut_ad(latch == MTR_MEMO_BUF_FIX);
}
}
}

ut_ad(modified);
buf_pool.flush_list_requests+= modified;
buf_pool.page_cleaner_wakeup();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
buf_pool.add_flush_list_requests(modified);
mtr->m_memo.clear();
}

if (m_latch_ex)
{
log_sys.latch.wr_unlock();
m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();
mariadb_increment_pages_updated(modified);

release();
}
else
{
if (m_latch_ex)
{
log_sys.latch.wr_unlock();
m_latch_ex= false;
}
else
log_sys.latch.rd_unlock();
if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
buf_flush_ahead(mtr->m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);

for (auto it= m_memo.rbegin(); it != m_memo.rend(); )
{
const mtr_memo_slot_t &slot= *it++;
ut_ad(slot.object);
switch (slot.type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(slot.object)->s_unlock();
break;
case MTR_MEMO_SPACE_X_LOCK:
static_cast<fil_space_t*>(slot.object)->set_committed_size();
static_cast<fil_space_t*>(slot.object)->x_unlock();
break;
case MTR_MEMO_X_LOCK:
case MTR_MEMO_SX_LOCK:
static_cast<index_lock*>(slot.object)->
u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
break;
default:
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
ut_d(const auto s=)
bpage->unfix();
if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
ut_ad(bpage->oldest_modification() > 1);
ut_ad(bpage->oldest_modification() < m_commit_lsn);
ut_ad(bpage->id() < end_page_id);
ut_ad(s >= buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
m_commit_lsn);
mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn);
if (UNIV_LIKELY_NULL(bpage->zip.data))
memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
FIL_PAGE_LSN + bpage->frame, 8);
modified++;
}
switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
case MTR_MEMO_PAGE_S_FIX:
bpage->lock.s_unlock();
continue;
case MTR_MEMO_PAGE_SX_FIX:
case MTR_MEMO_PAGE_X_FIX:
bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
continue;
default:
ut_ad(latch == MTR_MEMO_BUF_FIX);
}
}
}
if (!pmem && UNIV_UNLIKELY(write_lsn != 0))
log_write_up_to(write_lsn, false);
}

buf_pool.add_flush_list_requests(modified);
m_memo.clear();
}
/** Commit a mini-transaction. */
void mtr_t::commit()
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());

mariadb_increment_pages_updated(modified);
/* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);
ut_ad(!m_latch_ex);

if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);
if (m_modifications && (m_log_mode == MTR_LOG_NO_REDO || !m_log.empty()))
{
if (UNIV_UNLIKELY(!is_logged()))
{
release_unlogged();
goto func_exit;
}

if (UNIV_UNLIKELY(write_lsn != 0))
log_write_up_to(write_lsn, false);
ut_ad(!srv_read_only_mode);
std::pair<lsn_t,page_flush_ahead> lsns{do_write()};
process_freed_pages();
#ifdef HAVE_PMEM
commit_logger(this, lsns);
#else
commit_log<false>(this, lsns);
#endif
}
else
{
Expand Down

0 comments on commit 42bda68

Please sign in to comment.