Skip to content

Commit

Permalink
MDEV-12353: Replace MLOG_*LIST_*_DELETE and MLOG_*REC_DELETE
Browse files Browse the repository at this point in the history
No longer write the following redo log records:
MLOG_COMP_LIST_END_DELETE, MLOG_LIST_END_DELETE,
MLOG_COMP_LIST_START_DELETE, MLOG_LIST_START_DELETE,
MLOG_REC_DELETE,MLOG_COMP_REC_DELETE.

Each individual deleted record will be logged separately
using physical log records.

page_dir_slot_set_n_owned(),
page_zip_rec_set_owned(), page_zip_dir_delete(), page_zip_clear_rec():
Add the parameter mtr, and write redo log.

page_dir_slot_set_rec(): Remove. Replaced with lower-level operations
that write redo log when necessary.

page_rec_set_n_owned(): Replaces rec_set_n_owned_old(),
rec_set_n_owned_new().

rec_set_heap_no(): Replaces rec_set_heap_no_old(), rec_set_heap_no_new().

page_mem_free(), page_dir_split_slot(), page_dir_balance_slot():
Add the parameter mtr.

page_dir_set_n_slots(): Merge with the caller page_dir_split_slot().

page_dir_slot_set_rec(): Merge with the callers page_dir_split_slot()
and page_dir_balance_slot().

page_cur_insert_rec_low(), page_cur_insert_rec_zip():
Suppress the logging of lower-level operations.

page_cur_delete_rec_write_log(): Remove.

page_cur_delete_rec(): Do not tolerate mtr=NULL.

rec_convert_dtuple_to_rec_old(), rec_convert_dtuple_to_rec_comp():
Replace rec_set_heap_no_old() and rec_set_heap_no_new() with direct
access that does not involve redo logging.

mtr_t::memcpy(): Do allow non-redo-logged writes to uncompressed pages
of ROW_FORMAT=COMPRESSED pages.

buf_page_io_complete(): Evict the uncompressed page of
a ROW_FORMAT=COMPRESSED page after recovery. Because we no longer
write logical log records for deleting index records, but instead
write physical records that may refer directly to the compressed
page frame of a ROW_FORMAT=COMPRESSED page, and because on recovery
we will only apply the changes to the ROW_FORMAT=COMPRESSED page,
the uncompressed page frame can be stale until page_zip_decompress()
is executed.

recv_parse_or_apply_log_rec_body(): After applying MLOG_ZIP_WRITE_STRING,
ensure that the FIL_PAGE_TYPE of the uncompressed page matches the
compressed page, because buf_flush_init_for_writing() assumes that
field to be valid.

mlog_init_t::mark_ibuf_exist(): Invoke page_zip_decompress(), because
the uncompressed page after buf_page_create() is not necessarily
up to date.

buf_LRU_block_remove_hashed(): Bypass a page_zip_validate() check
during redo log apply.

recv_apply_hashed_log_recs(): Invoke mlog_init.mark_ibuf_exist()
also for the last batch, to ensure that page_zip_decompress() will
be called for freshly initialized pages.
  • Loading branch information
dr-m committed Feb 13, 2020
1 parent d00185c commit 2a77b2a
Show file tree
Hide file tree
Showing 18 changed files with 615 additions and 561 deletions.
1 change: 1 addition & 0 deletions mysql-test/suite/innodb/t/innodb_defrag_concurrent.opt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--loose-innodb-buffer-pool-stats
--loose-innodb-buffer-page
--loose-innodb-buffer-page-lru
--innodb-log-buffer-size=2m
--innodb-defragment=1
5 changes: 2 additions & 3 deletions storage/innobase/btr/btr0btr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4013,9 +4013,8 @@ btr_discard_only_page_on_level(
DBUG_ASSERT(index->table->instant);
DBUG_ASSERT(rec_is_alter_metadata(rec, *index));
btr_set_instant(block, *index, mtr);
rec = page_cur_insert_rec_low(
&cur,
index, rec, offsets, mtr);
rec = page_cur_insert_rec_low(&cur, index, rec,
offsets, mtr);
ut_ad(rec);
mem_heap_free(heap);
} else if (index->is_instant()) {
Expand Down
46 changes: 33 additions & 13 deletions storage/innobase/btr/btr0bulk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,16 +202,22 @@ inline void PageBulk::insertPage(const rec_t *rec, offset_t *offsets)
static_cast<uint16_t>(next_rec - insert_rec));
mach_write_to_2(m_cur_rec - REC_NEXT,
static_cast<uint16_t>(insert_rec - m_cur_rec));
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no);
rec_set_bit_field_1(insert_rec, 0, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
rec_set_bit_field_2(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no,
REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
} else {
mach_write_to_2(insert_rec - REC_NEXT,
mach_read_from_2(m_cur_rec - REC_NEXT));
mach_write_to_2(m_cur_rec - REC_NEXT, page_offset(insert_rec));
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no);
rec_set_bit_field_1(insert_rec, 0, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
rec_set_bit_field_2(insert_rec,
PAGE_HEAP_NO_USER_LOW + m_rec_no,
REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}

/* 4. Set member variables. */
Expand Down Expand Up @@ -282,8 +288,9 @@ inline void PageBulk::finishPage()
{
slot-= PAGE_DIR_SLOT_SIZE;
mach_write_to_2(slot, offset);
rec_set_n_owned_new(m_page + offset, nullptr, count);
count = 0;
rec_set_bit_field_1(m_page + offset, count, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
count= 0;
}

uint16_t next= (mach_read_from_2(m_page + offset - REC_NEXT) + offset) &
Expand All @@ -308,8 +315,8 @@ inline void PageBulk::finishPage()
{
slot-= PAGE_DIR_SLOT_SIZE;
mach_write_to_2(slot, page_offset(insert_rec));
rec_set_n_owned_old(insert_rec, count);

rec_set_bit_field_1(insert_rec, count, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
count= 0;
}

Expand All @@ -328,13 +335,26 @@ inline void PageBulk::finishPage()

count+= (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;

page_dir_slot_set_n_owned(slot, nullptr, 0);
rec_t *rec= const_cast<rec_t*>(page_dir_slot_get_rec(slot));
rec_set_bit_field_1(rec, 0, m_is_comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
slot+= PAGE_DIR_SLOT_SIZE;
}

slot-= PAGE_DIR_SLOT_SIZE;
page_dir_slot_set_rec(slot, page_get_supremum_rec(m_page));
page_dir_slot_set_n_owned(slot, nullptr, count + 1);

if (m_is_comp)
{
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
rec_set_bit_field_1(m_page + PAGE_NEW_SUPREMUM, count + 1, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
else
{
mach_write_to_2(slot, PAGE_OLD_SUPREMUM);
rec_set_bit_field_1(m_page + PAGE_OLD_SUPREMUM, count + 1, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}

ut_ad(!dict_index_is_spatial(m_index));
ut_ad(!page_get_instant(m_page));
Expand Down
21 changes: 17 additions & 4 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5535,13 +5535,27 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
buf_pool->stat.n_pages_read++;
ut_ad(!uncompressed || !bpage->zip.data
|| !recv_recovery_is_on()
|| buf_page_can_relocate(bpage));
mutex_exit(block_mutex);

if (uncompressed) {
#if 1 /* MDEV-12353 FIXME: Remove this! */
if (UNIV_LIKELY_NULL(bpage->zip.data)
&& recv_recovery_is_on()) {
rw_lock_x_unlock_gen(
&reinterpret_cast<buf_block_t*>(bpage)
->lock, BUF_IO_READ);
if (!buf_LRU_free_page(bpage, false)) {
ut_ad(!"could not remove");
}
goto func_exit;
}
#endif
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_READ);
}

mutex_exit(block_mutex);
} else {
/* Write means a flush operation: call the completion
routine in the flush system */
Expand Down Expand Up @@ -5575,9 +5589,8 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
DBUG_PRINT("ib_buf", ("%s page %u:%u",
io_type == BUF_IO_READ ? "read" : "wrote",
bpage->id.space(), bpage->id.page_no()));

func_exit:
mutex_exit(&buf_pool->mutex);

return DB_SUCCESS;
}

Expand Down
5 changes: 4 additions & 1 deletion storage/innobase/buf/buf0lru.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1765,7 +1765,10 @@ buf_LRU_block_remove_hashed(
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
#if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT
ut_a(page_zip_validate(
/* During recovery, we only update the
compressed page, not the uncompressed one. */
ut_a(recv_recovery_is_on()
|| page_zip_validate(
&bpage->zip, page,
((buf_block_t*) bpage)->index));
#endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */
Expand Down
17 changes: 1 addition & 16 deletions storage/innobase/include/page0cur.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,22 +209,6 @@ page_cur_insert_rec_zip(
offset_t* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit(). */
ATTRIBUTE_COLD /* only used when crash-upgrading */
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
page_t* new_page, /*!< in/out: index page to copy to */
rec_t* rec, /*!< in: first record to copy */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
Expand Down Expand Up @@ -363,6 +347,7 @@ page_parse_copy_rec_list_to_created_page(
/***********************************************************//**
Parses log record of a record delete on a page.
@return pointer to record end or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte*
page_cur_parse_delete_rec(
/*======================*/
Expand Down
61 changes: 33 additions & 28 deletions storage/innobase/include/page0page.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,38 @@ inline trx_id_t page_get_max_trx_id(const page_t *page)
return mach_read_from_8(p);
}

/**
Set the number of owned records.
@tparam compressed whether to update any ROW_FORMAT=COMPRESSED page as well
@param[in,out] block index page
@param[in,out] rec ROW_FORMAT=REDUNDANT record
@param[in] n_owned number of records skipped in the sparse page directory
@param[in] comp whether ROW_FORMAT is one of COMPACT,DYNAMIC,COMPRESSED
@param[in,out] mtr mini-transaction */
template<bool compressed>
inline void page_rec_set_n_owned(buf_block_t *block, rec_t *rec, ulint n_owned,
bool comp, mtr_t *mtr)
{
ut_ad(block->frame == page_align(rec));
ut_ad(comp == (page_is_comp(block->frame) != 0));

if (page_zip_des_t *page_zip= compressed
? buf_block_get_page_zip(block) : nullptr)
{
ut_ad(comp);
rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
if (rec_get_status(rec) != REC_STATUS_SUPREMUM)
page_zip_rec_set_owned(block, rec, n_owned, mtr);
}
else
{
rec-= comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED;
mtr->write<1,mtr_t::OPT>(*block, rec, (*rec & ~REC_N_OWNED_MASK) |
(n_owned << REC_N_OWNED_SHIFT));
}
}

/*************************************************************//**
Sets the max trx id field value. */
void
Expand Down Expand Up @@ -620,17 +652,6 @@ uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page); /*!< in: index page */
/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
page_t* page, /*!< in/out: page */
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
ulint n_slots);/*!< in: number of slots */

/** Gets the pointer to a directory slot.
@param n sparse directory slot number
@return pointer to the sparse directory slot */
Expand Down Expand Up @@ -664,30 +685,13 @@ inline const rec_t *page_dir_slot_get_rec(const page_dir_slot_t *slot)
return page_dir_slot_get_rec(const_cast<rec_t*>(slot));
}
/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t*slot, /*!< in: directory slot */
const rec_t* rec); /*!< in: record on the page */
/***************************************************************//**
Gets the number of records owned by a directory slot.
@return number of records */
UNIV_INLINE
ulint
page_dir_slot_get_n_owned(
/*======================*/
const page_dir_slot_t* slot); /*!< in: page directory slot */
/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /*!< in/out: directory slot */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n); /*!< in: number of records owned by the slot */
/************************************************************//**
Calculates the space reserved for directory slots of a given
number of records. The exact value is a fraction number
Expand Down Expand Up @@ -1138,6 +1142,7 @@ page_move_rec_list_start(
/**********************************************************//**
Parses a log record of a record list end or start deletion.
@return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte*
page_parse_delete_rec_list(
/*=======================*/
Expand Down
46 changes: 0 additions & 46 deletions storage/innobase/include/page0page.ic
Original file line number Diff line number Diff line change
Expand Up @@ -419,19 +419,6 @@ page_dir_get_n_slots(
{
return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
}
/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
page_t* page, /*!< in/out: page */
page_zip_des_t* page_zip,/*!< in/out: compressed page whose
uncompressed part will be updated, or NULL */
ulint n_slots)/*!< in: number of slots */
{
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}

/*************************************************************//**
Gets the number of records in the heap.
Expand Down Expand Up @@ -487,20 +474,6 @@ page_rec_check(
return(TRUE);
}

/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
page_dir_slot_t*slot, /*!< in: directory slot */
const rec_t* rec) /*!< in: record on the page */
{
ut_ad(page_rec_check(rec));

mach_write_to_2(slot, page_offset(rec));
}

/***************************************************************//**
Gets the number of records owned by a directory slot.
@return number of records */
Expand All @@ -518,25 +491,6 @@ page_dir_slot_get_n_owned(
}
}

/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t*slot, /*!< in/out: directory slot */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
ulint n) /*!< in: number of records owned by the slot */
{
rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot);
if (page_rec_is_comp(slot)) {
rec_set_n_owned_new(rec, page_zip, n);
} else {
ut_ad(!page_zip);
rec_set_n_owned_old(rec, n);
}
}

/************************************************************//**
Calculates the space reserved for directory slots of a given number of
records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
Expand Down
7 changes: 5 additions & 2 deletions storage/innobase/include/page0types.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ typedef byte page_t;
#ifndef UNIV_INNOCHECKSUM
/** Index page cursor */
struct page_cur_t;
/** Buffer pool block */
struct buf_block_t;

/** Compressed index page */
typedef byte page_zip_t;
Expand Down Expand Up @@ -150,9 +152,10 @@ must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
buf_block_t* block, /*!< in/out: ROW_FORMAT=COMPRESSED page */
const byte* rec, /*!< in: record on the uncompressed page */
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
ulint flag, /*!< in: the owned flag (nonzero=TRUE) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
#endif /* !UNIV_INNOCHECKSUM */
#endif
10 changes: 6 additions & 4 deletions storage/innobase/include/page0zip.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,10 @@ must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
buf_block_t* block, /*!< in/out: ROW_FORMAT=COMPRESSED page */
const byte* rec, /*!< in: record on the uncompressed page */
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
ulint flag, /*!< in: the owned flag (nonzero=TRUE) */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));

/**********************************************************************//**
Expand All @@ -385,9 +386,10 @@ page_zip_dir_delete(
byte* rec, /*!< in: deleted record */
const dict_index_t* index, /*!< in: index of rec */
const offset_t* offsets, /*!< in: rec_get_offsets(rec) */
const byte* free) /*!< in: previous start of
const byte* free, /*!< in: previous start of
the free list */
MY_ATTRIBUTE((nonnull(1,2,3,4)));
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull(1,2,3,4,6)));

/**********************************************************************//**
Add a slot to the dense page directory. */
Expand Down

0 comments on commit 2a77b2a

Please sign in to comment.