Skip to content

Commit

Permalink
MDEV-21069 Crash on DROP TABLE if the data file is corrupted
Browse files Browse the repository at this point in the history
buf_read_ibuf_merge_pages(): Discard any page numbers that are
outside the current bounds of the tablespace, by invoking the
function ibuf_delete_recs() that was introduced in MDEV-20934.
This could avoid an infinite change buffer merge loop on
innodb_fast_shutdown=0, because normally the change buffer merge
would only be attempted if a page was successfully loaded into
the buffer pool.

dict_drop_index_tree(): Add the parameter trx_t*.
To prevent the DROP TABLE crash, do not invoke btr_free_if_exists()
if the entire .ibd file will be dropped. Thus, we will avoid a crash
if the BTR_SEG_LEAF or BTR_SEG_TOP of the index is corrupted,
and we will also avoid unnecessarily accessing the to-be-dropped
tablespace via the buffer pool.

In MariaDB 10.2, we disable the DROP TABLE fix if innodb_safe_truncate=0,
because the backup-unsafe MySQL 5.7 WL#6501 form of TRUNCATE TABLE
requires that the individual pages be freed inside the tablespace.
  • Loading branch information
dr-m committed Nov 18, 2019
1 parent bd2b05d commit b80df9e
Show file tree
Hide file tree
Showing 9 changed files with 90 additions and 44 deletions.
26 changes: 25 additions & 1 deletion mysql-test/suite/innodb/t/ibuf_not_empty.test
Expand Up @@ -73,8 +73,32 @@ EOF
--replace_regex /contains \d+ entries/contains #### entries/
check table t1;

--source include/shutdown_mysqld.inc

# Truncate the file to 5 pages, as if it were empty
perl;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
open(FILE, "+<$file") || die "Unable to open $file";
binmode FILE;
my $ps= $ENV{PAGE_SIZE};
my $pages=5;
my $page;
die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
substr($page,46,4)=pack("N", $pages);
my $polynomial = 0x82f63b78; # CRC-32C
my $ck= pack("N",mycrc32(substr($page, 4, 22), 0, $polynomial) ^
mycrc32(substr($page, 38, $ps - 38 - 8), 0, $polynomial));
substr($page,0,4)=$ck;
substr($page,$ps-8,4)=$ck;
sysseek(FILE, 0, 0) || die "Unable to rewind $file\n";
syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
truncate(FILE, $ps * $pages);
close(FILE) || die "Unable to close $file";
EOF

--let $restart_parameters=
--source include/restart_mysqld.inc
--source include/start_mysqld.inc
SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc

Expand Down
33 changes: 24 additions & 9 deletions storage/innobase/buf/buf0rea.cc
Expand Up @@ -819,11 +819,8 @@ buf_read_ibuf_merge_pages(
#endif

for (ulint i = 0; i < n_stored; i++) {
bool found;
const page_size_t page_size(fil_space_get_page_size(
space_ids[i], &found));

if (!found) {
fil_space_t* space = fil_space_acquire_silent(space_ids[i]);
if (!space) {
tablespace_deleted:
/* The tablespace was not found: remove all
entries for it */
Expand All @@ -835,6 +832,19 @@ buf_read_ibuf_merge_pages(
continue;
}

if (UNIV_UNLIKELY(page_nos[i] >= space->size)) {
do {
ibuf_delete_recs(page_id_t(space_ids[i],
page_nos[i]));
} while (++i < n_stored
&& space_ids[i - 1] == space_ids[i]
&& page_nos[i] >= space->size);
i--;
next:
fil_space_release(space);
continue;
}

const page_id_t page_id(space_ids[i], page_nos[i]);

buf_pool_t* buf_pool = buf_pool_get(page_id);
Expand All @@ -849,24 +859,29 @@ buf_read_ibuf_merge_pages(
buf_read_page_low(&err,
sync && (i + 1 == n_stored),
0,
BUF_READ_ANY_PAGE, page_id, page_size,
true, true /* ignore_missing_space */);
BUF_READ_ANY_PAGE, page_id,
page_size_t(space->flags), true);

switch(err) {
case DB_SUCCESS:
case DB_TABLESPACE_TRUNCATED:
case DB_ERROR:
break;
case DB_TABLESPACE_DELETED:
fil_space_release(space);
goto tablespace_deleted;
case DB_PAGE_CORRUPTED:
case DB_DECRYPTION_FAILED:
ib::error() << "Failed to read or decrypt " << page_id
<< " for change buffer merge";
ib::error() << "Failed to read or decrypt page "
<< page_nos[i]
<< " of '" << space->chain.start->name
<< "' for change buffer merge";
break;
default:
ut_error;
}

goto next;
}

os_aio_simulated_wake_handler_threads();
Expand Down
24 changes: 14 additions & 10 deletions storage/innobase/dict/dict0crea.cc
Expand Up @@ -954,17 +954,13 @@ dict_create_index_tree_in_mem(
/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out] rec SYS_INDEXES record
@param[in,out] pcur persistent cursor on rec
@param[in,out] trx dictionary transaction
@param[in,out] mtr mini-transaction
@return whether freeing the B-tree was attempted */
bool
dict_drop_index_tree(
rec_t* rec,
btr_pcur_t* pcur,
mtr_t* mtr)
bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
{
const byte* ptr;
ulint len;
ulint space;
ulint root_page_no;

ut_ad(mutex_own(&dict_sys->mutex));
Expand All @@ -991,15 +987,23 @@ dict_drop_index_tree(

ut_ad(len == 4);

space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
const uint32_t space_id = mach_read_from_4(ptr);
ut_ad(space_id < SRV_TMP_SPACE_ID);
if (space_id != TRX_SYS_SPACE
&& srv_safe_truncate
&& trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
/* We are about to delete the entire .ibd file;
do not bother to free pages inside it. */
return false;
}

ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__ID, &len);

ut_ad(len == 8);

bool found;
const page_size_t page_size(fil_space_get_page_size(space,
const page_size_t page_size(fil_space_get_page_size(space_id,
&found));

if (!found) {
Expand All @@ -1012,11 +1016,11 @@ dict_drop_index_tree(
/* If tablespace is scheduled for truncate, do not try to drop
the indexes in that tablespace. There is a truncate fixup action
which will take care of it. */
if (srv_is_tablespace_truncated(space)) {
if (srv_is_tablespace_truncated(space_id)) {
return(false);
}

btr_free_if_exists(page_id_t(space, root_page_no), page_size,
btr_free_if_exists(page_id_t(space_id, root_page_no), page_size,
mach_read_from_8(ptr), mtr);

return(true);
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/ibuf/ibuf0ibuf.cc
Expand Up @@ -4333,7 +4333,7 @@ This prevents an infinite loop on slow shutdown
in the case where the change buffer bitmap claims that no buffered
changes exist, while entries exist in the change buffer tree.
@param page_id page number for which there should be no unbuffered changes */
ATTRIBUTE_COLD static void ibuf_delete_recs(const page_id_t page_id)
ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id)
{
ulint dops[IBUF_OP_COUNT];
mtr_t mtr;
Expand Down
10 changes: 4 additions & 6 deletions storage/innobase/include/dict0crea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Expand Down Expand Up @@ -121,13 +121,11 @@ dict_recreate_index_tree(
/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out] rec SYS_INDEXES record
@param[in,out] pcur persistent cursor on rec
@param[in,out] trx dictionary transaction
@param[in,out] mtr mini-transaction
@return whether freeing the B-tree was attempted */
bool
dict_drop_index_tree(
rec_t* rec,
btr_pcur_t* pcur,
mtr_t* mtr);
bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));

/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
Expand Down
8 changes: 8 additions & 0 deletions storage/innobase/include/ibuf0ibuf.h
Expand Up @@ -326,6 +326,14 @@ ibuf_insert(
const page_size_t& page_size,
que_thr_t* thr);

/**
Delete any buffered entries for a page.
This prevents an infinite loop on slow shutdown
in the case where the change buffer bitmap claims that no buffered
changes exist, while entries exist in the change buffer tree.
@param page_id page number for which there should be no unbuffered changes */
ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id);

/** When an index page is read from a disk to the buffer pool, this function
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
Expand Down
20 changes: 9 additions & 11 deletions storage/innobase/row/row0trunc.cc
Expand Up @@ -748,14 +748,10 @@ class DropIndex : public Callback {
Constructor
@param[in,out] table Table to truncate
@param[in,out] trx dictionary transaction
@param[in] noredo whether to disable redo logging */
DropIndex(dict_table_t* table, bool noredo)
:
Callback(table->id, noredo),
m_table(table)
{
/* No op */
}
DropIndex(dict_table_t* table, trx_t* trx, bool noredo)
: Callback(table->id, noredo), m_trx(trx), m_table(table) {}

/**
@param mtr mini-transaction covering the read
Expand All @@ -764,8 +760,10 @@ class DropIndex : public Callback {
dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;

private:
/** dictionary transaction */
trx_t* const m_trx;
/** Table to be truncated */
dict_table_t* m_table;
dict_table_t* const m_table;
};

/** Callback to create the indexes during TRUNCATE */
Expand Down Expand Up @@ -907,7 +905,7 @@ DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
{
rec_t* rec = btr_pcur_get_rec(pcur);

bool freed = dict_drop_index_tree(rec, pcur, mtr);
bool freed = dict_drop_index_tree(rec, pcur, m_trx, mtr);

#ifdef UNIV_DEBUG
{
Expand Down Expand Up @@ -1122,7 +1120,7 @@ row_truncate_rollback(
it can be recovered using drop/create sequence. */
dict_table_x_lock_indexes(table);

DropIndex dropIndex(table, no_redo);
DropIndex dropIndex(table, trx, no_redo);

SysIndexIterator().for_each(dropIndex);

Expand Down Expand Up @@ -1936,7 +1934,7 @@ dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx)
indexes) */
if (!dict_table_is_temporary(table)) {

DropIndex dropIndex(table, no_redo);
DropIndex dropIndex(table, trx, no_redo);

err = SysIndexIterator().for_each(dropIndex);

Expand Down
3 changes: 2 additions & 1 deletion storage/innobase/row/row0uins.cc
Expand Up @@ -126,7 +126,8 @@ row_undo_ins_remove_clust_rec(
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);

dict_drop_index_tree(
btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
btr_pcur_get_rec(&node->pcur), &node->pcur, node->trx,
&mtr);

mtr.commit();

Expand Down
8 changes: 3 additions & 5 deletions storage/innobase/row/row0upd.cc
Expand Up @@ -3092,9 +3092,7 @@ row_upd_clust_step(

ulint mode;

DEBUG_SYNC_C_IF_THD(
thr_get_trx(thr)->mysql_thd,
"innodb_row_upd_clust_step_enter");
DEBUG_SYNC_C_IF_THD(trx->mysql_thd, "innodb_row_upd_clust_step_enter");

if (dict_index_is_online_ddl(index)) {
ut_ad(node->table->id != DICT_INDEXES_ID);
Expand Down Expand Up @@ -3123,7 +3121,7 @@ row_upd_clust_step(
ut_ad(!dict_index_is_online_ddl(index));

dict_drop_index_tree(
btr_pcur_get_rec(pcur), pcur, &mtr);
btr_pcur_get_rec(pcur), pcur, trx, &mtr);

mtr.commit();

Expand Down Expand Up @@ -3155,7 +3153,7 @@ row_upd_clust_step(
}
}

ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
ut_ad(lock_trx_has_rec_x_lock(trx, index->table,
btr_pcur_get_block(pcur),
page_rec_get_heap_no(rec)));

Expand Down

0 comments on commit b80df9e

Please sign in to comment.