Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
MDEV-14717 RENAME TABLE in InnoDB is not crash-safe
This is a backport of commit 0bc3675
and commit 9eb3fcc.

InnoDB in MariaDB 10.2 appears to only write MLOG_FILE_RENAME2
redo log records during table-rebuilding ALGORITHM=INPLACE operations.
We must write the records for any .ibd file renames, so that the
operations are crash-safe.

If InnoDB is killed during a RENAME TABLE operation, it can happen that
the transaction for updating the data dictionary will be rolled back.
But nothing will roll back the renaming of the .ibd file
(the MLOG_FILE_RENAME2 record only guarantees roll-forward) or, for that
matter, the renaming of dict_table_t::name in the dict_sys cache. We
introduce the undo log record TRX_UNDO_RENAME_TABLE to fix this.

fil_space_for_table_exists_in_mem(): Remove the parameters
adjust_space and table_id, along with some code that was trying to
work around these deficiencies.

fil_name_write_rename(): Write a MLOG_FILE_RENAME2 record.

dict_table_rename_in_cache(): Invoke fil_name_write_rename().

trx_undo_rec_copy(): Set the first 2 bytes to the length of the
copied undo log record.

trx_undo_page_report_rename(), trx_undo_report_rename():
Write a TRX_UNDO_RENAME_TABLE record with the old table name.

row_rename_table_for_mysql(): Invoke trx_undo_report_rename()
before modifying any data dictionary tables.

row_undo_ins_parse_undo_rec(): Roll back TRX_UNDO_RENAME_TABLE
by invoking dict_table_rename_in_cache(), which will take care
of both renaming the table and the file.

ha_innobase::truncate(): Remove a work-around.
  • Loading branch information
dr-m committed Sep 7, 2018
1 parent e67b107 commit cf2a442
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 86 deletions.
12 changes: 12 additions & 0 deletions mysql-test/suite/innodb/r/rename_table_debug.result
@@ -0,0 +1,12 @@
CREATE TABLE t1 (a INT UNSIGNED PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES(42);
connect con1,localhost,root,,test;
SET DEBUG_SYNC='before_rename_table_commit SIGNAL renamed WAIT_FOR ever';
RENAME TABLE t1 TO t2;
connection default;
SET DEBUG_SYNC='now WAIT_FOR renamed';
disconnect con1;
SELECT * FROM t1;
a
42
DROP TABLE t1;
19 changes: 19 additions & 0 deletions mysql-test/suite/innodb/t/rename_table_debug.test
@@ -0,0 +1,19 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/not_embedded.inc

CREATE TABLE t1 (a INT UNSIGNED PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES(42);

--connect (con1,localhost,root,,test)
SET DEBUG_SYNC='before_rename_table_commit SIGNAL renamed WAIT_FOR ever';
--send
RENAME TABLE t1 TO t2;
--connection default
SET DEBUG_SYNC='now WAIT_FOR renamed';
--let $shutdown_timeout=0
--source include/restart_mysqld.inc
--disconnect con1
SELECT * FROM t1;
DROP TABLE t1;
2 changes: 2 additions & 0 deletions storage/innobase/dict/dict0dict.cc
Expand Up @@ -1692,6 +1692,8 @@ dict_table_rename_in_cache(
return(err);
}

fil_name_write_rename(table->space, old_path, new_path);

bool success = fil_rename_tablespace(
table->space, old_path, new_name, new_path);

Expand Down
5 changes: 2 additions & 3 deletions storage/innobase/dict/dict0load.cc
Expand Up @@ -1448,7 +1448,7 @@ dict_check_sys_tables(
look to see if it is already in the tablespace cache. */
if (fil_space_for_table_exists_in_mem(
space_id, table_name.m_name,
false, true, NULL, 0, flags)) {
false, NULL, flags)) {
/* Recovery can open a datafile that does not
match SYS_DATAFILES. If they don't match, update
SYS_DATAFILES. */
Expand Down Expand Up @@ -2857,8 +2857,7 @@ dict_load_tablespace(

/* The tablespace may already be open. */
if (fil_space_for_table_exists_in_mem(
table->space, space_name, false,
true, heap, table->id, table->flags)) {
table->space, space_name, false, heap, table->flags)) {
return;
}

Expand Down
69 changes: 21 additions & 48 deletions storage/innobase/fil/fil0fil.cc
Expand Up @@ -2313,7 +2313,7 @@ fil_op_write_log(
@param[in,out] mtr mini-transaction */
static
void
fil_name_write_rename(
fil_name_write_rename_low(
ulint space_id,
ulint first_page_no,
const char* old_name,
Expand All @@ -2327,6 +2327,23 @@ fil_name_write_rename(
space_id, first_page_no, old_name, new_name, 0, mtr);
}

/** Write redo log for renaming a file.
The record is written in a mini-transaction of its own, which is
committed here; log_write_up_to() is then invoked with the commit LSN
of that mini-transaction before returning.
@param[in] space_id tablespace id
@param[in] old_name tablespace file name
@param[in] new_name tablespace file name after renaming */
void
fil_name_write_rename(
ulint space_id,
const char* old_name,
const char* new_name)
{
mtr_t mtr;
mtr.start();
/* The second argument is first_page_no; 0 is always passed here. */
fil_name_write_rename_low(space_id, 0, old_name, new_name, &mtr);
mtr.commit();
/* NOTE(review): the second argument is presumably flush_to_disk,
i.e. the record is made durable before the caller renames the file;
confirm against the declaration of log_write_up_to(). */
log_write_up_to(mtr.commit_lsn(), true);
}

/** Write MLOG_FILE_NAME for a file.
@param[in] space_id tablespace id
@param[in] first_page_no first page number in the file
Expand Down Expand Up @@ -3394,12 +3411,7 @@ fil_rename_tablespace(
ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);

if (!recv_recovery_on) {
mtr_t mtr;

mtr.start();
fil_name_write_rename(
id, 0, old_file_name, new_file_name, &mtr);
mtr.commit();
fil_name_write_rename(id, old_file_name, new_file_name);
log_mutex_enter();
}

Expand Down Expand Up @@ -4457,19 +4469,15 @@ startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] print_error_if_does_not_exist
Print detailed error information to the
error log if a matching tablespace is not found from memory.
@param[in] adjust_space Whether to adjust space id on mismatch
@param[in] heap Heap memory
@param[in] table_id table id
@param[in] table_flags table flags
@return true if a matching tablespace exists in the memory cache */
bool
fil_space_for_table_exists_in_mem(
ulint id,
const char* name,
bool print_error_if_does_not_exist,
bool adjust_space,
mem_heap_t* heap,
table_id_t table_id,
ulint table_flags)
{
fil_space_t* fnamespace;
Expand All @@ -4494,41 +4502,6 @@ fil_space_for_table_exists_in_mem(
} else if (!valid || space == fnamespace) {
/* Found with the same file name, or got a flag mismatch. */
goto func_exit;
} else if (adjust_space
&& row_is_mysql_tmp_table_name(space->name)
&& !row_is_mysql_tmp_table_name(name)) {
/* Info from fnamespace comes from the ibd file
itself, it can be different from data obtained from
System tables since renaming files is not
transactional. We shall adjust the ibd file name
according to system table info. */
mutex_exit(&fil_system->mutex);

DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
DBUG_SUICIDE(););

const char* tmp_name = dict_mem_create_temporary_tablename(
heap, name, table_id);

fil_rename_tablespace(
fnamespace->id,
UT_LIST_GET_FIRST(fnamespace->chain)->name,
tmp_name, NULL);

DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
DBUG_SUICIDE(););

fil_rename_tablespace(
id, UT_LIST_GET_FIRST(space->chain)->name,
name, NULL);

DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
DBUG_SUICIDE(););

mutex_enter(&fil_system->mutex);
fnamespace = fil_space_get_by_name(name);
ut_ad(space == fnamespace);
goto func_exit;
}

if (!print_error_if_does_not_exist) {
Expand Down Expand Up @@ -5576,7 +5549,7 @@ fil_mtr_rename_log(
return(err);
}

fil_name_write_rename(
fil_name_write_rename_low(
old_table->space, 0, old_path, tmp_path, mtr);

ut_free(tmp_path);
Expand Down Expand Up @@ -5607,7 +5580,7 @@ fil_mtr_rename_log(
}
}

fil_name_write_rename(
fil_name_write_rename_low(
new_table->space, 0, new_path, old_path, mtr);

ut_free(new_path);
Expand Down
8 changes: 1 addition & 7 deletions storage/innobase/handler/ha_innodb.cc
Expand Up @@ -13353,6 +13353,7 @@ innobase_rename_table(
DEBUG_SYNC_C("innodb_rename_table_ready");

trx_start_if_not_started(trx, true);
ut_ad(trx->will_lock > 0);

/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations. */
Expand Down Expand Up @@ -13504,13 +13505,6 @@ int ha_innobase::truncate()
}

if (err) {
/* Before MDEV-14717, rollback of RENAME TABLE fails
to undo the rename in the file system, so we do it
manually here. In case the server is killed before the
TRUNCATE operation is committed, after recovery in
MariaDB 10.2, the data file could end up "missing"
(remain called temp_name). */
innobase_rename_table(trx, temp_name, name);
trx_rollback_to_savepoint(trx, NULL);
}

Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/include/dict0dict.h
Expand Up @@ -411,7 +411,7 @@ dict_table_rename_in_cache(
/*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
MY_ATTRIBUTE((nonnull, warn_unused_result));
MY_ATTRIBUTE((nonnull));

/** Removes an index from the dictionary cache.
@param[in,out] table table whose index to remove
Expand Down
40 changes: 23 additions & 17 deletions storage/innobase/include/fil0fil.h
Expand Up @@ -869,6 +869,15 @@ fil_create_directory_for_tablename(
/*===============================*/
const char* name); /*!< in: name in the standard
'databasename/tablename' format */
/** Write redo log (a MLOG_FILE_RENAME2 record) for renaming a file.
@param[in] space_id tablespace id
@param[in] old_name tablespace file name
@param[in] new_name tablespace file name after renaming */
void
fil_name_write_rename(
ulint space_id,
const char* old_name,
const char* new_name);
/********************************************************//**
Recreates table indexes by applying
TRUNCATE log record during recovery.
Expand Down Expand Up @@ -1128,27 +1137,24 @@ fil_file_readdir_next_file(
os_file_dir_t dir, /*!< in: directory stream */
os_file_stat_t* info); /*!< in/out: buffer where the
info is returned */
/*******************************************************************//**
Returns true if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache.
/** Determine if a matching tablespace exists in the InnoDB tablespace
memory cache. Note that if we have not done a crash recovery at the database
startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] id Tablespace ID
@param[in] name Tablespace name used in fil_space_create().
@param[in] print_error_if_does_not_exist
Print detailed error information to the
error log if a matching tablespace is not found from memory.
@param[in] heap Heap memory
@param[in] table_flags table flags
@return true if a matching tablespace exists in the memory cache */
bool
fil_space_for_table_exists_in_mem(
/*==============================*/
ulint id, /*!< in: space id */
const char* name, /*!< in: table name in the standard
'databasename/tablename' format */
ulint id,
const char* name,
bool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
bool adjust_space, /*!< in: whether to adjust space id
when find table space mismatch */
mem_heap_t* heap, /*!< in: heap memory */
table_id_t table_id, /*!< in: table id */
ulint table_flags); /*!< in: table flags */
mem_heap_t* heap,
ulint table_flags);

/** Try to extend a tablespace if it is smaller than the specified size.
@param[in,out] space tablespace
Expand Down
8 changes: 8 additions & 0 deletions storage/innobase/include/trx0rec.h
Expand Up @@ -179,6 +179,13 @@ trx_undo_rec_get_partial_row(
mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Report a RENAME TABLE operation.
Writes a TRX_UNDO_RENAME_TABLE undo log record with the old table name,
so that a rollback of the transaction can rename the table back.
@param[in,out] trx transaction
@param[in] table table that is being renamed
@return DB_SUCCESS or error code */
dberr_t
trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
Expand Down Expand Up @@ -322,6 +329,7 @@ trx_undo_read_v_idx(
compilation info multiplied by 16 is ORed to this value in an undo log
record */

#define TRX_UNDO_RENAME_TABLE 9 /*!< RENAME TABLE */
#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
record */
Expand Down
5 changes: 4 additions & 1 deletion storage/innobase/include/trx0rec.ic
Expand Up @@ -95,5 +95,8 @@ trx_undo_rec_copy(
len = mach_read_from_2(undo_rec)
- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
ut_ad(len < UNIV_PAGE_SIZE);
return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len));
trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(rec, len);
return rec;
}
16 changes: 13 additions & 3 deletions storage/innobase/row/row0mysql.cc
Expand Up @@ -3299,7 +3299,7 @@ row_drop_single_table_tablespace(

/* If the tablespace is not in the cache, just delete the file. */
if (!fil_space_for_table_exists_in_mem(
space_id, tablename, true, false, NULL, 0, table_flags)) {
space_id, tablename, true, NULL, table_flags)) {

/* Force a delete of any discarded or temporary files. */
fil_delete_file(filepath);
Expand Down Expand Up @@ -4391,6 +4391,14 @@ row_rename_table_for_mysql(
goto funct_exit;
}

if (!table->is_temporary()) {
err = trx_undo_report_rename(trx, table);

if (err != DB_SUCCESS) {
goto funct_exit;
}
}

/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data from system tables. */

Expand Down Expand Up @@ -4576,8 +4584,9 @@ row_rename_table_for_mysql(
}
}

if ((dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
if (err == DB_SUCCESS
&& (dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
&& !dict_tables_have_same_db(old_name, new_name)) {
err = fts_rename_aux_tables(table, new_name, trx);
if (err != DB_TABLE_NOT_FOUND) {
Expand Down Expand Up @@ -4734,6 +4743,7 @@ row_rename_table_for_mysql(
}

if (commit) {
DEBUG_SYNC(trx->mysql_thd, "before_rename_table_commit");
trx_commit_for_mysql(trx);
}

Expand Down
26 changes: 22 additions & 4 deletions storage/innobase/row/row0uins.cc
Expand Up @@ -330,16 +330,13 @@ row_undo_ins_parse_undo_rec(
byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
ulint type;
ulint dummy;
bool dummy_extern;

ut_ad(node);

ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy,
&dummy_extern, &undo_no, &table_id);
ut_ad(type == TRX_UNDO_INSERT_REC);
node->rec_type = type;

node->update = NULL;
node->table = dict_table_open_on_id(
Expand All @@ -350,6 +347,27 @@ row_undo_ins_parse_undo_rec(
return;
}

switch (node->rec_type) {
default:
ut_ad(!"wrong undo record type");
goto close_table;
case TRX_UNDO_INSERT_REC:
break;
case TRX_UNDO_RENAME_TABLE:
dict_table_t* table = node->table;
ut_ad(!table->is_temporary());
ut_ad(dict_table_is_file_per_table(table)
== (table->space != TRX_SYS_SPACE));
size_t len = mach_read_from_2(node->undo_rec)
+ node->undo_rec - ptr - 2;
ptr[len] = 0;
const char* name = reinterpret_cast<char*>(ptr);
if (strcmp(table->name.m_name, name)) {
dict_table_rename_in_cache(table, name, false);
}
goto close_table;
}

if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
close_table:
/* Normally, tables should not disappear or become
Expand Down

0 comments on commit cf2a442

Please sign in to comment.