Skip to content
Permalink
Browse files
MDEV-17158 TRUNCATE is not atomic after MDEV-13564
It turned out that ha_innobase::truncate() would commit the
transaction prematurely, before ha_innobase::create() had
completed. All of this must be atomic.

innodb.truncate_crash: Use the correct DEBUG_SYNC point, and
tolerate non-truncation of the table, because the redo log
for the TRUNCATE transaction commit might be flushed due to
some InnoDB background activity.

dict_build_tablespace_for_table(): Merge into the function
dict_build_table_def_step().

dict_build_table_def_step(): If a table is being created during
an already started data dictionary transaction (such as TRUNCATE),
persistently write the table_id to the undo log header before
creating any file. In this way, the recovery of TRUNCATE will be
able to delete the new file before rolling back the rename of
the original table.

dict_table_rename_in_cache(): Add the parameter replace_new_file,
used as part of rolling back a TRUNCATE operation.

fil_rename_tablespace_check(): Add the parameter replace_new.
If the parameter is set and a file identified by new_path exists,
remove any tablespace associated with that file, and also the file itself.

create_table_info_t::create_table_def(): Remove some debug assertions
that no longer hold. During TRUNCATE, the transaction will already
have been started (and performed a rename operation) before the
table is created. Also, remove a call to dict_build_tablespace_for_table().

create_table_info_t::create_table(): Add the parameter create_fk=true.
During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the
InnoDB data dictionary, because they will not be removed either.

row_table_add_foreign_constraints(): If trx=NULL, do not modify
the InnoDB data dictionary, but only load the FOREIGN KEY constraints
from the data dictionary.

ha_innobase::create(): Lock the InnoDB data dictionary cache only
if no transaction was passed by the caller. Unlock it in any case.

innobase_rename_table(): Add the parameter commit = true.
If !commit, do not lock or unlock the data dictionary cache.

ha_innobase::truncate(): Lock the data dictionary before invoking
rename or create, and let ha_innobase::create() unlock it and
also commit or roll back the transaction.

trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation()
and declared global instead of static.

row_undo_ins_parse_undo_rec(): If table_id is set, this must
be rolling back the rename operation in TRUNCATE TABLE, and
therefore replace_new_file=true.
  • Loading branch information
dr-m committed Sep 10, 2018
1 parent 99e36a7 commit 75f8e86
Show file tree
Hide file tree
Showing 17 changed files with 198 additions and 185 deletions.
@@ -1,14 +1,14 @@
FLUSH TABLES;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET a=1;
INSERT INTO t1 VALUES (1),(2);
connect wait,localhost,root,,test;
SET DEBUG_SYNC='after_trx_committed_in_memory SIGNAL c WAIT_FOR ever';
SET DEBUG_SYNC='before_trx_state_committed_in_memory SIGNAL c WAIT_FOR ever';
TRUNCATE TABLE t1;
connection default;
SET DEBUG_SYNC='now WAIT_FOR c';
disconnect wait;
SELECT * FROM t1;
a
1
SELECT COUNT(*) FROM t1;
COUNT(*)
0
TRUNCATE TABLE t1;
DROP TABLE t1;
@@ -5,10 +5,10 @@

FLUSH TABLES;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET a=1;
INSERT INTO t1 VALUES (1),(2);

connect (wait,localhost,root,,test);
SET DEBUG_SYNC='after_trx_committed_in_memory SIGNAL c WAIT_FOR ever';
SET DEBUG_SYNC='before_trx_state_committed_in_memory SIGNAL c WAIT_FOR ever';
send TRUNCATE TABLE t1;

connection default;
@@ -17,6 +17,7 @@ SET DEBUG_SYNC='now WAIT_FOR c';
--source include/restart_mysqld.inc
disconnect wait;

SELECT * FROM t1;
--replace_result 2 0
SELECT COUNT(*) FROM t1;
TRUNCATE TABLE t1;
DROP TABLE t1;
@@ -38,6 +38,7 @@ Created 1/8/1996 Heikki Tuuri
#include "row0mysql.h"
#include "pars0pars.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "ut0vec.h"
#include "dict0priv.h"
#include "fts0priv.h"
@@ -352,61 +353,43 @@ dict_build_table_def_step(
tab_node_t* node) /*!< in: table create node */
{
dict_table_t* table;
dtuple_t* row;
dberr_t err = DB_SUCCESS;

table = node->table;
ut_ad(!dict_table_is_temporary(table));

trx_t* trx = thr_get_trx(thr);
dict_table_assign_new_id(table, trx);

err = dict_build_tablespace_for_table(table, node);

if (err != DB_SUCCESS) {
return(err);
}

row = dict_create_sys_tables_tuple(table, node->heap);

ins_node_set_new_row(node->tab_def, row);

return(err);
}

/** Builds a tablespace to contain a table, using file-per-table=1.
@param[in,out] table Table to build in its own tablespace.
@param[in] node Table create node
@return DB_SUCCESS or error code */
dberr_t
dict_build_tablespace_for_table(
dict_table_t* table,
tab_node_t* node)
{
dberr_t err = DB_SUCCESS;
mtr_t mtr;
ulint space = 0;
bool needs_file_per_table;
char* filepath;

ut_ad(mutex_own(&dict_sys->mutex));

needs_file_per_table
= DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE);

/* Always set this bit for all new created tables */
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
DICT_TF2_FLAG_UNSET(table,
DICT_TF2_FTS_AUX_HEX_NAME););

if (needs_file_per_table) {
ut_ad(!dict_table_is_temporary(table));
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
/* This table will need a new tablespace. */

ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
|| dict_table_get_format(table) >= UNIV_FORMAT_B);

ut_ad(trx->table_id);
mtr_t mtr;
trx_undo_t* undo = trx->rsegs.m_redo.insert_undo;
if (undo && !undo->table_id
&& trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
/* This must be a TRUNCATE operation where
the empty table is created after the old table
was renamed. Be sure to mark the transaction
associated with the new empty table, so that
we can remove it on recovery. */
mtr.start();
trx_undo_mark_as_dict(trx, undo, &mtr);
mtr.commit();
log_write_up_to(mtr.commit_lsn(), true);
}
ulint space;
/* Get a new tablespace ID */
dict_hdr_get_new_id(NULL, NULL, &space, table, false);

@@ -416,13 +399,14 @@ dict_build_tablespace_for_table(
);

if (space == ULINT_UNDEFINED) {
return(DB_ERROR);
return DB_ERROR;
}
table->space = static_cast<unsigned int>(space);
table->space = unsigned(space);

/* Determine the tablespace flags. */
bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
char* filepath;

if (has_data_dir) {
ut_ad(table->data_dir_path);
@@ -445,7 +429,7 @@ dict_build_tablespace_for_table(
- page 3 will contain the root of the clustered index of
the table we create here. */

err = fil_ibd_create(
dberr_t err = fil_ibd_create(
space, table->name.m_name, filepath, fsp_flags,
FIL_IBD_FILE_INITIAL_SIZE,
node ? node->mode : FIL_ENCRYPTION_DEFAULT,
@@ -454,30 +438,25 @@ dict_build_tablespace_for_table(
ut_free(filepath);

if (err != DB_SUCCESS) {

return(err);
return err;
}

mtr_start(&mtr);
mtr.start();
mtr.set_named_space(table->space);

fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);

mtr_commit(&mtr);
mtr.commit();
} else {
ut_ad(dict_tf_get_rec_format(table->flags)
!= REC_FORMAT_COMPRESSED);
if (dict_table_is_temporary(table)) {
table->space = SRV_TMP_SPACE_ID;
} else {
ut_ad(table->space == srv_sys_space.space_id());
}

DBUG_EXECUTE_IF("ib_ddl_crash_during_tablespace_alloc",
DBUG_SUICIDE(););
ut_ad(table->space == srv_sys_space.space_id());
}

return(DB_SUCCESS);
ins_node_set_new_row(node->tab_def,
dict_create_sys_tables_tuple(table, node->heap));

return DB_SUCCESS;
}

/***************************************************************//**
@@ -1581,9 +1581,14 @@ dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want
bool rename_also_foreigns,
/*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
bool replace_new_file)
/*!< in: whether to replace the
file with the new name
(as part of rolling back TRUNCATE) */
{
dberr_t err;
dict_foreign_t* foreign;
@@ -1685,7 +1690,8 @@ dict_table_rename_in_cache(

/* New filepath must not exist. */
err = fil_rename_tablespace_check(
table->space, old_path, new_path, false);
table->space, old_path, new_path, false,
replace_new_file);
if (err != DB_SUCCESS) {
ut_free(old_path);
ut_free(new_path);
@@ -3307,13 +3307,15 @@ if that the old filepath exists and the new filepath does not exist.
@param[in] old_path old filepath
@param[in] new_path new filepath
@param[in] is_discarded whether the tablespace is discarded
@param[in] replace_new whether to ignore the existence of new_path
@return innodb error code */
dberr_t
fil_rename_tablespace_check(
ulint space_id,
const char* old_path,
const char* new_path,
bool is_discarded)
bool is_discarded,
bool replace_new)
{
bool exists = false;
os_file_type_t ftype;
@@ -3330,7 +3332,11 @@ fil_rename_tablespace_check(
}

exists = false;
if (!os_file_status(new_path, &exists, &ftype) || exists) {
if (os_file_status(new_path, &exists, &ftype) && !exists) {
return DB_SUCCESS;
}

if (!replace_new) {
ib::error() << "Cannot rename '" << old_path
<< "' to '" << new_path
<< "' for space ID " << space_id
@@ -3339,6 +3345,34 @@ fil_rename_tablespace_check(
return(DB_TABLESPACE_EXISTS);
}

/* This must be during the ROLLBACK of TRUNCATE TABLE.
Because InnoDB only allows at most one data dictionary
transaction at a time, and because this incomplete TRUNCATE
would have created a new tablespace file, we must remove
a possibly existing tablespace that is associated with the
new tablespace file. */
retry:
mutex_enter(&fil_system->mutex);
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
ulint id = space->id;
if (id && id < SRV_LOG_SPACE_FIRST_ID
&& space->purpose == FIL_TYPE_TABLESPACE
&& !strcmp(new_path,
UT_LIST_GET_FIRST(space->chain)->name)) {
ib::info() << "TRUNCATE rollback: " << id
<< "," << new_path;
mutex_exit(&fil_system->mutex);
dberr_t err = fil_delete_tablespace(id);
if (err != DB_SUCCESS) {
return err;
}
goto retry;
}
}
mutex_exit(&fil_system->mutex);
fil_delete_file(new_path);

return(DB_SUCCESS);
}

0 comments on commit 75f8e86

Please sign in to comment.