Skip to content

Commit

Permalink
Fixed RocksDB to follow THD ha_data protocol
Browse files Browse the repository at this point in the history
Use thd_get_ha_data()/thd_set_ha_data() which protect against plugin
removal until it has THD ha_data.

Do not reset THD ha_data in rocksdb_close_connection(), cleaner approach
is to let ha_close_connection() do it.

Removed transaction objects cleanup from rocksdb_done_func(). As we lock
plugin properly, there must be no transaction objects during RocksDB
shutdown.
  • Loading branch information
svoj committed May 16, 2019
1 parent 8a880d6 commit a24dffd
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 67 deletions.
67 changes: 12 additions & 55 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3290,9 +3290,9 @@ void Rdb_snapshot_notifier::SnapshotCreated(
std::multiset<Rdb_transaction *> Rdb_transaction::s_tx_list;
mysql_mutex_t Rdb_transaction::s_tx_list_mutex;

static Rdb_transaction *&get_tx_from_thd(THD *const thd) {
return *reinterpret_cast<Rdb_transaction **>(
my_core::thd_ha_data(thd, rocksdb_hton));
static Rdb_transaction *get_tx_from_thd(THD *const thd) {
return reinterpret_cast<Rdb_transaction *>(
my_core::thd_get_ha_data(thd, rocksdb_hton));
}

namespace {
Expand Down Expand Up @@ -3339,7 +3339,7 @@ class Rdb_perf_context_guard {
*/

static Rdb_transaction *get_or_create_tx(THD *const thd) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
// TODO: this is called too many times.. O(#rows)
if (tx == nullptr) {
bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET.
Expand All @@ -3354,6 +3354,7 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
}
tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks));
tx->start_tx();
my_core::thd_set_ha_data(thd, rocksdb_hton, tx);
} else {
tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks));
if (!tx->is_tx_started()) {
Expand All @@ -3365,7 +3366,7 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
}

static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
if (tx != nullptr) {
int rc = tx->finish_bulk_load(false);
if (rc != 0) {
Expand All @@ -3376,7 +3377,6 @@ static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
}

delete tx;
tx = nullptr;
}
return HA_EXIT_SUCCESS;
}
Expand Down Expand Up @@ -3444,7 +3444,7 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
{
bool async=false; // This is "ASYNC_COMMIT" feature which is only present in webscalesql

Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
if (!tx->can_prepare()) {
return HA_EXIT_FAILURE;
}
Expand Down Expand Up @@ -3695,7 +3695,7 @@ static void rocksdb_commit_ordered(handlerton *hton, THD* thd, bool all)
// Same assert as InnoDB has
DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
OPTION_BEGIN)));
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
if (!tx->is_two_phase()) {
/*
ordered_commit is supposedly slower as it is done sequentially
Expand Down Expand Up @@ -3727,7 +3727,7 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);

/* note: h->external_lock(F_UNLCK) is called after this function is called) */
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);

/* this will trigger saving of perf_context information */
Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
Expand Down Expand Up @@ -3800,7 +3800,7 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)

static int rocksdb_rollback(handlerton *const hton, THD *const thd,
bool rollback_tx) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));

if (tx != nullptr) {
Expand Down Expand Up @@ -4607,7 +4607,7 @@ static int rocksdb_savepoint(handlerton *const hton, THD *const thd,

static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd,
void *const savepoint) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
return tx->rollback_to_savepoint(savepoint);
}

Expand Down Expand Up @@ -5346,49 +5346,6 @@ static int rocksdb_done_func(void *const p) {
error = 1;
}

/*
MariaDB: When the plugin is unloaded with UNINSTALL SONAME command, some
connections may still have Rdb_transaction objects.
These objects are not genuine transactions (as SQL layer makes sure that
a plugin that is being unloaded has no open tables), they are empty
Rdb_transaction objects that were left there to save on object
creation/deletion.
Go through the list and delete them.
*/
{
class Rdb_trx_deleter: public Rdb_tx_list_walker {
public:
std::set<Rdb_transaction*> rdb_trxs;

void process_tran(const Rdb_transaction *const tx) override {
/*
Check if the transaction is really empty. We only check
non-WriteBatch-based transactions, because there is no easy way to
check WriteBatch-based transactions.
*/
if (!tx->is_writebatch_trx()) {
const auto tx_impl = static_cast<const Rdb_transaction_impl *>(tx);
DBUG_ASSERT(tx_impl);
if (tx_impl->get_rdb_trx())
DBUG_ASSERT(0);
}
rdb_trxs.insert((Rdb_transaction*)tx);
};
} deleter;

Rdb_transaction::walk_tx_list(&deleter);

for (std::set<Rdb_transaction*>::iterator it= deleter.rdb_trxs.begin();
it != deleter.rdb_trxs.end();
++it)
{
// When a transaction is deleted, it removes itself from s_tx_list.
delete *it;
}
}

/*
destructors for static objects can be called at _exit(),
but we want to free the memory at dlclose()
Expand Down Expand Up @@ -13831,7 +13788,7 @@ int rocksdb_check_bulk_load(
return 1;
}

Rdb_transaction *&tx = get_tx_from_thd(thd);
Rdb_transaction *tx = get_tx_from_thd(thd);
if (tx != nullptr) {
const int rc = tx->finish_bulk_load();
if (rc != 0) {
Expand Down
12 changes: 8 additions & 4 deletions storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
# MDEV-14843: Assertion `s_tx_list.size() == 0' failed in myrocks::Rdb_transaction::term_mutex
#
INSTALL SONAME 'ha_rocksdb';
connect con1,localhost,root,,test;
CREATE TABLE t1 (i INT) ENGINE=RocksDB;
insert into t1 values (1);
connect con1,localhost,root,,;
connection con1;
insert into test.t1 values (1);
connection default;
DROP TABLE t1;
connection default;
UNINSTALL SONAME 'ha_rocksdb';
Warnings:
Warning 1620 Plugin is busy and will be uninstalled on shutdown
SELECT ENGINE, SUPPORT FROM INFORMATION_SCHEMA.ENGINES WHERE ENGINE='ROCKSDB';
ENGINE SUPPORT
ROCKSDB NO
disconnect con1;
#
# MDEV-15686: Loading MyRocks plugin back after it has been unloaded causes a crash
#
Expand Down
14 changes: 8 additions & 6 deletions storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@
INSTALL SONAME 'ha_rocksdb';
--enable_warnings

connect (con1,localhost,root,,test);
CREATE TABLE t1 (i INT) ENGINE=RocksDB;
insert into t1 values (1);

connect (con1,localhost,root,,);
connection con1;
insert into test.t1 values (1);
DROP TABLE t1;

connection default;

# Cleanup
DROP TABLE t1;
UNINSTALL SONAME 'ha_rocksdb';
SELECT ENGINE, SUPPORT FROM INFORMATION_SCHEMA.ENGINES WHERE ENGINE='ROCKSDB';
disconnect con1;
# Unfortunately this is the only more or less reliable way to wait until
# connection done ha_close_connections().
let $wait_condition= SELECT VARIABLE_VALUE=1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='Threads_cached';
--source include/wait_condition.inc

--echo #
--echo # MDEV-15686: Loading MyRocks plugin back after it has been unloaded causes a crash
Expand Down
4 changes: 2 additions & 2 deletions storage/rocksdb/rdb_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ namespace myrocks {
Since we cannot or don't want to change the API in any way, we can use this
mechanism to define readability tokens that look like C++ namespaces, but are
not enforced in any way by the compiler, since the pre-compiler strips them
out. However, on the calling side, code looks like my_core::thd_ha_data()
rather than plain a thd_ha_data() call. This technique adds an immediate
out. However, on the calling side, code looks like my_core::thd_get_ha_data()
rather than plain a thd_get_ha_data() call. This technique adds an immediate
visible cue on what type of API we are calling into.
*/

Expand Down

0 comments on commit a24dffd

Please sign in to comment.