Skip to content

Commit

Permalink
Parallel replication async deadlock kill
Browse files Browse the repository at this point in the history
When a deadlock kill is detected inside the storage engine, the kill
is not done immediately, to avoid calling back into the storage engine
kill_query method with various lock subsystem mutexes held. Instead the
kill is queued and done later by a slave background thread.

This patch in preparation for fixing TokuDB optimistic parallel
replication, as well as for removing locking hacks in InnoDB/XtraDB in
10.2.

Signed-off-by: Kristian Nielsen <knielsen at knielsen-hq.org>
  • Loading branch information
knielsen committed Sep 8, 2016
1 parent a02642b commit 7e0c9de
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 45 deletions.
11 changes: 11 additions & 0 deletions mysql-test/suite/perfschema/r/threads_mysql.result
Expand Up @@ -44,6 +44,16 @@ processlist_info NULL
unified_parent_thread_id unified parent_thread_id
role NULL
instrumented YES
name thread/sql/slave_background
type BACKGROUND
processlist_user NULL
processlist_host NULL
processlist_db NULL
processlist_command NULL
processlist_info NULL
unified_parent_thread_id unified parent_thread_id
role NULL
instrumented YES
CREATE TEMPORARY TABLE t1 AS
SELECT thread_id FROM performance_schema.threads
WHERE name LIKE 'thread/sql%';
Expand Down Expand Up @@ -105,4 +115,5 @@ parent_thread_name child_thread_name
thread/sql/event_scheduler thread/sql/event_worker
thread/sql/main thread/sql/one_connection
thread/sql/main thread/sql/signal_handler
thread/sql/main thread/sql/slave_background
thread/sql/one_connection thread/sql/event_scheduler
31 changes: 18 additions & 13 deletions sql/mysqld.cc
Expand Up @@ -382,7 +382,7 @@ static bool binlog_format_used= false;
LEX_STRING opt_init_connect, opt_init_slave;
mysql_cond_t COND_thread_cache;
static mysql_cond_t COND_flush_thread_cache;
mysql_cond_t COND_slave_init;
mysql_cond_t COND_slave_background;
static DYNAMIC_ARRAY all_options;

/* Global variables */
Expand Down Expand Up @@ -720,7 +720,7 @@ mysql_mutex_t
LOCK_crypt,
LOCK_global_system_variables,
LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
LOCK_connection_count, LOCK_error_messages, LOCK_slave_init;
LOCK_connection_count, LOCK_error_messages, LOCK_slave_background;

mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
LOCK_global_table_stats, LOCK_global_index_stats;
Expand Down Expand Up @@ -902,7 +902,7 @@ PSI_mutex_key key_LOCK_gtid_waiting;

PSI_mutex_key key_LOCK_after_binlog_sync;
PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered,
key_LOCK_slave_init;
key_LOCK_slave_background;
PSI_mutex_key key_TABLE_SHARE_LOCK_share;

static PSI_mutex_info all_server_mutexes[]=
Expand Down Expand Up @@ -968,7 +968,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL},
{ &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL},
{ &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
{ &key_LOCK_slave_init, "LOCK_slave_init", PSI_FLAG_GLOBAL},
{ &key_LOCK_slave_background, "LOCK_slave_background", PSI_FLAG_GLOBAL},
{ &key_LOG_INFO_lock, "LOG_INFO::lock", 0},
{ &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL},
{ &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL},
Expand Down Expand Up @@ -1023,7 +1023,7 @@ PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread,
key_COND_rpl_thread_stop, key_COND_rpl_thread_pool,
key_COND_parallel_entry, key_COND_group_commit_orderer,
key_COND_prepare_ordered, key_COND_slave_init;
key_COND_prepare_ordered, key_COND_slave_background;
PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;

static PSI_cond_info all_server_conds[]=
Expand Down Expand Up @@ -1073,15 +1073,15 @@ static PSI_cond_info all_server_conds[]=
{ &key_COND_parallel_entry, "COND_parallel_entry", 0},
{ &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0},
{ &key_COND_prepare_ordered, "COND_prepare_ordered", 0},
{ &key_COND_slave_init, "COND_slave_init", 0},
{ &key_COND_slave_background, "COND_slave_background", 0},
{ &key_COND_wait_gtid, "COND_wait_gtid", 0},
{ &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0}
};

PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
key_thread_handle_manager, key_thread_main,
key_thread_one_connection, key_thread_signal_hand,
key_thread_slave_init, key_rpl_parallel_thread;
key_thread_slave_background, key_rpl_parallel_thread;

static PSI_thread_info all_server_threads[]=
{
Expand All @@ -1107,7 +1107,7 @@ static PSI_thread_info all_server_threads[]=
{ &key_thread_main, "main", PSI_FLAG_GLOBAL},
{ &key_thread_one_connection, "one_connection", 0},
{ &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL},
{ &key_thread_slave_init, "slave_init", PSI_FLAG_GLOBAL},
{ &key_thread_slave_background, "slave_background", PSI_FLAG_GLOBAL},
{ &key_rpl_parallel_thread, "rpl_parallel_thread", 0}
};

Expand Down Expand Up @@ -2268,8 +2268,8 @@ static void clean_up_mutexes()
mysql_cond_destroy(&COND_prepare_ordered);
mysql_mutex_destroy(&LOCK_after_binlog_sync);
mysql_mutex_destroy(&LOCK_commit_ordered);
mysql_mutex_destroy(&LOCK_slave_init);
mysql_cond_destroy(&COND_slave_init);
mysql_mutex_destroy(&LOCK_slave_background);
mysql_cond_destroy(&COND_slave_background);
DBUG_VOID_RETURN;
}

Expand Down Expand Up @@ -4638,9 +4638,9 @@ static int init_thread_environment()
MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_slave_init, &LOCK_slave_init,
mysql_mutex_init(key_LOCK_slave_background, &LOCK_slave_background,
MY_MUTEX_INIT_SLOW);
mysql_cond_init(key_COND_slave_init, &COND_slave_init, NULL);
mysql_cond_init(key_COND_slave_background, &COND_slave_background, NULL);

#ifdef HAVE_OPENSSL
mysql_mutex_init(key_LOCK_des_key_file,
Expand Down Expand Up @@ -10131,6 +10131,9 @@ PSI_stage_info stage_waiting_for_rpl_thread_pool= { 0, "Waiting while replicatio
PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0};
PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0};
PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0};
PSI_stage_info stage_slave_background_process_request= { 0, "Processing requests", 0};
PSI_stage_info stage_slave_background_wait_request= { 0, "Waiting for requests", 0};
PSI_stage_info stage_waiting_for_deadlock_kill= { 0, "Waiting for parallel replication deadlock handling to complete", 0};

#ifdef HAVE_PSI_INTERFACE

Expand Down Expand Up @@ -10255,7 +10258,9 @@ PSI_stage_info *all_server_stages[]=
& stage_waiting_to_get_readlock,
& stage_master_gtid_wait_primary,
& stage_master_gtid_wait,
& stage_gtid_wait_other_connection
& stage_gtid_wait_other_connection,
& stage_slave_background_process_request,
& stage_slave_background_wait_request
};

PSI_socket_key key_socket_tcpip, key_socket_unix, key_socket_client_connection;
Expand Down
11 changes: 7 additions & 4 deletions sql/mysqld.h
Expand Up @@ -341,8 +341,8 @@ extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;

extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
key_thread_handle_manager, key_thread_kill_server, key_thread_main,
key_thread_one_connection, key_thread_signal_hand, key_thread_slave_init,
key_rpl_parallel_thread;
key_thread_one_connection, key_thread_signal_hand,
key_thread_slave_background, key_rpl_parallel_thread;

extern PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest,
key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file,
Expand Down Expand Up @@ -488,6 +488,9 @@ extern PSI_stage_info stage_waiting_for_rpl_thread_pool;
extern PSI_stage_info stage_master_gtid_wait_primary;
extern PSI_stage_info stage_master_gtid_wait;
extern PSI_stage_info stage_gtid_wait_other_connection;
extern PSI_stage_info stage_slave_background_process_request;
extern PSI_stage_info stage_slave_background_wait_request;
extern PSI_stage_info stage_waiting_for_deadlock_kill;

#ifdef HAVE_PSI_STATEMENT_INTERFACE
/**
Expand Down Expand Up @@ -556,7 +559,7 @@ extern mysql_mutex_t
LOCK_slave_list, LOCK_active_mi, LOCK_manager,
LOCK_global_system_variables, LOCK_user_conn,
LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count,
LOCK_slave_init;
LOCK_slave_background;
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_thread_count;
#ifdef HAVE_OPENSSL
extern char* des_key_file;
Expand All @@ -568,7 +571,7 @@ extern mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave;
extern mysql_rwlock_t LOCK_system_variables_hash;
extern mysql_cond_t COND_thread_count;
extern mysql_cond_t COND_manager;
extern mysql_cond_t COND_slave_init;
extern mysql_cond_t COND_slave_background;
extern int32 thread_running;
extern int32 thread_count, service_thread_count;

Expand Down
30 changes: 26 additions & 4 deletions sql/rpl_parallel.cc
Expand Up @@ -107,6 +107,25 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
}


/*
Wait for any pending deadlock kills. Since deadlock kills happen
asynchronously, we need to be sure they will be completed before starting a
new transaction. Otherwise the new transaction might suffer a spurious kill.
*/
static void
wait_for_pending_deadlock_kill(THD *thd, rpl_group_info *rgi)
{
PSI_stage_info old_stage;

mysql_mutex_lock(&thd->LOCK_wakeup_ready);
thd->ENTER_COND(&thd->COND_wakeup_ready, &thd->LOCK_wakeup_ready,
&stage_waiting_for_deadlock_kill, &old_stage);
while (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING)
mysql_cond_wait(&thd->COND_wakeup_ready, &thd->LOCK_wakeup_ready);
thd->EXIT_COND(&old_stage);
}


static void
finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
rpl_parallel_entry *entry, rpl_group_info *rgi)
Expand Down Expand Up @@ -212,6 +231,8 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
entry->stop_on_error_sub_id= sub_id;
mysql_mutex_unlock(&entry->LOCK_parallel_entry);

if (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING)
wait_for_pending_deadlock_kill(thd, rgi);
thd->clear_error();
thd->reset_killed();
/*
Expand Down Expand Up @@ -604,7 +625,6 @@ convert_kill_to_deadlock_error(rpl_group_info *rgi)
{
thd->clear_error();
my_error(ER_LOCK_DEADLOCK, MYF(0));
rgi->killed_for_retry= false;
thd->reset_killed();
}
}
Expand Down Expand Up @@ -695,14 +715,16 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
DBUG_EXECUTE_IF("inject_mdev8031", {
/* Simulate that we get deadlock killed at this exact point. */
rgi->killed_for_retry= true;
rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->killed= KILL_CONNECTION;
mysql_mutex_unlock(&thd->LOCK_thd_data);
});
rgi->cleanup_context(thd, 1);
wait_for_pending_deadlock_kill(thd, rgi);
thd->reset_killed();
thd->clear_error();
rgi->killed_for_retry = rpl_group_info::RETRY_KILL_NONE;

/*
If we retry due to a deadlock kill that occurred during the commit step, we
Expand Down Expand Up @@ -841,7 +863,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
{
/* Simulate that we get deadlock killed during open_binlog(). */
thd->reset_for_next_command();
rgi->killed_for_retry= true;
rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->killed= KILL_CONNECTION;
mysql_mutex_unlock(&thd->LOCK_thd_data);
Expand Down Expand Up @@ -1741,7 +1763,7 @@ rpl_parallel_thread::get_rgi(Relay_log_info *rli, Gtid_log_event *gtid_ev,
rgi->relay_log= rli->last_inuse_relaylog;
rgi->retry_start_offset= rli->future_event_relay_log_pos-event_size;
rgi->retry_event_count= 0;
rgi->killed_for_retry= false;
rgi->killed_for_retry= rpl_group_info::RETRY_KILL_NONE;

return rgi;
}
Expand Down
7 changes: 6 additions & 1 deletion sql/rpl_rli.h
Expand Up @@ -712,7 +712,12 @@ struct rpl_group_info
*/
SPECULATE_WAIT
} speculation;
bool killed_for_retry;
enum enum_retry_killed {
RETRY_KILL_NONE = 0,
RETRY_KILL_PENDING,
RETRY_KILL_KILLED
};
uchar killed_for_retry;

rpl_group_info(Relay_log_info *rli_);
~rpl_group_info();
Expand Down

0 comments on commit 7e0c9de

Please sign in to comment.