Skip to content

Commit b08448d

Browse files
committed
MDEV-20612: Partition lock_sys.latch
We replace the old lock_sys.mutex (which was renamed to lock_sys.latch) with a combination of a global lock_sys.latch and table or page hash lock mutexes. The global lock_sys.latch can be acquired in exclusive mode, or it can be acquired in shared mode and another mutex will be acquired to protect the locks for a particular page or a table. This is inspired by mysql/mysql-server@1d259b8, but the optimization of lock_release() will be done in the next commit. Also, we will interleave mutexes with the hash table elements, similar to how buf_pool.page_hash was optimized in commit 5155a30 (MDEV-22871). dict_table_t::autoinc_trx: Use Atomic_relaxed. dict_table_t::autoinc_mutex: Use srw_mutex in order to reduce the memory footprint. On 64-bit Linux or OpenBSD, both this and the new dict_table_t::lock_mutex should be 32 bits and be stored in the same 64-bit word. On Microsoft Windows, the underlying SRWLOCK is 32 or 64 bits, and on other systems, sizeof(pthread_mutex_t) can be much larger. ib_lock_t::trx_locks, trx_lock_t::trx_locks: Document the new rules. Writers must assert lock_sys.is_writer() || trx->mutex_is_owner(). LockGuard: An RAII wrapper for acquiring a page hash table lock. LockGGuard: Like LockGuard, but when Galera Write-Set Replication is enabled, we must acquire all shards, for updating arbitrary trx_locks. LockMultiGuard: An RAII wrapper for acquiring two page hash table locks. lock_rec_create_wsrep(), lock_table_create_wsrep(): Special Galera conflict resolution in non-inlined functions in order to keep the common code paths shorter. lock_sys_t::prdt_page_free_from_discard(): Refactored from lock_prdt_page_free_from_discard() and lock_rec_free_all_from_discard_page(). trx_t::commit_tables(): Replaces trx_update_mod_tables_timestamp(). lock_release(): Let trx_t::commit_tables() invalidate the query cache for those tables that were actually modified by the transaction. Merge lock_check_dict_lock() into lock_release().
We must never release lock_sys.latch while holding any lock_sys_t::hash_latch. Violating this rule could lead to memory corruption if the buffer pool is resized between the time lock_sys.latch is released and the time the hash_latch is released.
1 parent b01d8e1 commit b08448d

File tree

20 files changed

+1109
-831
lines changed

20 files changed

+1109
-831
lines changed

storage/innobase/btr/btr0btr.cc

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3355,11 +3355,10 @@ btr_lift_page_up(
33553355
const page_id_t id{block->page.id()};
33563356
/* Free predicate page locks on the block */
33573357
if (index->is_spatial()) {
3358-
LockMutexGuard g{SRW_LOCK_CALL};
3359-
lock_prdt_page_free_from_discard(
3360-
id, &lock_sys.prdt_page_hash);
3358+
lock_sys.prdt_page_free_from_discard(id);
3359+
} else {
3360+
lock_update_copy_and_discard(*father_block, id);
33613361
}
3362-
lock_update_copy_and_discard(*father_block, id);
33633362
}
33643363

33653364
/* Go upward to root page, decrementing levels by one. */
@@ -3609,10 +3608,7 @@ btr_compress(
36093608
}
36103609

36113610
/* No GAP lock needs to be worrying about */
3612-
LockMutexGuard g{SRW_LOCK_CALL};
3613-
lock_prdt_page_free_from_discard(
3614-
id, &lock_sys.prdt_page_hash);
3615-
lock_rec_free_all_from_discard_page(id);
3611+
lock_sys.prdt_page_free_from_discard(id);
36163612
} else {
36173613
btr_cur_node_ptr_delete(&father_cursor, mtr);
36183614
if (!dict_table_is_locking_disabled(index->table)) {
@@ -3762,10 +3758,7 @@ btr_compress(
37623758
merge_page, mtr);
37633759
}
37643760
const page_id_t id{block->page.id()};
3765-
LockMutexGuard g{SRW_LOCK_CALL};
3766-
lock_prdt_page_free_from_discard(
3767-
id, &lock_sys.prdt_page_hash);
3768-
lock_rec_free_all_from_discard_page(id);
3761+
lock_sys.prdt_page_free_from_discard(id);
37693762
} else {
37703763

37713764
compressed = btr_cur_pessimistic_delete(&err, TRUE,

storage/innobase/btr/btr0cur.cc

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1999,12 +1999,13 @@ btr_cur_search_to_nth_level_func(
19991999
trx_t* trx = thr_get_trx(cursor->thr);
20002000
lock_prdt_t prdt;
20012001

2002-
{
2003-
LockMutexGuard g{SRW_LOCK_CALL};
2004-
lock_init_prdt_from_mbr(
2005-
&prdt, &cursor->rtr_info->mbr, mode,
2006-
trx->lock.lock_heap);
2007-
}
2002+
lock_sys.rd_lock(SRW_LOCK_CALL);
2003+
trx->mutex_lock();
2004+
lock_init_prdt_from_mbr(
2005+
&prdt, &cursor->rtr_info->mbr, mode,
2006+
trx->lock.lock_heap);
2007+
lock_sys.rd_unlock();
2008+
trx->mutex_unlock();
20082009

20092010
if (rw_latch == RW_NO_LATCH && height != 0) {
20102011
block->lock.s_lock();
@@ -3242,8 +3243,7 @@ btr_cur_ins_lock_and_undo(
32423243
/* Use on stack MBR variable to test if a lock is
32433244
needed. If so, the predicate (MBR) will be allocated
32443245
from lock heap in lock_prdt_insert_check_and_lock() */
3245-
lock_init_prdt_from_mbr(
3246-
&prdt, &mbr, 0, NULL);
3246+
lock_init_prdt_from_mbr(&prdt, &mbr, 0, nullptr);
32473247

32483248
if (dberr_t err = lock_prdt_insert_check_and_lock(
32493249
rec, btr_cur_get_block(cursor),

storage/innobase/dict/dict0dict.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,7 +1247,8 @@ inline void dict_sys_t::add(dict_table_t* table)
12471247

12481248
ulint fold = ut_fold_string(table->name.m_name);
12491249

1250-
new (&table->autoinc_mutex) std::mutex();
1250+
table->autoinc_mutex.init();
1251+
table->lock_mutex_init();
12511252

12521253
/* Look for a table with the same name: error if such exists */
12531254
{
@@ -2038,7 +2039,8 @@ void dict_sys_t::remove(dict_table_t* table, bool lru, bool keep)
20382039
UT_DELETE(table->vc_templ);
20392040
}
20402041

2041-
table->autoinc_mutex.~mutex();
2042+
table->autoinc_mutex.destroy();
2043+
table->lock_mutex_destroy();
20422044

20432045
if (keep) {
20442046
return;

storage/innobase/gis/gis0sea.cc

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1197,9 +1197,7 @@ rtr_check_discard_page(
11971197

11981198
mysql_mutex_unlock(&index->rtr_track->rtr_active_mutex);
11991199

1200-
LockMutexGuard g{SRW_LOCK_CALL};
1201-
lock_prdt_page_free_from_discard(id, &lock_sys.prdt_hash);
1202-
lock_prdt_page_free_from_discard(id, &lock_sys.prdt_page_hash);
1200+
lock_sys.prdt_page_free_from_discard(id, true);
12031201
}
12041202

12051203
/** Structure acts as functor to get the optimistic access of the page.

storage/innobase/handler/ha_innodb.cc

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2253,7 +2253,7 @@ ha_innobase::innobase_reset_autoinc(
22532253
if (error == DB_SUCCESS) {
22542254

22552255
dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
2256-
m_prebuilt->table->autoinc_mutex.unlock();
2256+
m_prebuilt->table->autoinc_mutex.wr_unlock();
22572257
}
22582258

22592259
return(error);
@@ -2685,13 +2685,13 @@ static bool innobase_query_caching_table_check_low(
26852685
For read-only transaction: should satisfy (1) and (3)
26862686
For read-write transaction: should satisfy (1), (2), (3) */
26872687

2688-
if (trx->id && trx->id < table->query_cache_inv_trx_id) {
2688+
const trx_id_t inv = table->query_cache_inv_trx_id;
2689+
2690+
if (trx->id && trx->id < inv) {
26892691
return false;
26902692
}
26912693

2692-
if (trx->read_view.is_open()
2693-
&& trx->read_view.low_limit_id()
2694-
< table->query_cache_inv_trx_id) {
2694+
if (trx->read_view.is_open() && trx->read_view.low_limit_id() < inv) {
26952695
return false;
26962696
}
26972697

@@ -5359,7 +5359,7 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
53595359

53605360
const unsigned col_no = innodb_col_no(field);
53615361

5362-
table->autoinc_mutex.lock();
5362+
table->autoinc_mutex.wr_lock();
53635363

53645364
table->persistent_autoinc = static_cast<uint16_t>(
53655365
dict_table_get_nth_col_pos(table, col_no, NULL) + 1)
@@ -5390,7 +5390,7 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
53905390
innobase_get_int_col_max_value(field));
53915391
}
53925392

5393-
table->autoinc_mutex.unlock();
5393+
table->autoinc_mutex.wr_unlock();
53945394
}
53955395

53965396
/** Open an InnoDB table
@@ -7192,7 +7192,7 @@ ha_innobase::innobase_lock_autoinc(void)
71927192
switch (innobase_autoinc_lock_mode) {
71937193
case AUTOINC_NO_LOCKING:
71947194
/* Acquire only the AUTOINC mutex. */
7195-
m_prebuilt->table->autoinc_mutex.lock();
7195+
m_prebuilt->table->autoinc_mutex.wr_lock();
71967196
break;
71977197

71987198
case AUTOINC_NEW_STYLE_LOCKING:
@@ -7206,14 +7206,14 @@ ha_innobase::innobase_lock_autoinc(void)
72067206
case SQLCOM_REPLACE:
72077207
case SQLCOM_END: // RBR event
72087208
/* Acquire the AUTOINC mutex. */
7209-
m_prebuilt->table->autoinc_mutex.lock();
7209+
m_prebuilt->table->autoinc_mutex.wr_lock();
72107210
/* We need to check that another transaction isn't
72117211
already holding the AUTOINC lock on the table. */
72127212
if (!m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
72137213
/* Do not fall back to old style locking. */
72147214
DBUG_RETURN(error);
72157215
}
7216-
m_prebuilt->table->autoinc_mutex.unlock();
7216+
m_prebuilt->table->autoinc_mutex.wr_unlock();
72177217
}
72187218
/* Use old style locking. */
72197219
/* fall through */
@@ -7225,7 +7225,7 @@ ha_innobase::innobase_lock_autoinc(void)
72257225
if (error == DB_SUCCESS) {
72267226

72277227
/* Acquire the AUTOINC mutex. */
7228-
m_prebuilt->table->autoinc_mutex.lock();
7228+
m_prebuilt->table->autoinc_mutex.wr_lock();
72297229
}
72307230
break;
72317231

@@ -7253,7 +7253,7 @@ ha_innobase::innobase_set_max_autoinc(
72537253
if (error == DB_SUCCESS) {
72547254

72557255
dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
7256-
m_prebuilt->table->autoinc_mutex.unlock();
7256+
m_prebuilt->table->autoinc_mutex.wr_unlock();
72577257
}
72587258

72597259
return(error);
@@ -12634,7 +12634,7 @@ create_table_info_t::create_table_update_dict()
1263412634
autoinc = 1;
1263512635
}
1263612636

12637-
innobase_table->autoinc_mutex.lock();
12637+
innobase_table->autoinc_mutex.wr_lock();
1263812638
dict_table_autoinc_initialize(innobase_table, autoinc);
1263912639

1264012640
if (innobase_table->is_temporary()) {
@@ -12662,7 +12662,7 @@ create_table_info_t::create_table_update_dict()
1266212662
}
1266312663
}
1266412664

12665-
innobase_table->autoinc_mutex.unlock();
12665+
innobase_table->autoinc_mutex.wr_unlock();
1266612666
}
1266712667

1266812668
innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);
@@ -15910,7 +15910,7 @@ ha_innobase::innobase_get_autoinc(
1591015910
/* It should have been initialized during open. */
1591115911
if (*value == 0) {
1591215912
m_prebuilt->autoinc_error = DB_UNSUPPORTED;
15913-
m_prebuilt->table->autoinc_mutex.unlock();
15913+
m_prebuilt->table->autoinc_mutex.wr_unlock();
1591415914
}
1591515915
}
1591615916

@@ -15934,7 +15934,7 @@ ha_innobase::innobase_peek_autoinc(void)
1593415934

1593515935
innodb_table = m_prebuilt->table;
1593615936

15937-
innodb_table->autoinc_mutex.lock();
15937+
innodb_table->autoinc_mutex.wr_lock();
1593815938

1593915939
auto_inc = dict_table_autoinc_read(innodb_table);
1594015940

@@ -15943,7 +15943,7 @@ ha_innobase::innobase_peek_autoinc(void)
1594315943
" '" << innodb_table->name << "'";
1594415944
}
1594515945

15946-
innodb_table->autoinc_mutex.unlock();
15946+
innodb_table->autoinc_mutex.wr_unlock();
1594715947

1594815948
return(auto_inc);
1594915949
}
@@ -16050,7 +16050,7 @@ ha_innobase::get_auto_increment(
1605016050
/* Out of range number. Let handler::update_auto_increment()
1605116051
take care of this */
1605216052
m_prebuilt->autoinc_last_value = 0;
16053-
m_prebuilt->table->autoinc_mutex.unlock();
16053+
m_prebuilt->table->autoinc_mutex.wr_unlock();
1605416054
*nb_reserved_values= 0;
1605516055
return;
1605616056
}
@@ -16093,7 +16093,7 @@ ha_innobase::get_auto_increment(
1609316093
m_prebuilt->autoinc_offset = offset;
1609416094
m_prebuilt->autoinc_increment = increment;
1609516095

16096-
m_prebuilt->table->autoinc_mutex.unlock();
16096+
m_prebuilt->table->autoinc_mutex.wr_unlock();
1609716097
}
1609816098

1609916099
/*******************************************************************//**
@@ -17998,7 +17998,6 @@ int wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal)
1799817998
{
1799917999
ut_ad(bf_thd);
1800018000
ut_ad(victim_trx);
18001-
lock_sys.assert_locked();
1800218001
ut_ad(victim_trx->mutex_is_owner());
1800318002

1800418003
DBUG_ENTER("wsrep_innobase_kill_one_trx");
@@ -18059,6 +18058,7 @@ int wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal)
1805918058
} else if (victim_trx->lock.wait_lock) {
1806018059
mysql_mutex_lock(&lock_sys.wait_mutex);
1806118060
if (lock_t* wait_lock = victim_trx->lock.wait_lock) {
18061+
lock_sys.assert_locked(*wait_lock);
1806218062
DBUG_ASSERT(victim_trx->is_wsrep());
1806318063
WSREP_DEBUG("victim has wait flag: %lu",
1806418064
thd_get_thread_id(thd));

storage/innobase/ibuf/ibuf0ibuf.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3280,8 +3280,8 @@ ibuf_insert_low(
32803280
ibuf_mtr_commit(&bitmap_mtr);
32813281
goto fail_exit;
32823282
} else {
3283-
LockMutexGuard g{SRW_LOCK_CALL};
3284-
if (lock_sys.get_first(page_id)) {
3283+
LockGuard g{lock_sys.rec_hash, page_id};
3284+
if (lock_sys.rec_hash.get_first(page_id)) {
32853285
goto commit_exit;
32863286
}
32873287
}

0 commit comments

Comments
 (0)