From 5569132ffebba3fd2e37964543f658ed24d8caaf Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 19 Aug 2014 19:28:35 +0300 Subject: [PATCH] MDEV-6450 - MariaDB crash on Power8 when built with advance tool chain Part of this work is based on Stewart Smith's memory barrier and lower priority patches for power8. - Added memory synchronization for innodb & xtradb for power8. - Added HAVE_WINDOWS_MM_FENCE to CMakeLists.txt - Added os_isync to fix a synchronization problem on power - Added log_get_lsn_nowait which is now used by srv_error_monitor_thread to ensure that it does not block if the log mutex is locked. All changes are done for both InnoDB and XtraDB --- include/my_cpu.h | 44 ++++++++++++++++++++++++ storage/innobase/CMakeLists.txt | 39 ++++++++++++++++++++++ storage/innobase/include/log0log.h | 7 ++++ storage/innobase/include/log0log.ic | 20 +++++++++++ storage/innobase/include/os0sync.h | 48 +++++++++++++++++++++++++++ storage/innobase/include/sync0rw.ic | 6 ++-- storage/innobase/include/sync0sync.ic | 3 ++ storage/innobase/srv/srv0srv.c | 13 ++++++-- storage/innobase/sync/sync0arr.c | 3 ++ storage/innobase/sync/sync0rw.c | 22 ++++++++++-- storage/innobase/sync/sync0sync.c | 7 +++- storage/xtradb/CMakeLists.txt | 39 ++++++++++++++++++++++ storage/xtradb/include/log0log.h | 7 ++++ storage/xtradb/include/log0log.ic | 20 +++++++++++ storage/xtradb/include/os0sync.h | 48 +++++++++++++++++++++++++++ storage/xtradb/include/sync0rw.ic | 6 ++-- storage/xtradb/include/sync0sync.ic | 3 ++ storage/xtradb/srv/srv0srv.c | 13 ++++++-- storage/xtradb/sync/sync0arr.c | 3 ++ storage/xtradb/sync/sync0rw.c | 20 +++++++++-- storage/xtradb/sync/sync0sync.c | 7 +++- 21 files changed, 359 insertions(+), 19 deletions(-) create mode 100644 include/my_cpu.h diff --git a/include/my_cpu.h b/include/my_cpu.h new file mode 100644 index 0000000000000..026b92c1b7452 --- /dev/null +++ b/include/my_cpu.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL + + This program is free software; you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA +*/ + +/* instructions for specific cpu's */ + +/* + Macros for adjusting thread priority (hardware multi-threading) + The defines are the same ones used by the linux kernel +*/ + +#if defined(__powerpc__) +/* Very low priority */ +#define HMT_very_low() asm volatile("or 31,31,31") +/* Low priority */ +#define HMT_low() asm volatile("or 1,1,1") +/* Medium low priority */ +#define HMT_medium_low() asm volatile("or 6,6,6") +/* Medium priority */ +#define HMT_medium() asm volatile("or 2,2,2") +/* Medium high priority */ +#define HMT_medium_high() asm volatile("or 5,5,5") +/* High priority */ +#define HMT_high() asm volatile("or 3,3,3") +#else +#define HMT_very_low() +#define HMT_low() +#define HMT_medium_low() +#define HMT_medium() +#define HMT_medium_high() +#define HMT_high() +#endif diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index e4455630bc821..1e1b74f82cd51 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -85,12 +85,39 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_BUILTINS ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __sync_synchronize(); + return(0); + }" + HAVE_IB_GCC_SYNC_SYNCHRONISE + ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(__ATOMIC_RELEASE); + return(0); + }" + 
HAVE_IB_GCC_ATOMIC_THREAD_FENCE + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) ENDIF() +IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) +ENDIF() + +IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( @@ -169,10 +196,21 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") return(0); } " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) + CHECK_C_SOURCE_COMPILES( + "#include + int main() { + __machine_r_barrier(); + __machine_w_barrier(); + return(0); + }" + HAVE_IB_MACHINE_BARRIER_SOLARIS) ENDIF() IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) ENDIF() + IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) + ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) + ENDIF() ENDIF() @@ -190,6 +228,7 @@ ENDIF() IF(MSVC) ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) + ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) ENDIF() diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index b0e5e9bda3b54..fab91b5c5c275 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -154,6 +154,13 @@ UNIV_INLINE ib_uint64_t log_get_lsn(void); /*=============*/ +/************************************************************//** +Gets the current lsn. +@return current lsn */ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void); +/*=============*/ /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. 
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index 8eca3b911d037..a55bbeff78922 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -411,6 +411,26 @@ log_get_lsn(void) return(lsn); } +/************************************************************//** +Gets the current lsn with a trylock +@return current lsn or 0 if false*/ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void) +/*=============*/ +{ + lsn_t lsn; + + if (mutex_enter_nowait(&(log_sys->mutex))) + return 0; + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index c6672aa73b663..32fd27960f30d 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -416,6 +416,54 @@ clobbered */ "Mutexes and rw_locks use InnoDB's own implementation" #endif +/** barrier definitions for memory ordering */ +#ifdef HAVE_IB_GCC_ATOMIC_THREAD_FENCE +# define HAVE_MEMORY_BARRIER +# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef __powerpc__ +# define os_isync __asm __volatile ("isync":::"memory") +#else +#define os_isync do { } while(0) +#endif + +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __atomic_thread_fence() is used for memory barrier" + +#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __sync_synchronize() +# define os_wmb __sync_synchronize() +# define os_isync __sync_synchronize() +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __sync_synchronize() is used for memory barrier" + +#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb 
__machine_r_barrier() +# define os_wmb __machine_w_barrier() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Soralis memory ordering functions are used for memory barrier" + +#elif defined(HAVE_WINDOWS_MM_FENCE) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb _mm_lfence() +# define os_wmb _mm_sfence() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "_mm_lfence() and _mm_sfence() are used for memory barrier" + +#else +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#endif + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index 28d0611a673d5..a25aa19d3aa06 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -200,14 +200,14 @@ rw_lock_lock_word_decr( ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; - while (local_lock_word > 0) { + lint local_lock_word; + os_rmb; + while ((local_lock_word= lock->lock_word) > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, local_lock_word - amount)) { return(TRUE); } - local_lock_word = lock->lock_word; } return(FALSE); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index 6958faa5c6f9e..ec352e8a5fef6 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -92,6 +92,7 @@ mutex_test_and_set( ut_a(mutex->lock_word == 0); mutex->lock_word = 1; + os_wmb; } return((byte)ret); @@ -147,6 +148,7 @@ mutex_get_waiters( ptr = &(mutex->waiters); + os_rmb; return(*ptr); /* Here we assume that the read of a single word from memory is atomic */ } @@ -181,6 +183,7 @@ mutex_exit_func( to wake up possible hanging 
threads if they are missed in mutex_signal_object. */ + os_isync; if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 739535be849cb..90f72f5adf321 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -411,7 +411,12 @@ UNIV_INTERN ibool srv_use_checksums = TRUE; UNIV_INTERN ulong srv_replication_delay = 0; /*-------------------------------------------*/ +#ifdef HAVE_MEMORY_BARRIER +/* No idea to wait long with memory barriers */ +UNIV_INTERN ulong srv_n_spin_wait_rounds = 15; +#else UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +#endif UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; UNIV_INTERN ulong srv_thread_sleep_delay = 10000; UNIV_INTERN ulong srv_spin_wait_delay = 6; @@ -2459,9 +2464,10 @@ srv_error_monitor_thread( /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - new_lsn = log_get_lsn(); + /* We have to use nowait to ensure we don't block */ + new_lsn= log_get_lsn_nowait(); - if (new_lsn < old_lsn) { + if (new_lsn && new_lsn < old_lsn) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: old log sequence number %llu" @@ -2473,7 +2479,8 @@ srv_error_monitor_thread( ut_ad(0); } - old_lsn = new_lsn; + if (new_lsn) + old_lsn = new_lsn; if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index ea4d496e6b592..ba1d5cd8e61fa 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -791,6 +791,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; + os_rmb; if (lock->lock_word > 0) { /* Either unlocked or only read locked. 
*/ @@ -802,6 +803,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; /* lock_word == 0 means all readers have left */ + os_rmb; if (lock->lock_word == 0) { return(TRUE); @@ -810,6 +812,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; /* lock_word > 0 means no writer or reserved writer */ + os_rmb; if (lock->lock_word > 0) { return(TRUE); diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c index 8de9b40ef6745..21e3d57287a50 100644 --- a/storage/innobase/sync/sync0rw.c +++ b/storage/innobase/sync/sync0rw.c @@ -40,6 +40,7 @@ Created 9/11/1995 Heikki Tuuri #include "srv0srv.h" #include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ #include "ha_prototypes.h" +#include "my_cpu.h" /* IMPLEMENTATION OF THE RW_LOCK @@ -390,15 +391,19 @@ rw_lock_s_lock_spin( lock_loop: /* Spin waiting for the writer field to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } i++; + os_rmb; } - - if (i == SYNC_SPIN_ROUNDS) { + HMT_medium(); + if (lock->lock_word <= 0) + { os_thread_yield(); } @@ -498,16 +503,19 @@ rw_lock_x_lock_wait( ulint index; ulint i = 0; + os_rmb; ut_ad(lock->lock_word <= 0); - + HMT_low(); while (lock->lock_word < 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } if(i < SYNC_SPIN_ROUNDS) { i++; + os_rmb; continue; } + HMT_medium(); /* If there is still a reader, then go to sleep.*/ rw_x_spin_round_count += i; @@ -544,7 +552,9 @@ rw_lock_x_lock_wait( sync_array_free_cell(sync_primary_wait_array, index); } + HMT_low(); } + HMT_medium(); rw_x_spin_round_count += i; } @@ -582,6 +592,8 @@ rw_lock_x_lock_low( file_name, line); } else { + if (!pass) + os_rmb; /* Decrement failed: relock or failed lock */ if (!pass && lock->recursive && os_thread_eq(lock->writer_thread, curr_thread)) { @@ -647,6 +659,8 @@ rw_lock_x_lock_func( } /* Spin waiting for the lock_word to become free 
*/ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { @@ -655,7 +669,9 @@ rw_lock_x_lock_func( } i++; + os_rmb; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { os_thread_yield(); } else { diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c index fba43ad859c3a..cd81cccfc5ae2 100644 --- a/storage/innobase/sync/sync0sync.c +++ b/storage/innobase/sync/sync0sync.c @@ -47,6 +47,7 @@ Created 9/5/1995 Heikki Tuuri # include "srv0start.h" /* srv_is_being_started */ #endif /* UNIV_SYNC_DEBUG */ #include "ha_prototypes.h" +#include "my_cpu.h" /* REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX @@ -473,6 +474,8 @@ mutex_set_waiters( ptr = &(mutex->waiters); + os_wmb; + *ptr = n; /* Here we assume that the write of a single word in memory is atomic */ } @@ -520,13 +523,15 @@ mutex_spin_wait( spin_loop: ut_d(mutex->count_spin_loop++); + HMT_low(); while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } - + os_rmb; // Ensure future reads sees new values i++; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { #ifdef UNIV_DEBUG diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 5cbdddfa778f0..50f05159d109e 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -117,6 +117,25 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_BUILTINS_64 ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __sync_synchronize(); + return(0); + }" + HAVE_IB_GCC_SYNC_SYNCHRONISE + ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(__ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_THREAD_FENCE + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) @@ -127,6 +146,14 @@ IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1) ENDIF() +IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) + 
ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) +ENDIF() + +IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( @@ -205,10 +232,21 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") return(0); } " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) + CHECK_C_SOURCE_COMPILES( + "#include + int main() { + __machine_r_barrier(); + __machine_w_barrier(); + return(0); + }" + HAVE_IB_MACHINE_BARRIER_SOLARIS) ENDIF() IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) ENDIF() + IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) + ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) + ENDIF() ENDIF() @@ -226,6 +264,7 @@ ENDIF() IF(MSVC) ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) + ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) # Avoid "unreferenced label" warning in generated file GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 18ae51cb0cafc..450d19e2a572c 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -162,6 +162,13 @@ UNIV_INLINE ib_uint64_t log_get_lsn(void); /*=============*/ +/************************************************************//** +Gets the current lsn. +@return current lsn */ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void); +/*=============*/ /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. 
diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 4a881b1a032aa..e4218861b6627 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -426,6 +426,26 @@ log_get_lsn(void) return(lsn); } +/************************************************************//** +Gets the current lsn with a trylock +@return current lsn or 0 if false*/ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void) +/*=============*/ +{ + lsn_t lsn; + + if (mutex_enter_nowait(&(log_sys->mutex))) + return 0; + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 60ee5dca08f1c..1bd0efe9a185a 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -436,6 +436,54 @@ clobbered */ "Mutexes and rw_locks use InnoDB's own implementation" #endif +/** barrier definitions for memory ordering */ +#ifdef HAVE_IB_GCC_ATOMIC_THREAD_FENCE +# define HAVE_MEMORY_BARRIER +# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef __powerpc__ +# define os_isync __asm __volatile ("isync":::"memory") +#else +#define os_isync do { } while(0) +#endif + +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __atomic_thread_fence() is used for memory barrier" + +#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __sync_synchronize() +# define os_wmb __sync_synchronize() +# define os_isync __sync_synchronize() +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __sync_synchronize() is used for memory barrier" + +#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb __machine_r_barrier() +# 
define os_wmb __machine_w_barrier() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Soralis memory ordering functions are used for memory barrier" + +#elif defined(HAVE_WINDOWS_MM_FENCE) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb _mm_lfence() +# define os_wmb _mm_sfence() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "_mm_lfence() and _mm_sfence() are used for memory barrier" + +#else +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#endif + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic index 706ccbc00de05..08baee30670f0 100644 --- a/storage/xtradb/include/sync0rw.ic +++ b/storage/xtradb/include/sync0rw.ic @@ -200,14 +200,14 @@ rw_lock_lock_word_decr( ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; - while (local_lock_word > 0) { + lint local_lock_word; + os_rmb; + while ((local_lock_word= lock->lock_word) > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, local_lock_word - amount)) { return(TRUE); } - local_lock_word = lock->lock_word; } return(FALSE); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic index 73e7379cac18e..acc389bfad97a 100644 --- a/storage/xtradb/include/sync0sync.ic +++ b/storage/xtradb/include/sync0sync.ic @@ -92,6 +92,7 @@ mutex_test_and_set( ut_a(mutex->lock_word == 0); mutex->lock_word = 1; + os_wmb; } return((byte)ret); @@ -147,6 +148,7 @@ mutex_get_waiters( ptr = &(mutex->waiters); + os_rmb; return(*ptr); /* Here we assume that the read of a single word from memory is atomic */ } @@ -181,6 +183,7 @@ mutex_exit_func( to wake up possible hanging threads if they are missed in 
mutex_signal_object. */ + os_isync; if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index d3598185ecc8a..9555466c9bf59 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -440,7 +440,12 @@ UNIV_INTERN ulong srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ UNIV_INTERN ulint srv_dict_size_limit = 0; /*-------------------------------------------*/ +#ifdef HAVE_MEMORY_BARRIER +/* No idea to wait long with memory barriers */ +UNIV_INTERN ulong srv_n_spin_wait_rounds = 15; +#else UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +#endif UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; UNIV_INTERN ulong srv_thread_sleep_delay = 10000; UNIV_INTERN ulong srv_spin_wait_delay = 6; @@ -2929,9 +2934,10 @@ srv_error_monitor_thread( /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - new_lsn = log_get_lsn(); + /* We have to use nowait to ensure we don't block */ + new_lsn= log_get_lsn_nowait(); - if (new_lsn < old_lsn) { + if (new_lsn && new_lsn < old_lsn) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: old log sequence number %llu" @@ -2943,7 +2949,8 @@ srv_error_monitor_thread( ut_ad(0); } - old_lsn = new_lsn; + if (new_lsn) + old_lsn = new_lsn; if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index 7f27fd926ebb5..503cc38d0da29 100644 --- a/storage/xtradb/sync/sync0arr.c +++ b/storage/xtradb/sync/sync0arr.c @@ -815,6 +815,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; + os_rmb; if (lock->lock_word > 0) { /* Either unlocked or only read locked. 
*/ @@ -826,6 +827,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; /* lock_word == 0 means all readers have left */ + os_rmb; if (lock->lock_word == 0) { return(TRUE); @@ -834,6 +836,7 @@ sync_arr_cell_can_wake_up( lock = cell->wait_object; /* lock_word > 0 means no writer or reserved writer */ + os_rmb; if (lock->lock_word > 0) { return(TRUE); diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index 5068d1679c0a9..17f37edc56d3f 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -40,6 +40,7 @@ Created 9/11/1995 Heikki Tuuri #include "srv0srv.h" #include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ #include "ha_prototypes.h" +#include "my_cpu.h" /* IMPLEMENTATION OF THE RW_LOCK @@ -390,15 +391,19 @@ rw_lock_s_lock_spin( lock_loop: /* Spin waiting for the writer field to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } i++; + os_rmb; } - if (i == SYNC_SPIN_ROUNDS) { + HMT_medium(); + if (lock->lock_word <= 0) { os_thread_yield(); } @@ -495,16 +500,19 @@ rw_lock_x_lock_wait( ulint index; ulint i = 0; + os_rmb; ut_ad(lock->lock_word <= 0); - + HMT_low(); while (lock->lock_word < 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } if(i < SYNC_SPIN_ROUNDS) { i++; + os_rmb; continue; } + HMT_medium(); /* If there is still a reader, then go to sleep.*/ rw_x_spin_round_count += i; @@ -541,7 +549,9 @@ rw_lock_x_lock_wait( sync_array_free_cell(sync_primary_wait_array, index); } + HMT_low(); } + HMT_medium(); rw_x_spin_round_count += i; } @@ -579,6 +589,8 @@ rw_lock_x_lock_low( file_name, line); } else { + if (!pass) + os_rmb; /* Decrement failed: relock or failed lock */ if (!pass && lock->recursive && os_thread_eq(lock->writer_thread, curr_thread)) { @@ -644,6 +656,8 @@ rw_lock_x_lock_func( } /* Spin waiting for the lock_word to become free */ + os_rmb; 
+ HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { @@ -652,7 +666,9 @@ rw_lock_x_lock_func( } i++; + os_rmb; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { os_thread_yield(); } else { diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c index 25f96d9817ae8..948d211bc030c 100644 --- a/storage/xtradb/sync/sync0sync.c +++ b/storage/xtradb/sync/sync0sync.c @@ -44,6 +44,7 @@ Created 9/5/1995 Heikki Tuuri # include "srv0start.h" /* srv_is_being_started */ #endif /* UNIV_SYNC_DEBUG */ #include "ha_prototypes.h" +#include "my_cpu.h" /* REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX @@ -481,6 +482,8 @@ mutex_set_waiters( ptr = &(mutex->waiters); + os_wmb; + *ptr = n; /* Here we assume that the write of a single word in memory is atomic */ #endif @@ -523,13 +526,15 @@ mutex_spin_wait( spin_loop: ut_d(mutex->count_spin_loop++); + HMT_low(); while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } - + os_rmb; // Ensure future reads sees new values i++; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { #ifdef UNIV_DEBUG