Skip to content

Commit 5e929ee

Browse files
committed
MDEV-19845: Define my_timer_cycles() inline
On clang, use __builtin_readcyclecounter() when available. Hinted by Sergey Vojtovich. (This may lead to runtime failure on ARM systems. The hardware should be available on ARMv8 (AArch64), but access to it may require special privileges.) We remove support for the proprietary Sun Microsystems compiler, and rely on clang or the __GNUC__ assembler syntax instead. For now, we retain support for IA-64 (Itanium) and 32-bit SPARC, even though those platforms are likely no longer widely used. We remove support for clock_gettime(CLOCK_SGI_CYCLE), because Silicon Graphics ceased supporting IRIX in December 2013. This was the only cycle timer interface available for MIPS. On PowerPC, we rely on the GCC 4.8 __builtin_ppc_get_timebase() (or clang __builtin_readcyclecounter()), which should be equivalent to the old assembler code on both 64-bit and 32-bit targets.
1 parent 1635ea9 commit 5e929ee

File tree

9 files changed

+124
-295
lines changed

9 files changed

+124
-295
lines changed

cmake/os/SunOS.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ CHECK_C_SOURCE_RUNS(
7070

7171

7272
# Check if special processor flag needs to be set on older GCC
73-
#that defaults to v8 sparc . Code here is taken from my_rdtsc.c
73+
#that defaults to v8 sparc . Code here is taken from my_rdtsc.h
7474
IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_SIZEOF_VOID_P EQUAL 4
7575
AND CMAKE_SYSTEM_PROCESSOR MATCHES "sparc")
7676
SET(SOURCE

cmake/os/WindowsCache.cmake

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ SET(HAVE_ACCESS 1 CACHE INTERNAL "")
2525
SET(HAVE_ALARM CACHE INTERNAL "")
2626
SET(HAVE_ALLOCA_H CACHE INTERNAL "")
2727
SET(HAVE_ARPA_INET_H CACHE INTERNAL "")
28-
SET(HAVE_ASM_MSR_H CACHE INTERNAL "")
2928
SET(HAVE_BACKTRACE CACHE INTERNAL "")
3029
SET(HAVE_BACKTRACE_SYMBOLS CACHE INTERNAL "")
3130
SET(HAVE_BACKTRACE_SYMBOLS_FD CACHE INTERNAL "")
@@ -132,7 +131,6 @@ SET(HAVE_PTHREAD_YIELD_NP CACHE INTERNAL "")
132131
SET(HAVE_PTHREAD_YIELD_ZERO_ARG CACHE INTERNAL "")
133132
SET(HAVE_PUTENV 1 CACHE INTERNAL "")
134133
SET(HAVE_PWD_H CACHE INTERNAL "")
135-
SET(HAVE_RDTSCLL CACHE INTERNAL "")
136134
SET(HAVE_READDIR_R CACHE INTERNAL "")
137135
SET(HAVE_READLINK CACHE INTERNAL "")
138136
SET(HAVE_READ_REAL_TIME CACHE INTERNAL "")

config.h.cmake

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#cmakedefine _GNU_SOURCE 1
2222
#cmakedefine HAVE_ALLOCA_H 1
2323
#cmakedefine HAVE_ARPA_INET_H 1
24-
#cmakedefine HAVE_ASM_MSR_H 1
2524
#cmakedefine HAVE_ASM_TERMBITS_H 1
2625
#cmakedefine HAVE_CRYPT_H 1
2726
#cmakedefine HAVE_CURSES_H 1
@@ -186,7 +185,6 @@
186185
#cmakedefine HAVE_POSIX_FALLOCATE 1
187186
#cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1
188187
#cmakedefine HAVE_PREAD 1
189-
#cmakedefine HAVE_RDTSCLL 1
190188
#cmakedefine HAVE_READ_REAL_TIME 1
191189
#cmakedefine HAVE_PTHREAD_ATTR_CREATE 1
192190
#cmakedefine HAVE_PTHREAD_ATTR_GETGUARDSIZE 1

configure.cmake

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,6 @@ SET(CMAKE_REQUIRED_FLAGS)
437437
CHECK_INCLUDE_FILES(time.h HAVE_TIME_H)
438438
CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H)
439439
CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H)
440-
CHECK_INCLUDE_FILES(asm/msr.h HAVE_ASM_MSR_H)
441-
#msr.h has rdtscll()
442440

443441
CHECK_INCLUDE_FILES(ia64intrin.h HAVE_IA64INTRIN_H)
444442

@@ -453,9 +451,6 @@ CHECK_FUNCTION_EXISTS(ftime HAVE_FTIME)
453451
CHECK_FUNCTION_EXISTS(time HAVE_TIME)
454452
# We can use time() on Macintosh if there is no ftime().
455453

456-
CHECK_FUNCTION_EXISTS(rdtscll HAVE_RDTSCLL)
457-
# I doubt that we'll ever reach the check for this.
458-
459454

460455
#
461456
# Tests for symbols

include/my_rdtsc.h

Lines changed: 110 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc.
2-
Use is subject to license terms.
2+
Copyright (c) 2019, MariaDB Corporation.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -23,6 +23,20 @@
2323
#ifndef MY_RDTSC_H
2424
#define MY_RDTSC_H
2525

26+
# ifndef __has_builtin
27+
# define __has_builtin(x) 0 /* Compatibility with non-clang compilers */
28+
# endif
29+
# if __has_builtin(__builtin_readcyclecounter)
30+
# elif defined _WIN32
31+
# include <intrin.h>
32+
# elif defined __i386__ || defined __x86_64__
33+
# include <x86intrin.h>
34+
# elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
35+
# include <ia64intrin.h>
36+
# elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
37+
# include <sys/times.h>
38+
# endif
39+
2640
/**
2741
Characteristics of a timer.
2842
*/
@@ -62,12 +76,104 @@ C_MODE_START
6276

6377
/**
6478
A cycle timer.
79+
80+
On clang, we use __builtin_readcyclecounter().
81+
On other compilers:
82+
83+
On IA-32 and AMD64, we use the RDTSC instruction.
84+
On IA-64, we read the ar.itc register.
85+
On SPARC, we read the tick register.
86+
On POWER, we read the Time Base Register (which is not really a cycle count
87+
but a separate counter with less than nanosecond resolution).
88+
On IBM S/390 System z we use the STCK instruction.
89+
On ARM, we probably should use the Generic Timer, but should figure out
90+
how to ensure that it can be accessed.
91+
92+
Sadly, we have nothing for the Digital Alpha, MIPS, Motorola m68k,
93+
HP PA-RISC or other non-mainstream (or obsolete) processors.
94+
95+
TODO: consider C++11 std::chrono::high_resolution_clock.
96+
97+
We fall back to gethrtime() where available.
98+
99+
On the platforms that do not have a CYCLE timer,
100+
"wait" events are initialized to use NANOSECOND instead of CYCLE
101+
during performance_schema initialization (at the server startup).
102+
103+
Linux performance monitor (see "man perf_event_open") can
104+
provide cycle counter on the platforms that do not have
105+
other kinds of cycle counters. But we don't use it so far.
106+
107+
ARM notes
108+
---------
109+
During tests on ARMv7 Debian, perf_event_open() based cycle counter provided
110+
too low frequency with too high overhead:
111+
MariaDB [performance_schema]> SELECT * FROM performance_timers;
112+
+-------------+-----------------+------------------+----------------+
113+
| TIMER_NAME | TIMER_FREQUENCY | TIMER_RESOLUTION | TIMER_OVERHEAD |
114+
+-------------+-----------------+------------------+----------------+
115+
| CYCLE | 689368159 | 1 | 970 |
116+
| NANOSECOND | 1000000000 | 1 | 308 |
117+
| MICROSECOND | 1000000 | 1 | 417 |
118+
| MILLISECOND | 1000 | 1000 | 407 |
119+
| TICK | 127 | 1 | 612 |
120+
+-------------+-----------------+------------------+----------------+
121+
Therefore, it was decided not to use perf_event_open() on ARM
122+
(i.e. go without CYCLE and have "wait" events use NANOSECOND by default).
123+
65124
@return the current timer value, in cycles.
66125
*/
67-
ulonglong my_timer_cycles(void);
126+
static inline ulonglong my_timer_cycles(void)
127+
{
128+
# if __has_builtin(__builtin_readcyclecounter)
129+
return __builtin_readcyclecounter();
130+
# elif defined _WIN32 || defined __i386__ || defined __x86_64__
131+
return __rdtsc();
132+
# elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
133+
return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
134+
#elif defined(__GNUC__) && defined(__ia64__)
135+
{
136+
ulonglong result;
137+
__asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
138+
return result;
139+
}
140+
#elif defined __GNUC__ && defined __powerpc__
141+
return __builtin_ppc_get_timebase();
142+
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
143+
{
144+
ulonglong result;
145+
__asm __volatile__ ("rd %%tick,%0" : "=r" (result));
146+
return result;
147+
}
148+
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
149+
{
150+
union {
151+
ulonglong wholeresult;
152+
struct {
153+
ulong high;
154+
ulong low;
155+
} splitresult;
156+
} result;
157+
__asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
158+
return result.wholeresult;
159+
}
160+
#elif defined(__GNUC__) && defined(__s390__)
161+
/* covers both s390 and s390x */
162+
{
163+
ulonglong result;
164+
__asm__ __volatile__ ("stck %0" : "=Q" (result) : : "cc");
165+
return result;
166+
}
167+
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
168+
/* gethrtime may appear as either cycle or nanosecond counter */
169+
return (ulonglong) gethrtime();
170+
#else
171+
return 0;
172+
#endif
173+
}
68174

69175
/**
70-
A namoseconds timer.
176+
A nanosecond timer.
71177
@return the current timer value, in nanoseconds.
72178
*/
73179
ulonglong my_timer_nanoseconds(void);
@@ -98,33 +204,22 @@ void my_timer_init(MY_TIMER_INFO *mti);
98204

99205
C_MODE_END
100206

101-
#define MY_TIMER_ROUTINE_ASM_X86 1
102-
#define MY_TIMER_ROUTINE_ASM_X86_64 2
103-
#define MY_TIMER_ROUTINE_RDTSCLL 3
104-
#define MY_TIMER_ROUTINE_ASM_X86_WIN 4
105207
#define MY_TIMER_ROUTINE_RDTSC 5
106208
#define MY_TIMER_ROUTINE_ASM_IA64 6
107-
#define MY_TIMER_ROUTINE_ASM_PPC 7
108-
#define MY_TIMER_ROUTINE_SGI_CYCLE 8
209+
#define MY_TIMER_ROUTINE_PPC_GET_TIMEBASE 7
109210
#define MY_TIMER_ROUTINE_GETHRTIME 9
110211
#define MY_TIMER_ROUTINE_READ_REAL_TIME 10
111212
#define MY_TIMER_ROUTINE_CLOCK_GETTIME 11
112-
#define MY_TIMER_ROUTINE_NXGETTIME 12
113213
#define MY_TIMER_ROUTINE_GETTIMEOFDAY 13
114214
#define MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER 14
115215
#define MY_TIMER_ROUTINE_GETTICKCOUNT 15
116216
#define MY_TIMER_ROUTINE_TIME 16
117217
#define MY_TIMER_ROUTINE_TIMES 17
118218
#define MY_TIMER_ROUTINE_FTIME 18
119-
#define MY_TIMER_ROUTINE_ASM_PPC64 19
120-
#define MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64 20
121-
#define MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32 21
122-
#define MY_TIMER_ROUTINE_ASM_SUNPRO_I386 22
123219
#define MY_TIMER_ROUTINE_ASM_GCC_SPARC64 23
124220
#define MY_TIMER_ROUTINE_ASM_GCC_SPARC32 24
125221
#define MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME 25
126222
#define MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME 26
127-
#define MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64 27
128223
#define MY_TIMER_ROUTINE_ASM_S390 28
129224

130225
#endif

mysys/CMakeLists.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,6 @@ IF(HAVE_ALARM)
5959
SET(MYSYS_SOURCES ${MYSYS_SOURCES} my_alarm.c)
6060
ENDIF()
6161

62-
IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_C_COMPILER_ID MATCHES "SunPro")
63-
# Inline assembly template for rdtsc
64-
SET_SOURCE_FILES_PROPERTIES(my_rdtsc.c
65-
PROPERTIES COMPILE_FLAGS "${CMAKE_CURRENT_SOURCE_DIR}/my_timer_cycles.il")
66-
ENDIF()
67-
6862
IF(HAVE_LINUX_LARGE_PAGES)
6963
SET(MYSYS_SOURCES ${MYSYS_SOURCES} my_largepage.c)
7064
ENDIF()

mysys/my_cpu.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,12 @@
1616
#include <my_global.h>
1717
#include <my_atomic.h>
1818
#include <my_cpu.h>
19+
#include <my_rdtsc.h>
1920

2021
#ifdef HAVE_PAUSE_INSTRUCTION
2122
/** How many times to invoke PAUSE in a loop */
2223
unsigned my_cpu_relax_multiplier = 200;
2324

24-
# include <stdint.h>
25-
26-
# ifdef _MSC_VER
27-
# include <intrin.h>
28-
# else
29-
# include <x86intrin.h>
30-
# endif
31-
3225
#define PAUSE4 MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU()
3326
#define PAUSE16 PAUSE4; PAUSE4; PAUSE4; PAUSE4
3427

@@ -70,12 +63,12 @@ unsigned my_cpu_relax_multiplier = 200;
7063
*/
7164
void my_cpu_init(void)
7265
{
73-
uint64_t t0, t1, t2;
74-
t0= __rdtsc();
66+
ulonglong t0, t1, t2;
67+
t0= my_timer_cycles();
7568
PAUSE16;
76-
t1= __rdtsc();
69+
t1= my_timer_cycles();
7770
PAUSE16;
78-
t2= __rdtsc();
71+
t2= my_timer_cycles();
7972
if (t2 - t1 > 30 * 16 && t1 - t0 > 30 * 16)
8073
my_cpu_relax_multiplier= 20;
8174
}

0 commit comments

Comments
 (0)