diff --git a/benchmarks/lockhammer/Makefile b/benchmarks/lockhammer/Makefile
index 1ee06a4..513267d 100644
--- a/benchmarks/lockhammer/Makefile
+++ b/benchmarks/lockhammer/Makefile
@@ -6,20 +6,26 @@ LDFLAGS=-lpthread
 lh_%: tests/%.h include/atomics.h src/lockhammer.c
 	${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
 
-all: \
-	lh_swap_mutex \
+TARGET_ARCH:=$(shell ${CC} -dumpmachine | cut -d '-' -f 1)
+
+TEST_TARGETS=lh_swap_mutex \
 	lh_event_mutex \
 	lh_cas_event_mutex \
 	lh_cas_lockref \
 	lh_cas_rw_lock \
 	lh_incdec_refcount \
 	lh_ticket_spinlock \
-	lh_hybrid_spinlock \
-	lh_hybrid_spinlock_fastdequeue \
 	lh_queued_spinlock \
 	lh_empty \
 	lh_jvm_objectmonitor
 
+ifeq ($(TARGET_ARCH),aarch64)
+  TEST_TARGETS+=lh_hybrid_spinlock \
+    lh_hybrid_spinlock_fastdequeue
+endif
+
+all: ${TEST_TARGETS}
+
 lh_event_mutex: ../../ext/mysql/event_mutex.h include/atomics.h ../../ext/mysql/include/ut_atomics.h src/lockhammer.c
 	${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
 
diff --git a/benchmarks/lockhammer/include/atomics.h b/benchmarks/lockhammer/include/atomics.h
index 3a8d6fb..9dafe24 100644
--- a/benchmarks/lockhammer/include/atomics.h
+++ b/benchmarks/lockhammer/include/atomics.h
@@ -34,6 +34,9 @@
 #ifndef initialize_lock
 #define initialize_lock(lock, thread)
 #endif
+#ifndef parse_test_args
+    #define parse_test_args(args, argc, argv)
+#endif
 
 static inline void spin_wait (unsigned long wait_iter) {
 #if defined(__aarch64__)
diff --git a/benchmarks/lockhammer/include/lockhammer.h b/benchmarks/lockhammer/include/lockhammer.h
new file mode 100644
index 0000000..7fc1963
--- /dev/null
+++ b/benchmarks/lockhammer/include/lockhammer.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LOCKHAMMER_H__
+#define __LOCKHAMMER_H__
+
+struct thread_args {
+    unsigned long ncores;
+    unsigned long nthrds;
+    unsigned long iter;
+    unsigned long *lock;
+    unsigned long *rst;
+    unsigned long *nsec;
+    unsigned long *depth;
+    unsigned long *nstart;
+    unsigned long hold, post;
+};
+typedef struct thread_args thread_args;
+
+struct test_args {
+    unsigned long nthrds;
+    unsigned long nacqrs;
+    unsigned long ncrit;
+    unsigned long nparallel;
+};
+typedef struct test_args test_args;
+
+#endif
diff --git a/benchmarks/lockhammer/scripts/sweep.sh b/benchmarks/lockhammer/scripts/sweep.sh
index a61f752..f90d534 100755
--- a/benchmarks/lockhammer/scripts/sweep.sh
+++ b/benchmarks/lockhammer/scripts/sweep.sh
@@ -48,7 +48,7 @@ do
 		fi
 
 		echo Test: ${1} CPU: exectx=$c Date: `date` 1>&2
-		sudo ../build/lh_${1} $c ${acquires} ${2} ${3}
+		sudo ../build/lh_${1} -t $c -a ${acquires} -c ${2} -p ${3}
 		sleep 5s
 	fi
 done
diff --git a/benchmarks/lockhammer/src/lockhammer.c b/benchmarks/lockhammer/src/lockhammer.c
index c0f404e..cb8a8ba 100644
--- a/benchmarks/lockhammer/src/lockhammer.c
+++ b/benchmarks/lockhammer/src/lockhammer.c
@@ -38,37 +38,32 @@
 #include
 #include
 #include
+#include
 #include
+#include "lockhammer.h"
+
 #include ATOMIC_TEST
 
 uint64_t test_lock = 0;
 uint64_t sync_lock = 0;
 uint64_t ready_lock = 0;
 
-struct arg {
-    unsigned long ncores;
-    unsigned long nthrds;
-    unsigned long iter;
-    unsigned long *lock;
-    unsigned long *rst;
-    unsigned long *nsec;
-    unsigned long *depth;
-    unsigned long *nstart;
-    unsigned long hold, post;
-};
-typedef struct arg arg;
-
 void* hmr(void *);
 
+void print_usage (char *invoc) {
+    fprintf(stderr,
+            "Usage: %s\n\t[-t threads]\n\t[-a acquires per thread]\n\t"
+            "[-c critical iterations]\n\t[-p parallelizable iterations]\n\t"
+            "[-- ]\n", invoc);
+}
+
 int main(int argc, char** argv)
 {
     struct sched_param sparam;
     unsigned long i;
-    unsigned long num_cores, num_threads;
-    unsigned long locks_per_thread;
-    unsigned long lock_hold_work, non_lock_work;
+    unsigned long num_cores;
     unsigned long result;
     unsigned long sched_elapsed = 0, real_elapsed = 0;
     unsigned long start_ns = 0;
@@ -76,35 +71,79 @@ int main(int argc, char** argv)
 
     num_cores = sysconf(_SC_NPROCESSORS_ONLN);
 
-    if (argc == 1) {
-        num_threads = num_cores;
-        locks_per_thread = 50000;
-        lock_hold_work = 0;
-        non_lock_work = 0;
-    }
-    else if (argc == 5) {
-        num_threads = atoi(argv[1]);
-        /* Do not allow number of threads to exceed online cores
-           in order to prevent deadlock ... */
-        num_threads = num_threads > num_cores ? num_cores : num_threads;
-        locks_per_thread = atoi(argv[2]);
-        lock_hold_work = atoi(argv[3]);
-        non_lock_work = atoi(argv[4]);
-    }
-    else {
-        fprintf(stderr, "Usage: %s [ ]\n", argv[0]);
-        return 1;
+    /* Set defaults for all command line options */
+    test_args args = { .nthrds = num_cores,
+                       .nacqrs = 50000,
+                       .ncrit = 0,
+                       .nparallel = 0 };
+
+    opterr = 0;
+
+    while ((i = getopt(argc, argv, "t:a:c:p:")) != -1)
+    {
+        long optval = 0;
+        switch (i) {
+          case 't':
+            optval = strtol(optarg, (char **) NULL, 10);
+            /* Do not allow number of threads to exceed online cores
+               in order to prevent deadlock ... */
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: thread count must be positive.\n");
+                return 1;
+            }
+            else if (optval <= num_cores) {
+                args.nthrds = optval;
+            }
+            else {
+                fprintf(stderr, "WARNING: limiting thread count to online cores (%ld).\n", num_cores);
+            }
+            break;
+          case 'a':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: acquire count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.nacqrs = optval;
+            }
+            break;
+          case 'c':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: critical iteration count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.ncrit = optval;
+            }
+            break;
+          case 'p':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: parallel iteration count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.nparallel = optval;
+            }
+            break;
+          case '?':
+          default:
+            print_usage(argv[0]);
+            return 1;
+        }
     }
 
-    pthread_t hmr_threads[num_threads];
+    parse_test_args(args, argc - optind, &argv[optind]);
+
+    pthread_t hmr_threads[args.nthrds];
     pthread_attr_t hmr_attr;
-    unsigned long hmrs[num_threads];
-    unsigned long hmrtime[num_threads]; /* can't touch this */
-    unsigned long hmrdepth[num_threads];
+    unsigned long hmrs[args.nthrds];
+    unsigned long hmrtime[args.nthrds]; /* can't touch this */
+    unsigned long hmrdepth[args.nthrds];
     struct timespec tv_time;
 
-    for (i = 0; i < num_threads; ++i) hmrs[i] = 0;
-
     /* Select the FIFO scheduler. This prevents interruption of the
        lockhammer test threads allowing for more precise measuremnet of
        lock acquisition rate, especially for mutex type locks where
@@ -128,23 +167,24 @@ int main(int argc, char** argv)
 
     initialize_lock(&test_lock, num_cores);
 
-    arg args[num_threads];
-    for (i = 0; i < num_threads; ++i) {
-        args[i].ncores = num_cores;
-        args[i].nthrds = num_threads;
-        args[i].iter = locks_per_thread;
-        args[i].lock = &test_lock;
-        args[i].rst = &hmrs[i];
-        args[i].nsec = &hmrtime[i];
-        args[i].depth = &hmrdepth[i];
-        args[i].nstart = &start_ns;
-        args[i].hold = lock_hold_work;
-        args[i].post = non_lock_work;
-
-        pthread_create(&hmr_threads[i], &hmr_attr, hmr, (void*)(&args[i]));
+    thread_args t_args[args.nthrds];
+    for (i = 0; i < args.nthrds; ++i) {
+        hmrs[i] = 0;
+        t_args[i].ncores = num_cores;
+        t_args[i].nthrds = args.nthrds;
+        t_args[i].iter = args.nacqrs;
+        t_args[i].lock = &test_lock;
+        t_args[i].rst = &hmrs[i];
+        t_args[i].nsec = &hmrtime[i];
+        t_args[i].depth = &hmrdepth[i];
+        t_args[i].nstart = &start_ns;
+        t_args[i].hold = args.ncrit;
+        t_args[i].post = args.nparallel;
+
+        pthread_create(&hmr_threads[i], &hmr_attr, hmr, (void*)(&t_args[i]));
     }
 
-    for (i = 0; i < num_threads; ++i) {
+    for (i = 0; i < args.nthrds; ++i) {
         result = pthread_join(hmr_threads[i], NULL);
     }
 
     /* "Marshal" thread will collect start time once all threads have
@@ -155,7 +195,7 @@ int main(int argc, char** argv)
     pthread_attr_destroy(&hmr_attr);
 
     result = 0;
-    for (i = 0; i < num_threads; ++i) {
+    for (i = 0; i < args.nthrds; ++i) {
         result += hmrs[i];
         sched_elapsed += hmrtime[i];
         /* Average lock "depth" is an algorithm-specific auxiliary metric
@@ -164,7 +204,7 @@
            which is to be calculated as the number of threads waiting on each
           call to lock_acquire and accumulated per-thread. These results
           are then aggregated and averaged here so that an overall view of
           the run's contention level can be determined. */
-        avg_lock_depth += ((double) hmrdepth[i] / (double) hmrs[i]) / (double) num_threads;
+        avg_lock_depth += ((double) hmrdepth[i] / (double) hmrs[i]) / (double) args.nthrds;
     }
 
     fprintf(stderr, "%ld lock loops\n", result);
@@ -175,7 +215,7 @@
     fprintf(stderr, "%lf average depth\n", avg_lock_depth);
 
     printf("%ld, %f, %lf, %lf, %lf\n",
-           num_threads,
+           args.nthrds,
           ((float) sched_elapsed / (float) real_elapsed),
          ((double) sched_elapsed)/ ((double) result),
          ((double) real_elapsed) / ((double) result),
@@ -185,7 +225,7 @@
 void* hmr(void *ptr)
 {
     unsigned long nlocks = 0;
-    arg *x = (arg*)ptr;
+    thread_args *x = (thread_args*)ptr;
     int rval;
     unsigned long *lock = x->lock;
     unsigned long target_locks = x->iter;
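
Note on the "--" pass-through added by this patch: lockhammer.c now forwards every token left after its own options to parse_test_args(args, argc - optind, &argv[optind]), and atomics.h installs a no-op default for that macro so existing tests build unchanged. The sketch below shows how a tests/*.h header *could* override the hook to accept a flag of its own; parse_hybrid_args, unqueue_retries, and the -u option are invented for illustration and are not part of this patch, and it assumes the test header defines the macro before the #ifndef default in atomics.h is reached.

/* Illustrative sketch only -- not part of this patch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long unqueue_retries = 100;   /* hypothetical per-test tunable */

static void parse_hybrid_args(test_args args, int argc, char **argv)
{
    int i;

    (void) args;   /* generic options were already parsed by lockhammer.c */

    /* Plain scan of the test-specific slice; argv[0] here is the first
       token after "--", not the program name, so no getopt() reset is needed. */
    for (i = 0; i < argc; i++) {
        if (strcmp(argv[i], "-u") == 0 && i + 1 < argc) {
            unqueue_retries = strtoul(argv[++i], NULL, 10);
        } else {
            fprintf(stderr, "test-specific args: [-u unqueue_retries]\n");
        }
    }
}
#define parse_test_args(a, c, v) parse_hybrid_args(a, c, v)

With a header along those lines, an invocation might look like
sudo build/lh_hybrid_spinlock -t 16 -a 50000 -c 0 -p 0 -- -u 200
where everything after "--" is handed to the test's own parser (the -u value shown is hypothetical).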