diff --git a/benchmarks/lockhammer/Makefile b/benchmarks/lockhammer/Makefile
index 1ee06a4..513267d 100644
--- a/benchmarks/lockhammer/Makefile
+++ b/benchmarks/lockhammer/Makefile
@@ -6,20 +6,26 @@ LDFLAGS=-lpthread
 lh_%: tests/%.h include/atomics.h src/lockhammer.c
 	${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
 
-all: \
-	lh_swap_mutex \
+TARGET_ARCH:=$(shell ${CC} -dumpmachine | cut -d '-' -f 1)
+
+TEST_TARGETS=lh_swap_mutex \
 	lh_event_mutex \
 	lh_cas_event_mutex \
 	lh_cas_lockref \
 	lh_cas_rw_lock \
 	lh_incdec_refcount \
 	lh_ticket_spinlock \
-	lh_hybrid_spinlock \
-	lh_hybrid_spinlock_fastdequeue \
 	lh_queued_spinlock \
 	lh_empty \
 	lh_jvm_objectmonitor
 
+ifeq ($(TARGET_ARCH),aarch64)
+  TEST_TARGETS+=lh_hybrid_spinlock \
+    lh_hybrid_spinlock_fastdequeue
+endif
+
+all: ${TEST_TARGETS}
+
 lh_event_mutex: ../../ext/mysql/event_mutex.h include/atomics.h ../../ext/mysql/include/ut_atomics.h src/lockhammer.c
 	${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
 
diff --git a/benchmarks/lockhammer/include/atomics.h b/benchmarks/lockhammer/include/atomics.h
index 3a8d6fb..9dafe24 100644
--- a/benchmarks/lockhammer/include/atomics.h
+++ b/benchmarks/lockhammer/include/atomics.h
@@ -34,6 +34,9 @@
 #ifndef initialize_lock
 #define initialize_lock(lock, thread)
 #endif
+#ifndef parse_test_args
+    #define parse_test_args(args, argc, argv)
+#endif
 
 static inline void spin_wait (unsigned long wait_iter) {
 #if defined(__aarch64__)
diff --git a/benchmarks/lockhammer/include/lockhammer.h b/benchmarks/lockhammer/include/lockhammer.h
new file mode 100644
index 0000000..7fc1963
--- /dev/null
+++ b/benchmarks/lockhammer/include/lockhammer.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LOCKHAMMER_H__
+#define __LOCKHAMMER_H__
+
+struct thread_args {
+    unsigned long ncores;
+    unsigned long nthrds;
+    unsigned long iter;
+    unsigned long *lock;
+    unsigned long *rst;
+    unsigned long *nsec;
+    unsigned long *depth;
+    unsigned long *nstart;
+    unsigned long hold, post;
+};
+typedef struct thread_args thread_args;
+
+struct test_args {
+    unsigned long nthrds;
+    unsigned long nacqrs;
+    unsigned long ncrit;
+    unsigned long nparallel;
+};
+typedef struct test_args test_args;
+
+#endif
diff --git a/benchmarks/lockhammer/scripts/sweep.sh b/benchmarks/lockhammer/scripts/sweep.sh
index a61f752..f90d534 100755
--- a/benchmarks/lockhammer/scripts/sweep.sh
+++ b/benchmarks/lockhammer/scripts/sweep.sh
@@ -48,7 +48,7 @@ do
 		fi
 
 		echo Test: ${1} CPU: exectx=$c Date: `date` 1>&2
-		sudo ../build/lh_${1} $c ${acquires} ${2} ${3}
+		sudo ../build/lh_${1} -t $c -a ${acquires} -c ${2} -p ${3}
 		sleep 5s
 	fi
 done
diff --git a/benchmarks/lockhammer/src/lockhammer.c b/benchmarks/lockhammer/src/lockhammer.c
index c0f404e..cb8a8ba 100644
--- a/benchmarks/lockhammer/src/lockhammer.c
+++ b/benchmarks/lockhammer/src/lockhammer.c
@@ -38,37 +38,32 @@
 #include
 #include
 #include
+#include
 #include
+#include "lockhammer.h"
+
 #include ATOMIC_TEST
 
 uint64_t test_lock = 0;
 uint64_t sync_lock = 0;
 uint64_t ready_lock = 0;
 
-struct arg {
-    unsigned long ncores;
-    unsigned long nthrds;
-    unsigned long iter;
-    unsigned long *lock;
-    unsigned long *rst;
-    unsigned long *nsec;
-    unsigned long *depth;
-    unsigned long *nstart;
-    unsigned long hold, post;
-};
-typedef struct arg arg;
-
 void* hmr(void *);
 
+void print_usage (char *invoc) {
+    fprintf(stderr,
+            "Usage: %s\n\t[-t threads]\n\t[-a acquires per thread]\n\t"
+            "[-c critical iterations]\n\t[-p parallelizable iterations]\n\t"
+            "[-- ]\n", invoc);
+}
+
 int main(int argc, char** argv)
 {
     struct sched_param sparam;
     unsigned long i;
-    unsigned long num_cores, num_threads;
-    unsigned long locks_per_thread;
-    unsigned long lock_hold_work, non_lock_work;
+    unsigned long num_cores;
     unsigned long result;
     unsigned long sched_elapsed = 0, real_elapsed = 0;
     unsigned long start_ns = 0;
@@ -76,35 +71,79 @@ int main(int argc, char** argv)
 
     num_cores = sysconf(_SC_NPROCESSORS_ONLN);
 
-    if (argc == 1) {
-        num_threads = num_cores;
-        locks_per_thread = 50000;
-        lock_hold_work = 0;
-        non_lock_work = 0;
-    }
-    else if (argc == 5) {
-        num_threads = atoi(argv[1]);
-        /* Do not allow number of threads to exceed online cores
-           in order to prevent deadlock ... */
-        num_threads = num_threads > num_cores ? num_cores : num_threads;
-        locks_per_thread = atoi(argv[2]);
-        lock_hold_work = atoi(argv[3]);
-        non_lock_work = atoi(argv[4]);
-    }
-    else {
-        fprintf(stderr, "Usage: %s [ ]\n", argv[0]);
-        return 1;
+    /* Set defaults for all command line options */
+    test_args args = { .nthrds = num_cores,
+                       .nacqrs = 50000,
+                       .ncrit = 0,
+                       .nparallel = 0 };
+
+    opterr = 0;
+
+    while ((i = getopt(argc, argv, "t:a:c:p:")) != -1)
+    {
+        long optval = 0;
+        switch (i) {
+          case 't':
+            optval = strtol(optarg, (char **) NULL, 10);
+            /* Do not allow number of threads to exceed online cores
+               in order to prevent deadlock ... */
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: thread count must be positive.\n");
+                return 1;
+            }
+            else if (optval <= num_cores) {
+                args.nthrds = optval;
+            }
+            else {
+                fprintf(stderr, "WARNING: limiting thread count to online cores (%ld).\n", num_cores);
+            }
+            break;
+          case 'a':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: acquire count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.nacqrs = optval;
+            }
+            break;
+          case 'c':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: critical iteration count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.ncrit = optval;
+            }
+            break;
+          case 'p':
+            optval = strtol(optarg, (char **) NULL, 10);
+            if (optval < 0) {
+                fprintf(stderr, "ERROR: parallel iteration count must be positive.\n");
+                return 1;
+            }
+            else {
+                args.nparallel = optval;
+            }
+            break;
+          case '?':
+          default:
+            print_usage(argv[0]);
+            return 1;
+        }
     }
 
-    pthread_t hmr_threads[num_threads];
+    parse_test_args(args, argc - optind, &argv[optind]);
+
+    pthread_t hmr_threads[args.nthrds];
     pthread_attr_t hmr_attr;
-    unsigned long hmrs[num_threads];
-    unsigned long hmrtime[num_threads]; /* can't touch this */
-    unsigned long hmrdepth[num_threads];
+    unsigned long hmrs[args.nthrds];
+    unsigned long hmrtime[args.nthrds]; /* can't touch this */
+    unsigned long hmrdepth[args.nthrds];
     struct timespec tv_time;
 
-    for (i = 0; i < num_threads; ++i) hmrs[i] = 0;
-
     /* Select the FIFO scheduler. This prevents interruption of the
        lockhammer test threads allowing for more precise measuremnet of
        lock acquisition rate, especially for mutex type locks where
@@ -128,23 +167,24 @@ int main(int argc, char** argv)
 
     initialize_lock(&test_lock, num_cores);
 
-    arg args[num_threads];
-    for (i = 0; i < num_threads; ++i) {
-        args[i].ncores = num_cores;
-        args[i].nthrds = num_threads;
-        args[i].iter = locks_per_thread;
-        args[i].lock = &test_lock;
-        args[i].rst = &hmrs[i];
-        args[i].nsec = &hmrtime[i];
-        args[i].depth = &hmrdepth[i];
-        args[i].nstart = &start_ns;
-        args[i].hold = lock_hold_work;
-        args[i].post = non_lock_work;
-
-        pthread_create(&hmr_threads[i], &hmr_attr, hmr, (void*)(&args[i]));
+    thread_args t_args[args.nthrds];
+    for (i = 0; i < args.nthrds; ++i) {
+        hmrs[i] = 0;
+        t_args[i].ncores = num_cores;
+        t_args[i].nthrds = args.nthrds;
+        t_args[i].iter = args.nacqrs;
+        t_args[i].lock = &test_lock;
+        t_args[i].rst = &hmrs[i];
+        t_args[i].nsec = &hmrtime[i];
+        t_args[i].depth = &hmrdepth[i];
+        t_args[i].nstart = &start_ns;
+        t_args[i].hold = args.ncrit;
+        t_args[i].post = args.nparallel;
+
+        pthread_create(&hmr_threads[i], &hmr_attr, hmr, (void*)(&t_args[i]));
     }
 
-    for (i = 0; i < num_threads; ++i) {
+    for (i = 0; i < args.nthrds; ++i) {
         result = pthread_join(hmr_threads[i], NULL);
     }
 
     /* "Marshal" thread will collect start time once all threads have
@@ -155,7 +195,7 @@ int main(int argc, char** argv)
     pthread_attr_destroy(&hmr_attr);
 
     result = 0;
-    for (i = 0; i < num_threads; ++i) {
+    for (i = 0; i < args.nthrds; ++i) {
         result += hmrs[i];
         sched_elapsed += hmrtime[i];
         /* Average lock "depth" is an algorithm-specific auxiliary metric
@@ -164,7 +204,7 @@
            which is to be calculated as the number of threads waiting on each
           call to lock_acquire and accumulated per-thread. These results
           are then aggregated and averaged here so that an overall view of
           the run's contention level can be determined. */
-        avg_lock_depth += ((double) hmrdepth[i] / (double) hmrs[i]) / (double) num_threads;
+        avg_lock_depth += ((double) hmrdepth[i] / (double) hmrs[i]) / (double) args.nthrds;
     }
 
     fprintf(stderr, "%ld lock loops\n", result);
@@ -175,7 +215,7 @@
     fprintf(stderr, "%lf average depth\n", avg_lock_depth);
 
     printf("%ld, %f, %lf, %lf, %lf\n",
-           num_threads,
+           args.nthrds,
           ((float) sched_elapsed / (float) real_elapsed),
          ((double) sched_elapsed)/ ((double) result),
          ((double) real_elapsed) / ((double) result),
@@ -185,7 +225,7 @@
 void* hmr(void *ptr)
 {
     unsigned long nlocks = 0;
-    arg *x = (arg*)ptr;
+    thread_args *x = (thread_args*)ptr;
     int rval;
     unsigned long *lock = x->lock;
     unsigned long target_locks = x->iter;
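
Note on the "--" pass-through added by this patch: lockhammer.c now forwards every token left after its own options to parse_test_args(args, argc - optind, &argv[optind]), and atomics.h installs a no-op default for that macro so existing tests build unchanged. The sketch below shows how a tests/*.h header *could* override the hook to accept a flag of its own; parse_hybrid_args, unqueue_retries, and the -u option are invented for illustration and are not part of this patch, and it assumes the test header defines the macro before the #ifndef default in atomics.h is reached.

/* Illustrative sketch only -- not part of this patch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long unqueue_retries = 100;   /* hypothetical per-test tunable */

static void parse_hybrid_args(test_args args, int argc, char **argv)
{
    int i;

    (void) args;   /* generic options were already parsed by lockhammer.c */

    /* Plain scan of the test-specific slice; argv[0] here is the first
       token after "--", not the program name, so no getopt() reset is needed. */
    for (i = 0; i < argc; i++) {
        if (strcmp(argv[i], "-u") == 0 && i + 1 < argc) {
            unqueue_retries = strtoul(argv[++i], NULL, 10);
        } else {
            fprintf(stderr, "test-specific args: [-u unqueue_retries]\n");
        }
    }
}
#define parse_test_args(a, c, v) parse_hybrid_args(a, c, v)

With a header along those lines, an invocation might look like
sudo build/lh_hybrid_spinlock -t 16 -a 50000 -c 0 -p 0 -- -u 200
where everything after "--" is handed to the test's own parser (the -u value shown is hypothetical).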