From 2e3ee1a71b3285b0e4dccb13aa8132df94791f2b Mon Sep 17 00:00:00 2001 From: Valentin Antonescu Date: Thu, 21 Mar 2013 15:40:08 -0400 Subject: [PATCH] Added the tiny thread spinlock and started the tiny thread usage implementation changes. --- Makefile | 75 ++++++++---------- bitset.h | 15 ++-- fast_mutex.h | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pat.h | 48 ++---------- threading.h | 77 +++++-------------- 5 files changed, 315 insertions(+), 148 deletions(-) create mode 100644 fast_mutex.h diff --git a/Makefile b/Makefile index d96cb18..f34bbdb 100644 --- a/Makefile +++ b/Makefile @@ -5,13 +5,10 @@ SEQAN_DIR = SeqAn-1.1 SEQAN_INC = -I $(SEQAN_DIR) INC = $(SEQAN_INC) -GCC_PREFIX = $(shell dirname `which gcc`) -GCC_SUFFIX = -CC = $(GCC_PREFIX)/gcc$(GCC_SUFFIX) -CPP = $(GCC_PREFIX)/g++$(GCC_SUFFIX) +CPP = g++ CXX = $(CPP) +CC = gcc HEADERS = $(wildcard *.h) -BOWTIE_PTHREADS = 1 BOWTIE_MM = 1 BOWTIE_SHARED_MEM = 1 EXTRA_FLAGS = @@ -25,64 +22,62 @@ WINDOWS = 0 CYGWIN = 0 MINGW = 0 ifneq (,$(findstring CYGWIN,$(shell uname))) -WINDOWS = 1 -CYGWIN = 1 -# POSIX memory-mapped files not currently supported on Windows -BOWTIE_MM = 0 -BOWTIE_SHARED_MEM = 0 + WINDOWS = 1 + CYGWIN = 1 + # POSIX memory-mapped files not currently supported on Windows + BOWTIE_MM = 0 + BOWTIE_SHARED_MEM = 0 else -ifneq (,$(findstring MINGW,$(shell uname))) -WINDOWS = 1 -CYGWIN = 1 -# POSIX memory-mapped files not currently supported on Windows -BOWTIE_MM = 0 -BOWTIE_SHARED_MEM = 0 -endif + ifneq (,$(findstring MINGW,$(shell uname))) + WINDOWS = 1 + CYGWIN = 1 + # POSIX memory-mapped files not currently supported on Windows + BOWTIE_MM = 0 + BOWTIE_SHARED_MEM = 0 + endif endif MACOS = 0 ifneq (,$(findstring Darwin,$(shell uname))) -MACOS = 1 + MACOS = 1 endif LINUX = 0 ifneq (,$(findstring Linux,$(shell uname))) -LINUX = 1 -EXTRA_FLAGS += -Wl,--hash-style=both + LINUX = 1 + EXTRA_FLAGS += -Wl,--hash-style=both endif MM_DEF = ifeq (1,$(BOWTIE_MM)) -MM_DEF = -DBOWTIE_MM + 
MM_DEF = -DBOWTIE_MM endif SHMEM_DEF = ifeq (1,$(BOWTIE_SHARED_MEM)) -SHMEM_DEF = -DBOWTIE_SHARED_MEM + SHMEM_DEF = -DBOWTIE_SHARED_MEM endif PTHREAD_PKG = PTHREAD_LIB = PTHREAD_DEF = -ifeq (1,$(BOWTIE_PTHREADS)) -PTHREAD_DEF = -DBOWTIE_PTHREADS -PTHREAD_LIB = -lpthread + ifeq (1,$(MINGW)) -# pthreads for windows forces us to be specific about the library -EXTRA_FLAGS = -static-libgcc -static-libstdc++ -PTHREAD_LIB = -lpthread -endif + PTHREAD_LIB = + EXTRA_FLAGS = -static-libgcc -static-libstdc++ +else + PTHREAD_LIB = -lpthread endif PREFETCH_LOCALITY = 2 PREF_DEF = -DPREFETCH_LOCALITY=$(PREFETCH_LOCALITY) -LIBS = -SEARCH_LIBS = $(PTHREAD_LIB) +LIBS = $(PTHREAD_LIB) +SEARCH_LIBS = BUILD_LIBS = INSPECT_LIBS = ifeq (1,$(MINGW)) -BUILD_LIBS = $(PTHREAD_LIB) -INSPECT_LIBS = $(PTHREAD_LIB) + BUILD_LIBS = + INSPECT_LIBS = endif OTHER_CPPS = ccnt_lut.cpp ref_read.cpp alphabet.cpp shmem.cpp \ @@ -100,16 +95,18 @@ VERSION = $(shell cat VERSION) # msys will always be 32 bit so look at the cpu arch instead. ifneq (,$(findstring AMD64,$(PROCESSOR_ARCHITEW6432))) -BITS=64 + ifeq (1,$(MINGW)) + BITS=64 + endif endif # Convert BITS=?? to a -m flag BITS_FLAG = ifeq (32,$(BITS)) -BITS_FLAG = -m32 + BITS_FLAG = -m32 endif ifeq (64,$(BITS)) -BITS_FLAG = -m64 + BITS_FLAG = -m64 endif DEBUG_FLAGS = -O0 -g3 $(BITS_FLAG) @@ -144,16 +141,10 @@ GENERAL_LIST = $(wildcard scripts/*.sh) \ TUTORIAL \ VERSION -# This is helpful on Windows under MinGW/MSYS, where Make might go for -# the Windows FIND tool instead. -FIND=$(shell which find) - SRC_PKG_LIST = $(wildcard *.h) \ $(wildcard *.hh) \ $(wildcard *.c) \ $(wildcard *.cpp) \ - $(shell $(FIND) SeqAn-1.1 -name "*.h") \ - $(shell $(FIND) SeqAn-1.1 -name "*.txt") \ doc/strip_markdown.pl \ Makefile \ $(GENERAL_LIST) diff --git a/bitset.h b/bitset.h index 2ae9f3a..153ae85 100644 --- a/bitset.h +++ b/bitset.h @@ -57,7 +57,6 @@ class SyncBitset { * error message and quit if allocation fails. 
*/ SyncBitset(uint32_t sz, const char *errmsg = NULL) : _errmsg(errmsg) { - MUTEX_INIT(_lock); uint32_t nwords = (sz >> 5)+1; // divide by 32 and add 1 try { _words = new uint32_t[nwords]; @@ -94,9 +93,9 @@ class SyncBitset { */ bool test(uint32_t i) { bool ret; - MUTEX_LOCK(_lock); + mutex_m.lock(); ret = testUnsync(i); - MUTEX_UNLOCK(_lock); + mutex_m.unlock(); return ret; } @@ -105,7 +104,7 @@ class SyncBitset { * it has been set. Uses synchronization. */ void set(uint32_t i) { - MUTEX_LOCK(_lock); + mutex_m.lock(); while(i >= _sz) { // Slow path: bitset needs to be expanded before the // specified bit can be set @@ -118,7 +117,7 @@ class SyncBitset { assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0); _words[i >> 5] |= (1 << (i & 0x1f)); assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1); - MUTEX_UNLOCK(_lock); + mutex_m.unlock(); } /** @@ -126,7 +125,7 @@ class SyncBitset { * synchronization. */ void setOver(uint32_t i) { - MUTEX_LOCK(_lock); + mutex_m.lock(); while(i >= _sz) { // Slow path: bitset needs to be expanded before the // specified bit can be set @@ -138,7 +137,7 @@ class SyncBitset { assert_lt(i, _sz); _words[i >> 5] |= (1 << (i & 0x1f)); assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1); - MUTEX_UNLOCK(_lock); + mutex_m.unlock(); } @@ -156,7 +155,7 @@ class SyncBitset { const char *_errmsg; // error message if an allocation fails uint32_t _sz; // size as # of bits - MUTEX_T _lock; // mutex + MUTEX_T mutex_m; // mutex uint32_t *_words; // storage }; diff --git a/fast_mutex.h b/fast_mutex.h new file mode 100644 index 0000000..4d4b7cc --- /dev/null +++ b/fast_mutex.h @@ -0,0 +1,248 @@ +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; -*- +Copyright (c) 2010-2012 Marcus Geelnard + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#ifndef _FAST_MUTEX_H_ +#define _FAST_MUTEX_H_ + +/// @file + +// Which platform are we on? +#if !defined(_TTHREAD_PLATFORM_DEFINED_) + #if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) + #define _TTHREAD_WIN32_ + #else + #define _TTHREAD_POSIX_ + #endif + #define _TTHREAD_PLATFORM_DEFINED_ +#endif + +// Check if we can support the assembly language level implementation (otherwise +// revert to the system API) +#if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || \ + (defined(__GNUC__) && (defined(__ppc__))) + #define _FAST_MUTEX_ASM_ +#else + #define _FAST_MUTEX_SYS_ +#endif + +#if defined(_TTHREAD_WIN32_) + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #define __UNDEF_LEAN_AND_MEAN + #endif + #include <windows.h> + #ifdef __UNDEF_LEAN_AND_MEAN + #undef WIN32_LEAN_AND_MEAN + #undef __UNDEF_LEAN_AND_MEAN + #endif +#else + #ifdef _FAST_MUTEX_ASM_ + #include <sched.h> + #else + #include <pthread.h> + #endif +#endif + +namespace tthread { + +/// Fast mutex class. +/// This is a mutual exclusion object for synchronizing access to shared +/// memory areas for several threads. 
It is similar to the tthread::mutex class, +/// but instead of using system level functions, it is implemented as an atomic +/// spin lock with very low CPU overhead. +/// +/// The \c fast_mutex class is NOT compatible with the \c condition_variable +/// class (however, it IS compatible with the \c lock_guard class). It should +/// also be noted that the \c fast_mutex class typically does not provide +/// as accurate thread scheduling as the standard \c mutex class does. +/// +/// Because of the limitations of the class, it should only be used in +/// situations where the mutex needs to be locked/unlocked very frequently. +/// +/// @note The "fast" version of this class relies on inline assembler language, +/// which is currently only supported for 32/64-bit Intel x86/AMD64 and +/// PowerPC architectures on a limited number of compilers (GNU g++ and MS +/// Visual C++). +/// For other architectures/compilers, system functions are used instead. +class fast_mutex { + public: + /// Constructor. +#if defined(_FAST_MUTEX_ASM_) + fast_mutex() : mLock(0) {} +#else + fast_mutex() + { + #if defined(_TTHREAD_WIN32_) + InitializeCriticalSection(&mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_init(&mHandle, NULL); + #endif + } +#endif + +#if !defined(_FAST_MUTEX_ASM_) + /// Destructor. + ~fast_mutex() + { + #if defined(_TTHREAD_WIN32_) + DeleteCriticalSection(&mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_destroy(&mHandle); + #endif + } +#endif + + /// Lock the mutex. + /// The method will block the calling thread until a lock on the mutex can + /// be obtained. The mutex remains locked until \c unlock() is called. 
+ /// @see lock_guard + inline void lock() + { +#if defined(_FAST_MUTEX_ASM_) + bool gotLock; + do { + gotLock = try_lock(); + if(!gotLock) + { + #if defined(_TTHREAD_WIN32_) + Sleep(0); + #elif defined(_TTHREAD_POSIX_) + sched_yield(); + #endif + } + } while(!gotLock); +#else + #if defined(_TTHREAD_WIN32_) + EnterCriticalSection(&mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_lock(&mHandle); + #endif +#endif + } + + /// Try to lock the mutex. + /// The method will try to lock the mutex. If it fails, the function will + /// return immediately (non-blocking). + /// @return \c true if the lock was acquired, or \c false if the lock could + /// not be acquired. + inline bool try_lock() + { +#if defined(_FAST_MUTEX_ASM_) + int oldLock; + #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile ( + "movl $1,%%eax\n\t" + "xchg %%eax,%0\n\t" + "movl %%eax,%1\n\t" + : "=m" (mLock), "=m" (oldLock) + : + : "%eax", "memory" + ); + #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &mLock; + __asm { + mov eax,1 + mov ecx,ptrLock + xchg eax,[ecx] + mov oldLock,eax + } + #elif defined(__GNUC__) && (defined(__ppc__)) + int newLock = 1; + asm volatile ( + "\n1:\n\t" + "lwarx %0,0,%1\n\t" + "cmpwi 0,%0,0\n\t" + "bne- 2f\n\t" + "stwcx. %2,0,%1\n\t" + "bne- 1b\n\t" + "isync\n" + "2:\n\t" + : "=&r" (oldLock) + : "r" (&mLock), "r" (newLock) + : "cr0", "memory" + ); + #endif + return (oldLock == 0); +#else + #if defined(_TTHREAD_WIN32_) + return TryEnterCriticalSection(&mHandle) ? true : false; + #elif defined(_TTHREAD_POSIX_) + return (pthread_mutex_trylock(&mHandle) == 0) ? true : false; + #endif +#endif + } + + /// Unlock the mutex. + /// If any threads are waiting for the lock on this mutex, one of them will + /// be unblocked. 
+ inline void unlock() + { +#if defined(_FAST_MUTEX_ASM_) + #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile ( + "movl $0,%%eax\n\t" + "xchg %%eax,%0\n\t" + : "=m" (mLock) + : + : "%eax", "memory" + ); + #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &mLock; + __asm { + mov eax,0 + mov ecx,ptrLock + xchg eax,[ecx] + } + #elif defined(__GNUC__) && (defined(__ppc__)) + asm volatile ( + "sync\n\t" // Replace with lwsync where possible? + : : : "memory" + ); + mLock = 0; + #endif +#else + #if defined(_TTHREAD_WIN32_) + LeaveCriticalSection(&mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_unlock(&mHandle); + #endif +#endif + } + + private: +#if defined(_FAST_MUTEX_ASM_) + int mLock; +#else + #if defined(_TTHREAD_WIN32_) + CRITICAL_SECTION mHandle; + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_t mHandle; + #endif +#endif +}; + +} + +#endif // _FAST_MUTEX_H_ + diff --git a/pat.h b/pat.h index 2360198..22badc9 100644 --- a/pat.h +++ b/pat.h @@ -404,7 +404,6 @@ class PatternSource { throw 1; } } - MUTEX_INIT(lock_); } virtual ~PatternSource() { } @@ -524,17 +523,7 @@ class PatternSource { */ void lock() { if(!doLocking_) return; // no contention -#ifdef USE_SPINLOCK - if(useSpinlock_) { - // User can ask to use the normal pthreads lock even if - // spinlocks are compiled in. - spinlock_.Enter(); - } else { -#endif - MUTEX_LOCK(lock_); -#ifdef USE_SPINLOCK - } -#endif + mutex_m.lock(); } /** @@ -543,17 +532,7 @@ class PatternSource { */ void unlock() { if(!doLocking_) return; // no contention -#ifdef USE_SPINLOCK - if(useSpinlock_) { - // User can ask to use the normal pthreads lock even if - // spinlocks are compiled in. - spinlock_.Leave(); - } else { -#endif - MUTEX_UNLOCK(lock_); -#ifdef USE_SPINLOCK - } -#endif + mutex_m.unlock(); } /** @@ -627,10 +606,7 @@ class PatternSource { /// if we expect bad I/O latency on some reads. 
bool useSpinlock_; bool randomizeQuals_; /// true -> mess up qualities in a random way -#ifdef USE_SPINLOCK - SpinLock spinlock_; -#endif - MUTEX_T lock_; /// mutex for locking critical regions + MUTEX_T mutex_m; /// mutex for locking critical regions bool verbose_; }; @@ -641,7 +617,6 @@ class PatternSource { class PairedPatternSource { public: PairedPatternSource(uint32_t seed) { - MUTEX_INIT(lock_); seed_ = seed; } virtual ~PairedPatternSource() { } @@ -656,30 +631,19 @@ class PairedPatternSource { * fields is being updated. */ void lock() { -#ifdef USE_SPINLOCK - spinlock_.Enter(); -#else - MUTEX_LOCK(lock_); -#endif + mutex_m.lock(); } /** * Unlock this PairedPatternSource. */ void unlock() { -#ifdef USE_SPINLOCK - spinlock_.Leave(); -#else - MUTEX_UNLOCK(lock_); -#endif + mutex_m.unlock(); } protected: -#ifdef USE_SPINLOCK - SpinLock spinlock_; -#endif - MUTEX_T lock_; /// mutex for locking critical regions + MUTEX_T mutex_m; /// mutex for locking critical regions uint32_t seed_; }; diff --git a/threading.h b/threading.h index b6b5e0b..f95be7d 100644 --- a/threading.h +++ b/threading.h @@ -2,73 +2,38 @@ #define THREADING_H_ #include <iostream> -#include "spinlock.h" +#include "tinythread.h" +#include "fast_mutex.h" -// Note that USE_SPINLOCK trumps BOWTIE_PTHREADS - -#ifdef BOWTIE_PTHREADS -#include <pthread.h> -#endif - -#ifdef USE_SPINLOCK -# include "spinlock.h" -# define MUTEX_T SpinLock -# define MUTEX_INIT(l) -# define MUTEX_LOCK(l) (l).Enter() -# define MUTEX_UNLOCK(l) (l).Leave() +#ifdef NO_SPINLOCK +# define MUTEX_T tthread::mutex #else -# ifdef BOWTIE_PTHREADS -# define MUTEX_T pthread_mutex_t -# define MUTEX_INIT(l) pthread_mutex_init(&l, NULL) -# define MUTEX_LOCK(l) pthread_mutex_lock(&l) -# define MUTEX_UNLOCK(l) pthread_mutex_unlock(&l) -# else -# define MUTEX_T int -# define MUTEX_INIT(l) l = 0 -# define MUTEX_LOCK(l) l = 1 -# define MUTEX_UNLOCK(l) l = 0 -# endif /* BOWTIE_PTHREADS */ -#endif /* USE_SPINLOCK */ - -#ifdef BOWTIE_PTHREADS -static inline void 
joinThread(pthread_t th) { - int ret, *tmp; - if((ret = pthread_join(th, (void**)(int**)&tmp)) != 0) { - std::cerr << "Error: pthread_join returned non-zero status: " - << ret << std::endl; - throw 1; - } -} +# define MUTEX_T tthread::fast_mutex +#endif /* NO_SPINLOCK */ -static inline void createThread(pthread_t* th, - void *(*start_routine) (void *), - void *arg) -{ - int ret; - pthread_attr_t pt_attr; - pthread_attr_init(&pt_attr); - pthread_attr_setdetachstate(&pt_attr, PTHREAD_CREATE_JOINABLE); - pthread_attr_setstacksize(&pt_attr, 2 << 20); - if((ret = pthread_create(th, &pt_attr, start_routine, arg)) != 0) { - std::cerr << "Error: pthread_create returned non-zero status: " - << ret << std::endl; - throw 1; - } -} -#endif /** * Wrap a lock; obtain lock upon construction, release upon destruction. */ class ThreadSafe { public: - ThreadSafe(MUTEX_T* lock) { - lock_ = lock; - MUTEX_LOCK(*lock_); + ThreadSafe(MUTEX_T* ptr_mutex, bool locked = true) { + if(locked) { + this->ptr_mutex = ptr_mutex; + ptr_mutex->lock(); + } + else + this->ptr_mutex = NULL; } - ~ThreadSafe() { MUTEX_UNLOCK(*lock_); } + + ~ThreadSafe() { + if (ptr_mutex != NULL) + ptr_mutex->unlock(); + } + private: - MUTEX_T *lock_; + MUTEX_T *ptr_mutex; }; #endif +