Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Change allocation mechanism to use GHC internals

This commit changes the Data.Vector.SIMD.Mutable.MVector allocation
mechanism. It no longer uses _mm_malloc bindings through FFI, but the
internal newAlignedPinnedByteArray# function and some trickery.

Most likely highly non-portable. But brings SIMD vector speed on-par
with Storable.

Could be improved a little more by not using newForeignPtr_ but
GHC.ForeignPtr.PlainPtr if this were exported.
  • Loading branch information...
commit 5ec539167254435ef4e7d308706dcafae09504d2 1 parent a4f1374
@NicolasT authored
Showing with 18 additions and 26 deletions.
  1. +0 −15 cbits/vector-simd.c
  2. +18 −11 src/Data/Vector/SIMD/Mutable.hs
View
15 cbits/vector-simd.c
@@ -19,7 +19,6 @@
#include <stdint.h>
#include <xmmintrin.h>
-#include <mm_malloc.h>
//#define DEBUG
@@ -30,20 +29,6 @@
# define MSG
#endif
-void * _mm_malloc_stub(size_t size, size_t alignment) {
- void *ptr = _mm_malloc(size, alignment);
-
- MSG("_mm_malloc(%d, %d) = %p\n", size, alignment, ptr);
-
- return ptr;
-}
-
-void _mm_free_stub(void *ptr) {
- MSG("_mm_free(%p)\n", ptr);
-
- return _mm_free(ptr);
-}
-
void _vector_simd_xor_sse42(const uint32_t *a, const uint32_t *b, const uint32_t *o, ssize_t len) {
ssize_t i = 0,
todo = len / 32;
View
29 src/Data/Vector/SIMD/Mutable.hs
@@ -19,6 +19,7 @@
{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, DeriveDataTypeable #-}
{-# LANGUAGE ScopedTypeVariables, ForeignFunctionInterface, EmptyDataDecls #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleContexts #-}
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
module Data.Vector.SIMD.Mutable (
MVector(..), IOVector, STVector,
@@ -47,6 +48,10 @@ import Control.Monad.ST
import Data.Vector.Storable.Internal (getPtr, updPtr)
+import GHC.Base
+import GHC.ForeignPtr (ForeignPtr(..))
+import GHC.Ptr (Ptr(..))
+
data MVector o s a = MVector {-# UNPACK #-} !Int
{-# UNPACK #-} !(ForeignPtr a)
deriving (Typeable)
@@ -129,21 +134,23 @@ instance (Storable a, Alignment o) => G.MVector (MVector o) a where
moveArray p q n
{-# INLINE basicUnsafeMove #-}
-foreign import ccall unsafe "_mm_malloc_stub" _mm_malloc :: CSize -> CSize -> IO (Ptr a)
-foreign import ccall unsafe "&_mm_free_stub" finalizer_mm_free :: FinalizerPtr a
-
mallocVector :: (Storable a, Alignment o) => Int -> o -> IO (ForeignPtr a)
mallocVector = doMalloc undefined
where
- doMalloc :: (Storable a', Alignment o') => a' -> Int -> o' -> IO (ForeignPtr a')
- doMalloc dummyA size dummyO = do
- ptr <- _mm_malloc (fromIntegral (size * sizeOf dummyA))
- (fromIntegral $ alignment dummyO)
- if ptr == nullPtr
- then ioError (IOError Nothing ResourceExhausted "_mm_malloc" "out of memory"
- Nothing Nothing)
- else newForeignPtr finalizer_mm_free ptr
+ doMalloc :: (Storable b, Alignment p) => b -> Int -> p -> IO (ForeignPtr b)
+ doMalloc b l p = IO $ \s ->
+ case newAlignedPinnedByteArray# bytes align s of { (# s', ba #) ->
+ case newForeignPtr_ (Ptr $ byteArrayContents# (unsafeCoerce# ba)) of { IO f ->
+ case f s' of { (# s''', p #) -> (# s''', p #) }
+ }
+ }
+ where
+ !(I# size) = sizeOf b
+ !(I# len) = l
+ !bytes = size *# len
+ !(I# align) = alignment p
{-# INLINE doMalloc #-}
+ {-# SPECIALIZE doMalloc :: W.Word8 -> Int -> A16 -> IO (ForeignPtr W.Word8) #-}
{-# INLINE mallocVector #-}
{-# SPECIALIZE mallocVector :: Int -> A16 -> IO (ForeignPtr W.Word8) #-}
Please sign in to comment.
Something went wrong with that request. Please try again.