Permalink
Browse files

Change allocation mechanism to use GHC internals

This commit changes the Data.Vector.SIMD.Mutable.MVector allocation
mechanism. It no longer uses _mm_malloc bindings through FFI, but the
internal newAlignedPinnedByteArray# function and some trickery.

Most likely highly non-portable. But brings SIMD vector speed on-par
with Storable.

Could be improved a little more by not using newForeignPtr_ but
GHC.ForeignPtr.PlainPtr if this were exported.
  • Loading branch information...
NicolasT committed Jul 8, 2012
1 parent a4f1374 commit 5ec539167254435ef4e7d308706dcafae09504d2
Showing with 18 additions and 26 deletions.
  1. +0 −15 cbits/vector-simd.c
  2. +18 −11 src/Data/Vector/SIMD/Mutable.hs
View
@@ -19,7 +19,6 @@
#include <stdint.h>
#include <xmmintrin.h>
-#include <mm_malloc.h>
//#define DEBUG
@@ -30,20 +29,6 @@
# define MSG
#endif
-void * _mm_malloc_stub(size_t size, size_t alignment) {
- void *ptr = _mm_malloc(size, alignment);
-
- MSG("_mm_malloc(%d, %d) = %p\n", size, alignment, ptr);
-
- return ptr;
-}
-
-void _mm_free_stub(void *ptr) {
- MSG("_mm_free(%p)\n", ptr);
-
- return _mm_free(ptr);
-}
-
void _vector_simd_xor_sse42(const uint32_t *a, const uint32_t *b, const uint32_t *o, ssize_t len) {
ssize_t i = 0,
todo = len / 32;
@@ -19,6 +19,7 @@
{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, DeriveDataTypeable #-}
{-# LANGUAGE ScopedTypeVariables, ForeignFunctionInterface, EmptyDataDecls #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleContexts #-}
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
module Data.Vector.SIMD.Mutable (
MVector(..), IOVector, STVector,
@@ -47,6 +48,10 @@ import Control.Monad.ST
import Data.Vector.Storable.Internal (getPtr, updPtr)
+import GHC.Base
+import GHC.ForeignPtr (ForeignPtr(..))
+import GHC.Ptr (Ptr(..))
+
data MVector o s a = MVector {-# UNPACK #-} !Int
{-# UNPACK #-} !(ForeignPtr a)
deriving (Typeable)
@@ -129,21 +134,23 @@ instance (Storable a, Alignment o) => G.MVector (MVector o) a where
moveArray p q n
{-# INLINE basicUnsafeMove #-}
-foreign import ccall unsafe "_mm_malloc_stub" _mm_malloc :: CSize -> CSize -> IO (Ptr a)
-foreign import ccall unsafe "&_mm_free_stub" finalizer_mm_free :: FinalizerPtr a
-
mallocVector :: (Storable a, Alignment o) => Int -> o -> IO (ForeignPtr a)
mallocVector = doMalloc undefined
where
- doMalloc :: (Storable a', Alignment o') => a' -> Int -> o' -> IO (ForeignPtr a')
- doMalloc dummyA size dummyO = do
- ptr <- _mm_malloc (fromIntegral (size * sizeOf dummyA))
- (fromIntegral $ alignment dummyO)
- if ptr == nullPtr
- then ioError (IOError Nothing ResourceExhausted "_mm_malloc" "out of memory"
- Nothing Nothing)
- else newForeignPtr finalizer_mm_free ptr
+ doMalloc :: (Storable b, Alignment p) => b -> Int -> p -> IO (ForeignPtr b)
+ doMalloc b l p = IO $ \s ->
+ case newAlignedPinnedByteArray# bytes align s of { (# s', ba #) ->
+ case newForeignPtr_ (Ptr $ byteArrayContents# (unsafeCoerce# ba)) of { IO f ->
+ case f s' of { (# s''', p #) -> (# s''', p #) }
+ }
+ }
+ where
+ !(I# size) = sizeOf b
+ !(I# len) = l
+ !bytes = size *# len
+ !(I# align) = alignment p
{-# INLINE doMalloc #-}
+ {-# SPECIALIZE doMalloc :: W.Word8 -> Int -> A16 -> IO (ForeignPtr W.Word8) #-}
{-# INLINE mallocVector #-}
{-# SPECIALIZE mallocVector :: Int -> A16 -> IO (ForeignPtr W.Word8) #-}

0 comments on commit 5ec5391

Please sign in to comment.