Skip to content
Browse files

Folding now completely works for arrays over tuples

  • Loading branch information...
1 parent c9732dc commit 8ed3b5210cb4a522f5e4942db593d3cc7203300c @dybber dybber committed Sep 30, 2011
Showing with 66 additions and 3 deletions.
  1. +30 −1 Data/Array/Accelerate/OpenCL/Array/Data.hs
  2. +36 −2 Data/Array/Accelerate/OpenCL/Execute.hs
View
31 Data/Array/Accelerate/OpenCL/Array/Data.hs
@@ -17,7 +17,7 @@ module Data.Array.Accelerate.OpenCL.Array.Data (
-- indexArray, copyArray,
peekArray, pokeArray,
-- peekArrayAsync, pokeArrayAsync,
- marshalArrayData,
+ marshalArrayData, marshalLocalArray,
-- marshalTextureData,
existsArrayData, devicePtrs,
@@ -210,6 +210,29 @@ marshalArrayData adata = doMarshal AD.arrayElt adata
mkPrimDispatch(doMarshalPrim,marshalArrayDataPrim)
}
+-- |Build the kernel arguments that request @n@-element __local (shared memory)
+-- arrays, one argument per primitive component of the array's element type.
+-- For a pair, the arguments of the first component precede those of the
+-- second; the unit type contributes no argument. The data stored at the
+-- primitive leaves is ignored.
+--
+marshalLocalArray :: AD.ArrayElt e => Int -> AD.ArrayData e -> CIO [OpenCL.KernelArg]
+marshalLocalArray n adata = doMarshal AD.arrayElt adata
+  where
+    -- Walk the element-type representation down to its primitive components.
+    doMarshal :: ArrayEltR e -> AD.ArrayData e -> CIO [OpenCL.KernelArg]
+    doMarshal ArrayEltRunit             _  = return []
+    doMarshal (ArrayEltRpair aeR1 aeR2) ad = (++) <$> doMarshal aeR1 (fst' ad)
+                                                  <*> doMarshal aeR2 (snd' ad)
+    doMarshal aer                       ad = doMarshalPrim aer ad n
+      where
+        -- NOTE(review): the 'undefined' values presumably serve as type
+        -- witnesses only; confirm that OpenCL.LocalArrayArg never forces them.
+        doMarshalPrim :: ArrayEltR e -> AD.ArrayData e -> Int -> CIO [OpenCL.KernelArg]
+        doMarshalPrim ArrayEltRint    _ = marshalLocalArrayPrim (undefined :: Int)
+        doMarshalPrim ArrayEltRint8   _ = marshalLocalArrayPrim (undefined :: Int8)
+        doMarshalPrim ArrayEltRint16  _ = marshalLocalArrayPrim (undefined :: Int16)
+        doMarshalPrim ArrayEltRint32  _ = marshalLocalArrayPrim (undefined :: Int32)
+        doMarshalPrim ArrayEltRint64  _ = marshalLocalArrayPrim (undefined :: Int64)
+        doMarshalPrim ArrayEltRfloat  _ = marshalLocalArrayPrim (undefined :: Float)
+        doMarshalPrim ArrayEltRdouble _ = marshalLocalArrayPrim (undefined :: Double)
+        -- Any element type not listed above is rejected at run time.
+        doMarshalPrim _               _ = error "marshalLocalArray: unsupported element type for local (shared memory) array"
+
+
-- -- |Bind the device memory arrays to the given texture reference(s), setting
-- -- appropriate type. The arrays are bound, and the list of textures thereby
-- -- consumed, in projection index order --- i.e. right-to-left
@@ -503,6 +526,12 @@ marshalArrayDataPrim :: ( AD.ArrayElt e, AD.ArrayPtrs e ~ Ptr a, DevicePtrs e ~
-> CIO [OpenCL.KernelArg]
marshalArrayDataPrim ad = return . OpenCL.MObjArg <$> getArray ad
+-- |Produce the single kernel argument for a local array of @len@ elements.
+-- The first argument is handed to 'OpenCL.LocalArrayArg' unchanged.
+-- NOTE(review): presumably only its type matters -- confirm against the
+-- OpenCL binding.
+--
+marshalLocalArrayPrim :: Storable e => e -> Int -> CIO [OpenCL.KernelArg]
+marshalLocalArrayPrim witness len = return [OpenCL.LocalArrayArg witness len]
+
-- -- Bind device memory to the given texture reference, setting appropriate type
-- --
View
38 Data/Array/Accelerate/OpenCL/Execute.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables #-}
-{-# LANGUAGE RankNTypes, TupleSections, TypeOperators, TypeSynonymInstances #-}
+{-# LANGUAGE RankNTypes, TupleSections, TypeOperators, TypeSynonymInstances,
+ FlexibleInstances #-}
-- |
-- Module : Data.Array.Accelerate.OpenCL.Execute
-- Copyright : [2008..2011] Manuel M T Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell
@@ -373,7 +374,7 @@ foldOp c kernel bindings acc aenv (Array sh0 in0)
| dim sh0 == 1 = do
cfg@(_,_,(_,g,_)) <- configure kernel acc (size sh0)
res@(Array _ out) <- newArray (bool c 1 (g > 1)) (toElt (fst sh0,g)) :: CIO (Array (dim:.Int) e)
- dispatch cfg bindings aenv (((((),size sh0),out),in0), OpenCL.LocalArrayArg (undefined :: Int) (size sh0))
+ dispatch cfg bindings aenv (((((),size sh0),out),in0), LocalArray out (size sh0))
freeArray in0
if g > 1 then foldOp c kernel bindings acc aenv res
else return (Array (fst sh0) out)
@@ -707,6 +708,39 @@ instance (Marshalable a, Marshalable b) => Marshalable (a,b) where
marshal (a,b) = (++) <$> marshal a <*> marshal b
+
+-- With OpenCL, every array has to be allocated outside the kernels, and this
+-- includes __local (shared memory) arrays.
+-- Use this type to specify that a kernel needs a local array of the given size.
+--
+-- The declaration deliberately carries no class context: datatype contexts are
+-- deprecated in GHC, and the 'Marshalable' instances for 'LocalArray' state the
+-- constraints they need themselves.
+data LocalArray a = LocalArray a Int
+
+-- A local array over a pair is marshalled component-wise: each component is
+-- requested with the same size, and the arguments of the first component come
+-- before those of the second.
+instance (Marshalable a, Marshalable b,
+          Marshalable (LocalArray a), Marshalable (LocalArray b))
+      => Marshalable (LocalArray (a,b)) where
+  marshal (LocalArray (l, r) len) = do
+    lArgs <- marshal (LocalArray l len)
+    rArgs <- marshal (LocalArray r len)
+    return (lArgs ++ rArgs)
+
+-- A local array described by array data is marshalled by 'marshalLocalArray',
+-- which receives the requested size first and the array data second.
+instance AD.ArrayElt e => Marshalable (LocalArray (AD.ArrayData e)) where
+  marshal (LocalArray arr len) = marshalLocalArray len arr
+
+
+-- #define primLocalMarshalable(ty) \
+-- instance Marshalable (LocalArray ty) where \
+-- marshal (LocalArray x n) = return $ [OpenCL.LocalArrayArg x n]
+
+-- primLocalMarshalable(Int8)
+-- primLocalMarshalable(Int16)
+-- primLocalMarshalable(Int32)
+-- primLocalMarshalable(Int64)
+-- primLocalMarshalable(Word8)
+-- primLocalMarshalable(Word16)
+-- primLocalMarshalable(Word32)
+-- primLocalMarshalable(Word64)
+-- primLocalMarshalable(Float)
+-- primLocalMarshalable(Double)
+-- primLocalMarshalable((Ptr a))
+
+
-- Link the binary object implementing the computation, configure the kernel
-- launch parameters, and initiate the computation. This also handles lifting
-- and binding of array references from scalar expressions.

0 comments on commit 8ed3b52

Please sign in to comment.
Something went wrong with that request. Please try again.