Permalink
Browse files

Merge branch 'GenericOffChip'

  • Loading branch information...
2 parents bef2e07 + 9a9634c commit c44b9b273ea126a4b6a225d90637e87fd3bf8eec @rrnewton rrnewton committed May 4, 2012
View
@@ -8,7 +8,7 @@
# Set up some Variables
# --------------------------------------------------------------------------------
-OUR_PKGS= abstract-par/ monad-par-extras/ monad-par/ meta-par/
+OUR_PKGS= abstract-par/ abstract-par-offchip/ monad-par-extras/ monad-par/ meta-par/
# This isn't really meant to be distributed:
# meta-par-cuda/
@@ -92,6 +92,7 @@ test:
# Running a full test uses cabal-dev to sandbox the build.
validate:
$(MAKE) mega-install CABAL='cabal-dev' CABAL_ARGS='--enable-tests --disable-documentation'
+ (cd examples; $(MAKE) validate)
# force-reinstalls
# SANDBOX=`pwd`/cabal-dev
# pushd monad-par
@@ -109,8 +110,9 @@ doc:
rm -rf docs
mkdir docs
# Link EVERYTHING to Haddock:
- ${CABAL_INSTALL} ${ALL_PKGS} --enable-documentation \
- --haddock-html-location='http://hackage.haskell.org/packages/archive/$pkg/latest/doc/html' --with-haddock=${HADDOCK}
+ ${CABAL_INSTALL} ${ALL_GPU_PKGS} --enable-documentation \
+ --haddock-html-location='http://hackage.haskell.org/packages/archive/$pkg/latest/doc/html' \
+ --with-haddock=${HADDOCK} --force-reinstalls
mv */dist/doc/html/* docs/
mv ./Deques/*/dist/doc/html/* docs/
mv ./accelerate/*/dist/doc/html/* docs/
@@ -26,8 +26,12 @@ import qualified Data.Array.Accelerate.IO as IO
--------------------------------------------------------------------------------
-
-- | A class containing Accelerate-specific `Par` operations.
+--
+-- A minimal complete instance contains:
+-- * one of `runAcc` or `spawnAcc`
+-- * one of `runAccWith` or `spawnAccWith`
+-- * `compileAcc`.
class ParFuture iv p => ParAccelerate iv p where
-- | Run an Accelerate computation and wait for its result. In the
@@ -38,13 +42,37 @@ class ParFuture iv p => ParAccelerate iv p where
--
-- Moreover, when configured with a high-performance /CPU/ Accelerate backend
-- in the future this routine can enable automatic CPU/GPU work partitioning.
+ --
+ -- The specific Accelerate implementation is NOT specified when
+ -- calling `runAcc`. That choice is deferred to the point where
+ -- `runPar` is invoked for the scheduler in question.
runAcc :: (Arrays a) => Acc a -> p a
runAcc comp = spawnAcc comp >>= get
-- | Like `runAcc` but runs the Accelerate computation asynchronously.
spawnAcc :: (Arrays a) => Acc a -> p (iv a)
+ -- This default implementation is actually QUITE BAD. It's an
+ -- anti-pattern. We don't want to wait until the spawned
+ -- computation is executed to enqueue the GPU computation. This is
+ -- a problem with child-stealing Par implementations, but not so much
+ -- with parent-stealing ones.
+ spawnAcc acc = spawn_ $ runAcc acc
+
+ -- | Prepare a GPU computation for repeated execution.
+ --
+ -- Typically, this is applied to its first argument once in an outer
+ -- scope then applied to its second argument repeatedly inside a loop.
+ --
+ -- Whereas the normal `runAcc` will /attempt/ to cache compiled
+ -- programs and avoid recompilation, this function guarantees no
+ -- recompilation and further avoids some overhead from re-executing
+ -- the Accelerate front-end.
+ --
+ -- See "Data.Array.Accelerate.CUDA.run1" for more explanation.
+ compileAcc :: (Arrays a, Arrays b) => (Acc a -> Acc b) -> a -> p b
+
- -- | Spawn an computation which may execute /either/ on the CPU or GPU
+-- | Spawn a computation which may execute /either/ on the CPU or GPU
-- based on runtime load. The CPU and GPU implementations may employ
-- completely different algorithms; this is an UNSAFE operation which
-- will not guarantee determinism unless the user ensures that the
@@ -57,6 +85,36 @@ class ParFuture iv p => ParAccelerate iv p where
--
unsafeHybrid :: Arrays b => (b -> a) -> (p a, Acc b) -> p (iv a)
+ -- This default implementation simply /always/ runs the GPU version:
+ unsafeHybrid cvrt (_, acc) = spawn_ $ do x <- runAcc acc
+ return (cvrt x)
+
+ ------------------------------------------------------------
+ -- * Control over selecting the Accelerate implementation.
+
+ -- | Retrieve the Accelerate @run@ function that is the default for
+ -- this execution, i.e. the one used for `runAcc` or `spawnAcc`.
+ getDefaultAccImpl :: p (Acc a -> a)
+
+ -- | Like `runAcc` but specify a specific Accelerate implementation, e.g. @CUDA.run@.
+ runAccWith :: (Arrays a) => (Acc a -> a) -> Acc a -> p a
+ runAccWith runner comp = spawnAccWith runner comp >>= get
+
+ -- | Analogous to `runAccWith`.
+ spawnAccWith :: (Arrays a) => (Acc a -> a) -> Acc a -> p (iv a)
+ spawnAccWith runner acc = spawn_ $ runAccWith runner acc
+
+ -- | Analogous to other @*With@ functions.
+ unsafeHybridWith :: Arrays b => (Acc b -> b) -> (b -> a) -> (p a, Acc b) -> p (iv a)
+ -- This default implementation simply /always/ runs the GPU version:
+ unsafeHybridWith runner cvrt (_, acc) =
+ spawn_ $ do x <- runAccWith runner acc
+ return (cvrt x)
+
+ -- TODO: to be fully consistent we should perhaps have
+ -- compileAccWith, but that gets complicated.
+
+
--------------------------------------------------------------------------------
@@ -0,0 +1,129 @@
+{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-}
+{-# LANGUAGE KindSignatures, ConstraintKinds, TypeFamilies #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DefaultSignatures #-}
+{-# OPTIONS_GHC -Wall #-}
+
+-- | This module is conceptually part of "Control.Monad.Par.Class", but
+-- is factored into a separate package because it depends on
+-- ConstraintKinds, available only in GHC 7.4 and later.
+
+module Control.Monad.Par.OffChip
+ (
+ -- * The Class
+ ParOffChip(..),
+
+ -- * Example applications of `unsafeHybrid`
+--- unsafeHybridVector,
+-- unsafeHybridIArray
+ ) where
+
+import Control.Monad.Par.Class
+-- import Data.Array.IArray (IArray)
+-- import Foreign (Ptr, Storable)
+-- import qualified Data.Array.IArray as IArray
+-- import qualified Data.Vector.Storable as Vector
+import GHC.Exts (Constraint)
+
+--------------------------------------------------------------------------------
+
+-- | A generic interface for operations that happen outside of the CPU.
+class ParFuture ivar m => ParOffChip con ivar m where
+ -- | A constraint on types that must be sent off of the CPU. This
+ -- typically includes, but is not limited to, serializability.
+ type OffChipConstraint a :: Constraint
+
+ -- | Run a computation off of the CPU and wait for its result. A
+ -- common example is invoking a GPU computation.
+ --
+ -- > runOffChip runner foreignComp
+ --
+ -- From the type of this function you can see that @runner@ /already/
+ -- has the capability to execute the foreign computations. The
+ -- purpose of using `runOffChip` is to inform the 'Par' CPU scheduler
+ -- that a blocking operation is about to occur.
+ --
+ -- This can result in better performance by enabling the CPU to do
+ -- other work while waiting for the off-chip computation to complete.
+ runOffChip :: (OffChipConstraint a) => (con a -> a) -> con a -> m a
+
+ -- | Non-blocking variant of `runOffChip`; the default is `spawn_` of `runOffChip`.
+ spawnOffChip :: (OffChipConstraint a) => (con a -> a) -> con a -> m (ivar a)
+-- default spawnOffChip :: (ParIVar ivar m, C a) => (con a -> a) -> con a -> m (ivar a)
+ spawnOffChip runner comp = spawn_ (runOffChip runner comp)
+
+ -- | Spawn a computation which may execute /either/ on the CPU or
+ -- off-chip based on runtime load. The CPU and off-chip
+ -- implementations may employ completely different algorithms.
+ -- Therefore, this is an UNSAFE operation which will not guarantee
+ -- determinism unless the user /ensures/ that the two algorithms are
+ -- equivalent.
+ --
+ -- Usage is:
+ --
+ -- > unsafeHybrid runner conversion (cpuVer, offChipVer)
+ --
+ -- As with `runOffChip`, the @runner@ invokes the actual off-chip
+ -- computation. The new parameter, @conversion@, converts results
+ -- from the off-chip computation to be of the same type as the CPU
+ -- version of the function. Finally, the pair argument contains two
+ -- complete computations, only one of which is invoked at runtime.
+ --
+ unsafeHybrid :: (OffChipConstraint b) => (con b -> b) -> (b -> a) -> (m a, con b) -> m (ivar a)
+
+--------------------------------------------------------------------------------
+
+#if 0
+-- | A class containing Accelerate-specific `Par` operations.
+class ParFuture iv p => ParAccelerate iv p where
+
+ -- | Run an Accelerate computation and wait for its result. In the
+ -- context of a `Par` computation this can result in better
+ -- performance than using an Accelerate-provided `run` function
+ -- directly, because this version enables the CPU work scheduler to do
+ -- other work while waiting for the GPU computation to complete.
+ --
+ -- Moreover, when configured with a high-performance /CPU/ Accelerate backend
+ -- in the future this routine can enable automatic CPU/GPU work partitioning.
+ runAcc :: (Arrays a) => (Acc a -> a) -> Acc a -> p a
+ runAcc comp = spawnAcc comp >>= get
+
+ -- | Like `runAcc` but runs the Accelerate computation asynchronously.
+ spawnAcc :: (Arrays a) => Acc a -> p (iv a)
+
+ -- | Spawn a computation which may execute /either/ on the CPU or GPU
+ -- based on runtime load. The CPU and GPU implementations may employ
+ -- completely different algorithms; this is an UNSAFE operation which
+ -- will not guarantee determinism unless the user ensures that the
+ -- result of both computations is always equivalent.
+ --
+ --
+ -- A common application of `unsafeHybrid` is the following:
+ --
+ -- > unsafeHybrid Data.Array.Accelerate.IO.toVector
+ --
+ unsafeHybrid :: Arrays b => (b -> a) -> (p a, Acc b) -> p (iv a)
+
+--------------------------------------------------------------------------------
+
+-- | An example application of `unsafeHybrid` for vectors.
+unsafeHybridVector :: (Vector.Storable a, Elt a,
+ IO.BlockPtrs (EltRepr a) ~ ((), Ptr a),
+ ParAccelerate iv p)
+ => (p (Vector.Vector a), Acc (Array DIM1 a))
+ -> p (iv (Vector.Vector a))
+-- /TODO/: make a variant with unrestricted 'Shape' that, e.g., yields
+-- a vector in row-major order.
+unsafeHybridVector = unsafeHybrid IO.toVector
+
+
+-- | An example application of `unsafeHybrid` for any IArray type.
+unsafeHybridIArray :: ( EltRepr ix ~ EltRepr sh
+ , IArray a e, IArray.Ix ix
+ , Shape sh, Elt ix, Elt e
+ , ParAccelerate iv p)
+ => (p (a ix e), Acc (Array sh e))
+ -> p (iv (a ix e))
+unsafeHybridIArray = unsafeHybrid toIArray
+ --IO.toArray
+#endif
@@ -0,0 +1,30 @@
+Copyright Simon Marlow 2011
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Simon Marlow nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,31 @@
+Name: abstract-par-offchip
+Version: 0.3
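+Synopsis: Generic Par-monad interface for computations that run off the CPU (e.g. on a GPU).
+
+Description: Provides the ParOffChip class (module Control.Monad.Par.OffChip). It is conceptually part of Control.Monad.Par.Class but lives in its own package because it depends on ConstraintKinds (GHC 7.4 and later).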
+
+-- Version history:
+--
+-- 0.3: Initial release.
+
+Homepage: https://github.com/simonmar/monad-par
+License: BSD3
+License-file: LICENSE
+Author: Adam Foltzer 2011-2012
+Maintainer: Ryan Newton <rrnewton@gmail.com>
+Copyright: (c) Adam Foltzer 2011-2012
+Stability: Experimental
+Category: Parallelism
+Build-type: Simple
+Cabal-version: >=1.8
+
+Library
+ Exposed-modules:
+ -- Provides the class ParOffChip:
+ Control.Monad.Par.OffChip
+
+ -- Only GHC 7.4 and greater have constraint kinds.
+ Build-depends: base >= 4.5
+ , abstract-par >= 0.3
+
+ ghc-options: -Wall
@@ -39,7 +39,6 @@ module Control.Monad.Par.Class
where
import Control.DeepSeq
-import GHC.Exts (Constraint)
--------------------------------------------------------------------------------
View
@@ -16,6 +16,12 @@ test:
ghc -threaded --make benchmark.hs -o benchmark.run
SHORTRUN=1 THREADS="1" ./benchmark.run
+validate:
+ ./generate_cabal.sh
+ ghc -threaded --make benchmark.hs -o benchmark.run
+# SHORTRUN=1 THREADS="1" ./benchmark.run
+ cabal-dev install -s ../cabal-dev/
+
clean:
cabal clean || echo
rm -rf bin
@@ -3,7 +3,8 @@
-- What is the overhead of spawning a single operation on the GPU?
-- In this microbenchmark we measure it.
-import Control.Monad.Par.Accelerate
+-- import Control.Monad.Par.Accelerate
+import Control.Monad.Par.OffChip
import Control.Monad.Par.Meta.AccSMP (runPar, get)
-- import System.Random.MWC
@@ -19,11 +20,22 @@ import Control.Exception (evaluate)
#ifdef USECUDA
import Foreign.CUDA.Driver.Device (initialise)
+import qualified Data.Array.Accelerate.CUDA as Run
+#else
+import qualified Data.Array.Accelerate.Interpreter as Run
#endif
-
--------------------------------------------------------------------------------
+-- Helpers:
+
+-- | Run an Accelerate computation through the generic off-chip interface and
+-- wait for its result, using the @run@ of whichever backend is imported as @Run@.
+runAcc = runOffChip Run.run
+
+-- | Non-blocking counterpart of 'runAcc', built on 'spawnOffChip'.
+spawnAcc = spawnOffChip Run.run
+
+#ifdef USECUDA
+-- FIXME: unfinished helper. The original line read @Run.run1 =@, which has no
+-- right-hand side and uses a qualified name as a binder, so the module could
+-- not parse with USECUDA defined. Left commented out until it is implemented.
+-- Run.run1 =
+#endif
+--------------------------------------------------------------------------------
-- Dot product
-- -----------
@@ -35,11 +47,9 @@ dotpAcc xs ys
in
A.fold (+) 0 (A.zipWith (*) xs' ys')
-
main = do
putStrLn "Measuring one roundtrip through the GPU:"
-
#ifdef USECUDA
start <- getPOSIXTime
initialise []
@@ -54,7 +64,7 @@ main = do
runone = do
start <- getPOSIXTime
- x <- evaluate$ runPar $ spawnAcc acc >>= get
+ x <- evaluate$ runPar $ runAcc acc
putStrLn$ "Result "++show x
end <- getPOSIXTime
return (end-start)
Oops, something went wrong.

0 comments on commit c44b9b2

Please sign in to comment.