/
Debug.hs
95 lines (79 loc) · 2.69 KB
/
Debug.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
-- |
-- Module : Data.Array.Accelerate.LLVM.PTX.Debug
-- Copyright : [2014..2017] Trevor L. McDonell
-- [2014..2014] Vinod Grover (NVIDIA Corporation)
-- License : BSD3
--
-- Maintainer : Trevor L. McDonell <tmcdonell@cse.unsw.edu.au>
-- Stability : experimental
-- Portability : non-portable (GHC extensions)
--
module Data.Array.Accelerate.LLVM.PTX.Debug (
module Data.Array.Accelerate.Debug,
module Data.Array.Accelerate.LLVM.PTX.Debug,
) where
import Data.Array.Accelerate.Debug hiding ( timed, elapsed )
import Foreign.CUDA.Driver.Stream ( Stream )
import qualified Foreign.CUDA.Driver.Event as Event
import Control.Concurrent
import Control.Exception ( finally, onException )
import Data.Time.Clock
import System.CPUTime
import Text.Printf
import GHC.Float
-- | Run an action and, if the given debug flag is set, trace how long it took.
-- The second argument builds the message from the elapsed wall-clock, CPU and
-- GPU times, in that order and each measured in seconds; 'elapsed' is a
-- ready-made formatter of this shape.
--
timed
    :: Flag
    -> (Double -> Double -> Double -> String)
    -> Maybe Stream
    -> IO ()
    -> IO ()
{-# INLINE timed #-}
timed flag format = monitorProcTime (getFlag flag) report
  where
    -- Render the three timings and emit them under the same debug flag.
    report wall cpu gpu = traceIO flag (format wall cpu gpu)

-- | Run an action, optionally measuring how long it takes.
--
-- The first argument decides at runtime whether to take measurements; it is
-- only consulted when 'debuggingIsEnabled' holds, otherwise the action is run
-- directly with no instrumentation.
--
-- When measuring, the action is bracketed by a pair of CUDA events recorded in
-- the given stream, as well as by wall-clock and CPU-time samples. The second
-- argument is then called with the elapsed wall-clock, CPU and GPU times (in
-- that order, all in seconds). It is invoked from a separate thread once the
-- GPU has reached the closing event, so this function returns without waiting
-- for the GPU.
--
-- The events are released on every path: if the action or any of the event
-- operations throws, both events are destroyed before the exception
-- propagates.
--
monitorProcTime
    :: IO Bool
    -> (Double -> Double -> Double -> IO ())
    -> Maybe Stream
    -> IO ()
    -> IO ()
{-# INLINE monitorProcTime #-}
monitorProcTime enabled display stream action = do
  yes <- if debuggingIsEnabled then enabled else return False
  if yes
    then do
      gpuBegin <- Event.create []
      -- Don't leak the first event if the second one cannot be created.
      gpuEnd   <- Event.create [] `onException` Event.destroy gpuBegin

      -- Destroy both events, attempting the second even if the first fails.
      let release = Event.destroy gpuBegin `finally` Event.destroy gpuEnd

      -- Host-side measurements. Should the action (or recording an event)
      -- throw, nobody will ever wait on the events, so release them here.
      (wallTime, cpuTime) <- flip onException release $ do
        wallBegin <- getCurrentTime
        cpuBegin  <- getCPUTime
        Event.record gpuBegin stream
        action
        Event.record gpuEnd stream
        cpuEnd    <- getCPUTime
        wallEnd   <- getCurrentTime
        return ( realToFrac (diffUTCTime wallEnd wallBegin)
               , fromIntegral (cpuEnd - cpuBegin) * 1E-12 )   -- picoseconds

      -- Wait for the GPU to finish executing then display the timing execution
      -- message. Do this in a separate thread so that the remaining kernels can
      -- be queued asynchronously.
      --
      _ <- forkIO $ do
        -- The events are no longer needed once the elapsed time has been
        -- read; release them before displaying, and also if waiting fails.
        diff <- (Event.block gpuEnd >> Event.elapsedTime gpuBegin gpuEnd)
                  `finally` release
        let gpuTime = float2Double $ diff * 1E-3              -- milliseconds
        display wallTime cpuTime gpuTime
      --
      return ()

    else
      action

-- | Format elapsed wall-clock, CPU and GPU times (given in that order) as a
-- single line. Each value is rendered by 'showFFloatSIBase' with three
-- decimal places, base 1000, and the unit suffix @s@.
--
{-# INLINE elapsed #-}
elapsed :: Double -> Double -> Double -> String
elapsed wallTime cpuTime gpuTime =
  printf "%s (wall), %s (cpu), %s (gpu)" (si wallTime) (si cpuTime) (si gpuTime)
  where
    -- Shared rendering for all three measurements.
    si :: Double -> String
    si t = showFFloatSIBase (Just 3) 1000 t "s"