Skip to content

Commit

Permalink
Better or worse?
Browse files Browse the repository at this point in the history
  • Loading branch information
fmaste committed Apr 17, 2024
1 parent 6911a82 commit 2d40a43
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 68 deletions.
52 changes: 26 additions & 26 deletions bench/stdout-tools/app/tq.hs
Expand Up @@ -15,57 +15,57 @@ Count lines
-rw-r--r-- 1 fmaste users 6.4G Apr 10 19:28 bench/stdout-tools/5nodes.stdout
> time cat bench/stdout-tools/5nodes.stdout | wc -l
real 0m1.946s
user 0m0.105s
sys 0m2.728s
real 0m2.039s
user 0m0.091s
sys 0m2.842s
> time jq --raw-input . bench/stdout-tools/5nodes.stdout | wc -l
25581640
real 1m30.707s
user 1m28.129s
sys 0m8.124s
real 1m30.745s
user 1m28.116s
sys 0m8.150s
> time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer count-lines
25581640
real 0m11.630s
user 0m10.836s
sys 0m0.826s
real 0m15.387s
user 0m13.194s
sys 0m2.173s
Count all the ns="Forge.Loop.StartLeadershipCheckPlus"
--------------------------------------------------------------------------------
-- Using jq for everything:
> time jq --raw-input --compact-output 'try fromjson | if (type == "object" and has("at")) then select(.ns=="Forge.Loop.StartLeadershipCheckPlus") else empty end' bench/stdout-tools/5nodes.stdout | wc -l
264150
real 1m30.615s
user 1m29.159s
sys 0m1.502s
real 1m28.688s
user 1m27.187s
sys 0m1.555s
-- Using jq but first filter non JSON lines with grep:
> time grep -E "^{.*" bench/stdout-tools/5nodes.stdout | jq --compact-output 'select(.ns == "Forge.Loop.StartLeadershipCheckPlus")' | wc -l
264150
real 1m9.828s
user 1m12.247s
sys 0m5.901s
real 1m10.258s
user 1m12.628s
sys 0m5.999s
$ time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer count-FLSLCP
264150
real 0m26.420s
user 0m25.654s
sys 0m0.837s
real 0m30.316s
user 0m28.140s
sys 0m2.167s
Heap changes
--------------------------------------------------------------------------------
> grep -E "^{.*" bench/stdout-tools/5stdout | jq 'select(.ns == "Resources") | .data.Heap' | uniq
real 1m5.810s
user 1m7.716s
sys 0m3.674s
> time grep -E "^{.*" bench/stdout-tools/5nodes.stdout | jq 'select(.ns == "Resources") | .data.Heap' | uniq
real 1m8.960s
user 1m11.298s
sys 0m5.972s
> time cabal run tq -- --file 5stdout:bench/stdout-tools/5stdout --reducer heap-changes
real 0m54.360s
user 0m53.606s
sys 0m0.873s
> time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer heap-changes
real 1m1.578s
user 0m59.291s
sys 0m2.264s
Heap changes (52 nodes)
--------------------------------------------------------------------------------
Expand Down
10 changes: 6 additions & 4 deletions bench/stdout-tools/src/Cardano/Tracer/Trace.hs
Expand Up @@ -70,10 +70,12 @@ fromJson text =
Text.drop 8 text''
-- Consume all the text until the next '"'.
(nsText, text'''') = {-# SCC "fromJson_break_ns" #-}
Text.break (== '"') (Text.drop 8 text''')
-- Drop closing '",' of 'ns' and leave the unconsumed Text, the
-- `Remainder`, as a new JSON object.
in Right $ Trace utcTime nsText ("{" <> Text.drop 2 text'''')
Text.break (== '"') text'''
-- Drop closing '",' of 'ns' and leave the unconsumed Text, the
-- `Remainder`, as a new JSON object.
remainderText = {-# SCC "fromJson_remainder" #-}
"{" <> Text.drop 2 text''''
in Right $ Trace utcTime nsText remainderText
-- Probably not a Trace JSON object.
(Left _) -> Left text
-- Assumption failed.
Expand Down
83 changes: 45 additions & 38 deletions bench/stdout-tools/src/Data/Log.hs
Expand Up @@ -66,10 +66,10 @@ lineFoldl'' handle decoder f initialAcc = do
data Decoder = Decoder
-- TODO: Strict or not? Let the function caller decide?
-- Function `nextLine` is all about pattern matching these things.
{ _unfinishedLine :: !Text.Text
, _textLeft :: !Text.Text
, _byteStringLeft :: !BS.ByteString
, _textDecoding :: !(BS.ByteString -> TextE.Decoding)
{ _unfinishedLine :: Text.Text
, _textLeft :: Text.Text
, _byteStringLeft :: BS.ByteString
, _textDecoding :: (BS.ByteString -> TextE.Decoding)
}

-- Use empty `Text`s and create an empty/initial `Data.Text.Encoding.Some`
Expand All @@ -86,12 +86,13 @@ nextLine handle (Decoder unfinishedLine "" "" continue) = {-# SCC "nextLine_1" #
-- Use `Data.Text.IO.hGetChunk` ? It uses an unknown buffer size!
bs <- {-# SCC "nextLine_1_hGet" #-}
BS.hGetNonBlocking handle (hGetBufferSizeMB * 1024 * 1024)
-- Also use BS.length ? To end if lower than requested ? But it's O(n)!
if bs == BS.empty
-- Last (or maybe first of an empty file) line and no more input available!
then return (unfinishedLine, Nothing)
-- Put the most common case first.
-- Use `BS.null` as much as possible because it's O(1).
if not $ BS.null bs
-- Call `nextLine` again to handle the newly fetched ByteString.
else nextLine handle $ Decoder unfinishedLine "" bs continue
then nextLine handle $ Decoder unfinishedLine "" bs continue
-- Last (or maybe first of an empty file) line and no more input available!
else return (unfinishedLine, Nothing)
--------------------------------------------------------------------------------
-- UTF-8 decode: Maybe a partial line, no decoded Text and only some ByteString.
--------------------------------------------------------------------------------
Expand All @@ -109,32 +110,38 @@ nextLine handle (Decoder unfinishedLine text bs !continue) = {-# SCC "nextLine_3
--print ((5::Int, unfinishedLine, text, bs)::(Int,Text.Text,Text.Text,BS.ByteString))
let (consumed, remainder) = {-# SCC "nextLine_3_break" #-}
Text.break (== '\n') text
case remainder of
-- No newline character found!
-- break (== 1) [] -> ( [], [] )
-- break (== 1) [0,0,0] -> ( [0,0,0], [] )
"" -> {-# SCC "nextLine_3_newline_no" #-} do
nextLine handle $ Decoder (unfinishedLine <> text) "" bs continue
-- One newline character was found!
-- break (== 1) [1] -> ( [] , [1] )
-- break (== 1) [1,0,0] -> ( [] , [1,0,0] )
-- break (== 1) [0,0,1] -> ( [0,0] , [1] )
-- break (== 1) [0,1,0] -> ( [0] , [1,0] )
_ -> {-# SCC "nextLine_3_newline_yes" #-} do
-- Remove the `\n`.
-- If `remainder` is not `empty`, a `\n` was found and it's the first char.
let text' = {-# SCC "nextLine_3_drop" #-} Text.drop 1 remainder
return $ case consumed of
-- Next character was a newline. Return the line buffer with no append.
-- break (== 1) [1] -> ( [] , [1] )
-- break (== 1) [1,0,0] -> ( [] , [1,0,0] )
"" -> {-# SCC "nextLine_3_append_no" #-}
( unfinishedLine
, Just $ Decoder "" text' bs continue
)
-- break (== 1) [0,0,1] -> ( [0,0] , [1] )
-- break (== 1) [0,1,0] -> ( [0] , [1,0] )
_ -> {-# SCC "nextLine_3_append_yes" #-}
( unfinishedLine <> consumed
, Just $ Decoder "" text' bs continue
)
-- Put the most common case first.
-- Use `Text.null` as much as possible because it's O(1).
if not $ Text.null remainder
-- One newline character was found!
-- break (== 1) [1] -> ( [] , [1] )
-- break (== 1) [1,0,0] -> ( [] , [1,0,0] )
-- break (== 1) [0,0,1] -> ( [0,0] , [1] )
-- break (== 1) [0,1,0] -> ( [0] , [1,0] )
then {-# SCC "nextLine_3_newline_yes" #-}
-- Remove the `\n`.
-- If `remainder` is not `empty`, a `\n` was found.
let text' = {-# SCC "nextLine_3_drop" #-} Text.drop 1 remainder
-- Put the most common case first.
-- Use `Text.null` as much as possible because it's O(1).
in if not $ Text.null consumed
-- break (== 1) [0,0,1] -> ( [0,0] , [1] )
-- break (== 1) [0,1,0] -> ( [0] , [1,0] )
then return $
{-# SCC "nextLine_3_append_yes" #-}
( unfinishedLine <> consumed
, Just $ Decoder "" text' bs continue
)
-- Next character was a newline. Return the line buffer with no append.
-- break (== 1) [1] -> ( [] , [1] )
-- break (== 1) [1,0,0] -> ( [] , [1,0,0] )
else return $
{-# SCC "nextLine_3_append_no" #-}
( unfinishedLine
, Just $ Decoder "" text' bs continue
)
-- No newline character found!
-- break (== 1) [] -> ( [], [] )
-- break (== 1) [0,0,0] -> ( [0,0,0], [] )
else {-# SCC "nextLine_3_newline_no" #-} do
nextLine handle $ Decoder (unfinishedLine <> text) "" bs continue

0 comments on commit 2d40a43

Please sign in to comment.