From 2d40a43d0cdbc8ce36e3dcea063d39211b9a8e50 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Tue, 16 Apr 2024 13:29:16 +0000 Subject: [PATCH] Better or worse? --- bench/stdout-tools/app/tq.hs | 52 ++++++------ .../stdout-tools/src/Cardano/Tracer/Trace.hs | 10 ++- bench/stdout-tools/src/Data/Log.hs | 83 ++++++++++--------- 3 files changed, 77 insertions(+), 68 deletions(-) diff --git a/bench/stdout-tools/app/tq.hs b/bench/stdout-tools/app/tq.hs index 1993c37294d..0bb462ad88b 100644 --- a/bench/stdout-tools/app/tq.hs +++ b/bench/stdout-tools/app/tq.hs @@ -15,21 +15,21 @@ Count lines -rw-r--r-- 1 fmaste users 6.4G Apr 10 19:28 bench/stdout-tools/5nodes.stdout > time cat bench/stdout-tools/5nodes.stdout | wc -l -real 0m1.946s -user 0m0.105s -sys 0m2.728s +real 0m2.039s +user 0m0.091s +sys 0m2.842s > time jq --raw-input . bench/stdout-tools/5nodes.stdout | wc -l 25581640 -real 1m30.707s -user 1m28.129s -sys 0m8.124s +real 1m30.745s +user 1m28.116s +sys 0m8.150s > time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer count-lines 25581640 -real 0m11.630s -user 0m10.836s -sys 0m0.826s +real 0m15.387s +user 0m13.194s +sys 0m2.173s Count all the ns="Forge.Loop.StartLeadershipCheckPlus" -------------------------------------------------------------------------------- @@ -37,35 +37,35 @@ Count all the ns="Forge.Loop.StartLeadershipCheckPlus" -- Using jq for everything: > time jq --raw-input --compact-output 'try fromjson | if (type == "object" and has("at")) then select(.ns=="Forge.Loop.StartLeadershipCheckPlus") else empty end' bench/stdout-tools/5nodes.stdout | wc -l 264150 -real 1m30.615s -user 1m29.159s -sys 0m1.502s +real 1m28.688s +user 1m27.187s +sys 0m1.555s -- Using jq but first filter non JSON lines with grep: > time grep -E "^{.*" bench/stdout-tools/5nodes.stdout | jq --compact-output 'select(.ns == "Forge.Loop.StartLeadershipCheckPlus")' | wc -l 264150 -real 1m9.828s -user 1m12.247s -sys 0m5.901s +real 1m10.258s +user 1m12.628s 
+sys 0m5.999s $ time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer count-FLSLCP 264150 -real 0m26.420s -user 0m25.654s -sys 0m0.837s +real 0m30.316s +user 0m28.140s +sys 0m2.167s Heap changes -------------------------------------------------------------------------------- -> grep -E "^{.*" bench/stdout-tools/5stdout | jq 'select(.ns == "Resources") | .data.Heap' | uniq -real 1m5.810s -user 1m7.716s -sys 0m3.674s +> time grep -E "^{.*" bench/stdout-tools/5nodes.stdout | jq 'select(.ns == "Resources") | .data.Heap' | uniq +real 1m8.960s +user 1m11.298s +sys 0m5.972s -> time cabal run tq -- --file 5stdout:bench/stdout-tools/5stdout --reducer heap-changes -real 0m54.360s -user 0m53.606s -sys 0m0.873s +> time cabal run tq -- --file big-node:bench/stdout-tools/5nodes.stdout --reducer heap-changes +real 1m1.578s +user 0m59.291s +sys 0m2.264s Heap changes (52 nodes) -------------------------------------------------------------------------------- diff --git a/bench/stdout-tools/src/Cardano/Tracer/Trace.hs b/bench/stdout-tools/src/Cardano/Tracer/Trace.hs index 3b224eddd4e..65d08324205 100644 --- a/bench/stdout-tools/src/Cardano/Tracer/Trace.hs +++ b/bench/stdout-tools/src/Cardano/Tracer/Trace.hs @@ -70,10 +70,12 @@ fromJson text = Text.drop 8 text'' -- Consume all the text until the next '"'. (nsText, text'''') = {-# SCC "fromJson_break_ns" #-} - Text.break (== '"') (Text.drop 8 text''') - -- Drop closing '",' of 'ns' and leave the unconsumed Text, the - -- `Remainder`, as a new JSON object. - in Right $ Trace utcTime nsText ("{" <> Text.drop 2 text'''') + Text.break (== '"') text''' + -- Drop closing '",' of 'ns' and leave the unconsumed Text, the + -- `Remainder`, as a new JSON object. + remainderText = {-# SCC "fromJson_remainder" #-} + "{" <> Text.drop 2 text'''' + in Right $ Trace utcTime nsText remainderText -- Probably not a Trace JSON object. (Left _) -> Left text -- Assumption failed. 
diff --git a/bench/stdout-tools/src/Data/Log.hs b/bench/stdout-tools/src/Data/Log.hs index 08d35d34103..d84cb500dc8 100644 --- a/bench/stdout-tools/src/Data/Log.hs +++ b/bench/stdout-tools/src/Data/Log.hs @@ -66,10 +66,10 @@ lineFoldl'' handle decoder f initialAcc = do data Decoder = Decoder -- TODO: Strict or not? Let the function caller decide? -- Function `nextLine` is all about pattern matching these things. - { _unfinishedLine :: !Text.Text - , _textLeft :: !Text.Text - , _byteStringLeft :: !BS.ByteString - , _textDecoding :: !(BS.ByteString -> TextE.Decoding) + { _unfinishedLine :: Text.Text + , _textLeft :: Text.Text + , _byteStringLeft :: BS.ByteString + , _textDecoding :: (BS.ByteString -> TextE.Decoding) } -- Use empty `Text`s and create an empty/initial `Data.Text.Encoding.Some` @@ -86,12 +86,13 @@ nextLine handle (Decoder unfinishedLine "" "" continue) = {-# SCC "nextLine_1" # -- Use `Data.Text.IO.hGetChunk` ? It uses an unknown buffer size! bs <- {-# SCC "nextLine_1_hGet" #-} BS.hGetNonBlocking handle (hGetBufferSizeMB * 1024 * 1024) - -- Also use BS.length ? To end if lower than requested ? But it's O(n)! - if bs == BS.empty - -- Last (or maybe first of an empty file) line and no more input available! - then return (unfinishedLine, Nothing) + -- Put the most common case first. + -- Use `BS.null` as much as possible because it's O(1). + if not $ BS.null bs -- Call `newLine` again to handle the newly fetched ByteString. - else nextLine handle $ Decoder unfinishedLine "" bs continue + then nextLine handle $ Decoder unfinishedLine "" bs continue + -- Last (or maybe first of an empty file) line and no more input available! + else return (unfinishedLine, Nothing) -------------------------------------------------------------------------------- -- UTF-8 decode: Maybe a partial line, no decoded Text and only some ByteString. 
-------------------------------------------------------------------------------- @@ -109,32 +110,38 @@ nextLine handle (Decoder unfinishedLine text bs !continue) = {-# SCC "nextLine_3 --print ((5::Int, unfinishedLine, text, bs)::(Int,Text.Text,Text.Text,BS.ByteString)) let (consumed, remainder) = {-# SCC "nextLine_3_break" #-} Text.break (== '\n') text - case remainder of - -- No newline character found! - -- break (== 1) [] -> ( [], [] ) - -- break (== 1) [0,0,0] -> ( [0,0,0], [] ) - "" -> {-# SCC "nextLine_3_newline_no" #-} do - nextLine handle $ Decoder (unfinishedLine <> text) "" bs continue - -- One newline character was found! - -- break (== 1) [1] -> ( [] , [1] ) - -- break (== 1) [1,0,0] -> ( [] , [1,0,0] ) - -- break (== 1) [0,0,1] -> ( [0,0] , [1] ) - -- break (== 1) [0,1,0] -> ( [0] , [1,0] ) - _ -> {-# SCC "nextLine_3_newline_yes" #-} do - -- Remove the `\n`. - -- If `reminder` is not `empty`, a `\n` was found and it's the first char. - let text' = {-# SCC "nextLine_3_drop" #-} Text.drop 1 remainder - return $ case consumed of - -- Next character was a newline. Return the line buffer with no append. - -- break (== 1) [1] -> ( [] , [1] ) - -- break (== 1) [1,0,0] -> ( [] , [1,0,0] ) - "" -> {-# SCC "nextLine_3_append_no" #-} - ( unfinishedLine - , Just $ Decoder "" text' bs continue - ) - -- break (== 1) [0,0,1] -> ( [0,0] , [1] ) - -- break (== 1) [0,1,0] -> ( [0] , [1,0] ) - _ -> {-# SCC "nextLine_3_append_yes" #-} - ( unfinishedLine <> consumed - , Just $ Decoder "" text' bs continue - ) + -- Put the most common case first. + -- Use `Text.null` as much as possible because it's O(1). + if not $ Text.null remainder + -- One newline character was found! + -- break (== 1) [1] -> ( [] , [1] ) + -- break (== 1) [1,0,0] -> ( [] , [1,0,0] ) + -- break (== 1) [0,0,1] -> ( [0,0] , [1] ) + -- break (== 1) [0,1,0] -> ( [0] , [1,0] ) + then {-# SCC "nextLine_3_newline_yes" #-} + -- Remove the `\n`. + -- If `remainder` is not `empty`, a `\n` was found. 
+ let text' = {-# SCC "nextLine_3_drop" #-} Text.drop 1 remainder + -- Put the most common case first. + -- Use `Text.null` as much as possible because it's O(1). + in if not $ Text.null consumed + -- break (== 1) [0,0,1] -> ( [0,0] , [1] ) + -- break (== 1) [0,1,0] -> ( [0] , [1,0] ) + then return $ + {-# SCC "nextLine_3_append_yes" #-} + ( unfinishedLine <> consumed + , Just $ Decoder "" text' bs continue + ) + -- Next character was a newline. Return the line buffer with no append. + -- break (== 1) [1] -> ( [] , [1] ) + -- break (== 1) [1,0,0] -> ( [] , [1,0,0] ) + else return $ + {-# SCC "nextLine_3_append_no" #-} + ( unfinishedLine + , Just $ Decoder "" text' bs continue + ) + -- No newline character found! + -- break (== 1) [] -> ( [], [] ) + -- break (== 1) [0,0,0] -> ( [0,0,0], [] ) + else {-# SCC "nextLine_3_newline_no" #-} do + nextLine handle $ Decoder (unfinishedLine <> text) "" bs continue