diff --git a/bench/process/collect.jq b/bench/process/collect.jq
new file mode 100644
index 00000000000..467b08eb7c0
--- /dev/null
+++ b/bench/process/collect.jq
@@ -0,0 +1,14 @@
+## Assemble the canonical per-run record: file metadata, run identity and the
+## run's JSON payload, each stored under a key named after its argument.
+def standard_run_desc(filename; sha256; format; ctime; mtime; branch; commit; config_name; iter; data):
+  { filename:    filename
+  , sha256:      sha256
+  , format:      format
+  , ctime:       ctime
+  , mtime:       mtime
+  , branch:      branch
+  , commit:      commit
+  , config_name: config_name
+  , iter:        iter
+  , data:        data
+  };
diff --git a/bench/process/membenches_v1.jq b/bench/process/membenches_v1.jq
new file mode 100644
index 00000000000..fa20ee8ea58
--- /dev/null
+++ b/bench/process/membenches_v1.jq
@@ -0,0 +1,46 @@
+## Column specifications for the 'membenches_v1' result schema.
+##
+## config_specs: descriptive columns; process.jq resolves each 'path' against
+##   a whole run record.
+## result_specs: measured columns; process.jq resolves each 'path' against a
+##   run's .data payload.  render.jq divides the mean by 'scale' (default 1)
+##   and applies ceil when 'round' is set.
+def format_specs:
+
+{ config_specs:
+  [{ key: "config", header: "Config name"
+   , path: ["config_name"]}
+  ,{ key: "flags", header: "RTS Flags"
+   , path: ["data", "flags"]
+   }
+  ,{ key: "failed", header: "Failed"
+   , path: ["data", "failed"]
+   # , post: not
+   }]
+, result_specs:
+  [{ key: "RssAvg", header: "Avg RSS, MB"
+   , path: ["RSS", "avg"], round: true
+   }
+  ,{ key: "RssMax", header: "Max RSS, MB"
+   , path: ["RSS", "max"], round: true
+   }
+  ,{ key: "HeapAvg", header: "Avg heap, MB"
+   , path: ["Heap", "avg"], round: true
+   }
+  ,{ key: "HeapMax", header: "Max heap, MB"
+   , path: ["Heap", "max"], round: true
+   }
+  ,{ key: "WallSec", header: "Wall, s"
+   , path: ["totaltime"], round: true
+   }
+  ,{ key: "CpuMax", header: "OS CPU, s"
+   , path: ["CentiCpuMax"], scale: 100, round: true
+   }
+  ,{ key: "MutMax", header: "Mutator, s"
+   , path: ["CentiMutMax"], scale: 100, round: true
+   }
+  ,{ key: "GCSec", header: "GC time, s"
+   , path: ["SecGC"], round: true
+   }
+  ]
+};
diff --git a/bench/process/process.jq b/bench/process/process.jq
new file mode 100644
index 00000000000..c19037b5583
--- /dev/null
+++ b/bench/process/process.jq
@@ -0,0 +1,80 @@
+##
+## Aggregation
+##
+
+## Arithmetic mean of an array of numbers; 0 for an empty array.
+def mean: if length == 0
+          then 0
+          else reduce .[] as $n (0; . + $n) / length end;
+
+## Square of the input.
+def pow2: . * .;
+
+## Population variance: the mean of squared deviations from the mean.
+def variance: .
+  | mean as $mean | map_values(. - $mean | pow2) | mean;
+
+## Square root of the variance.  Errors raised by 'variance' are suppressed
+## by '?', in which case this filter produces no output at all.
+def stddev: . | (variance ?) | sqrt;
+
+## Summarise an array of samples as a "rich variable": mean (sum / n),
+## stddev, relstddev (stddev / mean) and the raw samples.
+## relstddev is reported as 0 when the mean is 0: jq raises an error on
+## division by zero, which would otherwise abort the whole report.
+def samples_to_variable (n):
+    stddev as $stddev
+  | (add / n) as $mean
+  | { "mean": $mean
+    , "stddev": $stddev
+    , "relstddev": (if $stddev == null or $mean == 0
+                    then 0
+                    else $stddev / $mean end)
+    , "raw": .
+    };
+
+## Input: one result spec ({key, path, ...}).
+## Output: {<key>: rich variable}, with one sample per element of 'objects'
+## read from that element's .data at 'path'.  An element whose value at
+## 'path' is null or absent raises an error.
+def varspec_to_variable (objects; nobjects):
+    .key as $key
+  | .path as $path
+  | objects
+  | { "\($key)":
+      map (.data
+           | getpath($path)
+             as $val
+           | if $val != null then $val
+             else error("Path \($path) unreachable among top level keys: \(keys)")
+             end
+          )
+      | samples_to_variable(nobjects)
+    };
+
+## Input: array of config specs.
+## Output: one object mapping every spec key to its value in 'x'.  Raises an
+## error if any element of 'rest' disagrees with 'x' on that value.
+def description_from_headliner(x; rest):
+  map ( . as $spec
+      | (x | getpath($spec.path)) as $head
+      | (rest | map (getpath($spec.path) == $head) | all) as $coherence
+      | { "\($spec.key)":
+          (if $coherence == true
+           then $head
+           else error("Incoherence on config key: \($spec.key)")
+           end) })
+  | add;
+
+## Input: array of runs sharing one configuration.
+## Output: the descriptive fields taken from the first run, merged with one
+## rich variable per result spec.
+def aggregate_config_runs_variables (config_specs; result_specs):
+    . as $runs
+  | .[0] as $headliner
+  | length as $nruns
+  | result_specs
+  | map(varspec_to_variable($runs; $nruns))
+  | add
+  | (config_specs | description_from_headliner($headliner; $runs[1:]))
+    + .;
diff --git a/bench/process/process.sh b/bench/process/process.sh
new file mode 100755
index 00000000000..1f54634825b
--- /dev/null
+++ b/bench/process/process.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC1090
+#
+# Collect, aggregate and render benchmark run results.
+#
+#   process.sh [--github|--csv] [--cls] [--no-progress] [--trace] [OP [ARG..]]
+#
+# OP is one of: collect process render report call (default: report).
+# Bash is required: arrays, pushd and ${var//pattern/replacement} are used
+# below, and none of them is available in POSIX sh.
+set -eu
+
+fail() {
+        echo -e "ERROR: $*" >&2
+        exit 1
+}
+
+default_op='report'
+default_format='csv'
+
+# NOTE(review): $format is set by --github/--csv, but nothing below reads it:
+# op_collect shadows it with a local and op_render hands only $header_footer
+# to jq.  Confirm how the output format is meant to reach render.jq.
+format=$default_format
+header_footer='false'
+no_progress=
+
+while test $# -ge 1
+do case "$1" in
+           --github )      format='github';;
+           --csv )         format='csv'; header_footer='false';;
+           --cls )         echo -en "\ec" >&2;;
+           --no-progress ) no_progress='true';;
+           --trace )       set -x;;
+           * )             break;; esac; shift; done
+
+op=${1:-$default_op}; shift || true
+
+main() {
+        PROCESS_ORIG_PWD=$(pwd)
+        # Work from the script's own directory (presumably so that jq finds
+        # the included .jq modules -- TODO confirm); a relative DIR argument
+        # is still resolved against PROCESS_ORIG_PWD in op_collect.
+        pushd "$(dirname "$0")" >/dev/null || exit 1
+        case "${op}" in
+                collect ) op_collect "$@";;
+                process ) op_process;;
+                render )  op_render;;
+
+                report )  op_collect "$@" | op_process | op_render;;
+
+                call )    eval "$@";;
+                * ) echo "ERROR: operation must be one of: collect process render report" >&2; exit 1;; esac
+}
+
+hardcoded_branch='membench'
+hardcoded_commit='a7ee17d1af44b571c6e476916bd24ed65db97e15'
+
+# collect DESCRIPTION [FORMAT] [DIR]
+# Write one JSON batch to stdout: description, format, a timestamp, one run
+# record per DIR/*/refined.json, and the column specs of FORMAT.
+function op_collect() {
+        local desc=${1?USAGE: $0 collect DESCRIPTION [FORMAT] [DIR]}
+        local format=${2:-membenches_v1}
+        local literal_dir=${3:-.}
+
+        local dir=$(pushd "$PROCESS_ORIG_PWD" >/dev/null; realpath "$literal_dir")
+        test -d "${dir}" -a -n "$(ls "${dir}"/*/*.json)" ||
+            fail "${literal_dir} (realpath $dir) must be a writable directory with subdirectories containing JSON files with ${format} output schema"
+
+        test -n "$no_progress" || echo -ne "Collecting runs in $dir: " >&2
+        local args_global=(
+                --arg desc "$desc"
+                --arg format "$format"
+                --arg now "$(date --utc --iso-8601=seconds)"
+        )
+case $format in
+        membenches_v1 )
+        {
+                for f in "$dir"/*/refined.json
+                do local fpad="$f "
+                   test -n "$no_progress" || echo -n "$fpad" >&2
+                   # Config name and iteration are the first and second
+                   # '-'-separated fields of the run directory's name.
+                   local run_name
+                   run_name=$(basename "$(dirname "$f")")
+                   local args_run=(
+                         --arg filename "$f"
+                         --arg format "$format"
+                         --arg sha256 "$(sha256sum "$f" | cut -d' ' -f1)"
+                         --arg ctime "$(stat --format=%w "$f")"
+                         --arg mtime "$(stat --format=%y "$f")"
+                         --arg config "$(echo "$run_name" | cut -d- -f1)"
+                         --arg iter "$(echo "$run_name" | cut -d- -f2)"
+                         --arg hardcoded_branch "$hardcoded_branch"
+                         --arg hardcoded_commit "$hardcoded_commit"
+                         --slurpfile data "$f"
+                   )
+                   jq 'include "collect";
+
+                   standard_run_desc($filename; $sha256; $format; $ctime; $mtime; $hardcoded_branch; $hardcoded_commit; $config; $iter; $data[0])
+                   ' "$f" "${args_global[@]}" "${args_run[@]}"
+                   # Move the cursor back over the file name just printed.
+                   test -n "$no_progress" || printf "${fpad//?/\\b}" >&2
+                done
+                test -n "$no_progress" || echo >&2; };;
+        * )
+                fail "unknown result format: $format"
+esac |
+        jq 'include "'"$format"'";
+
+        { description: $desc
+        , format: $format
+        , ctime: $now
+        , runs: .
+        }
+        + format_specs
+        ' --slurp "${args_global[@]}"
+}
+
+# Read a collected batch on stdin and add .configs: one aggregate per group
+# of runs sharing .config_name.
+function op_process() {
+        jq 'include "process";
+
+        . as $batch
+        | $batch.runs
+        | group_by(.config_name)
+        | map (aggregate_config_runs_variables ($batch.config_specs; $batch.result_specs))
+        | $batch
+        + { configs: . }
+        '
+}
+
+# Read a processed batch on stdin and print the rendered table as raw text.
+function op_render() {
+        jq 'include "render";
+
+        render('"$header_footer"')
+        ' --raw-output
+}
+
+###
+### Main
+###
+main "$@"
diff --git a/bench/process/render.jq b/bench/process/render.jq
new file mode 100644
index 00000000000..98cfdc7582e
--- /dev/null
+++ b/bench/process/render.jq
@@ -0,0 +1,114 @@
+##
+## Presentation
+##
+
+## Round down to one / two decimal places.
+def decimal_pt: (. * 10 | floor) / 10;
+def decimal_pt2: (. * 100 | floor) / 100;
+## Stringify, keeping the first 'width' characters (one more when the string
+## starts with "-").
+def float_n(width): "\(.)" | .[:width + (if .[0:1] == "-" then 1 else 0 end)];
+## Divide the input by 'factor'.
+def downscale(factor): . / factor;
+
+## Expect name of a simple numeric field.
+def field(fname; f):
+  .[fname] as $val
+  | "\($val | f)";
+
+## Expect name of a rich variable.
+def var(fname; f):
+  .[fname] as $val
+  | "\($val.mean | f) | \("\($val.relstddev)" | .[:4])";
+
+## Input: one aggregated configuration.
+## Output: a single " | "-joined row holding the config columns followed by a
+## (value, relative stddev) pair per result spec.  The value is the mean
+## divided by the spec's 'scale' (default 1), rounded up when the spec sets
+## 'round'.  Unless 'format' is "csv", the row gets an empty leading cell.
+def render_config (format; cf; res):
+  . as $x
+  | (if format != "csv" then [null] else [] end
+     +
+     (cf
+      | map($x[.key]))
+     +
+     (res
+      | map(. as $spec
+            | $x[$spec.key] as $val
+            # 'scale' lives on the spec; the aggregated variable only has
+            # mean/stddev/relstddev/raw.
+            | [(($val.mean / ($spec.scale // 1))
+                | if $spec.round then ceil else . end)
+              , ($val.relstddev | tostring | .[:5])])
+      | add))
+    as $columns
+
+  | ($columns | join(" | "));
+
+## Output: array of header lines -- the " | "-joined column headers and, for
+## the "github" format, a delimiter row after them with one "--" cell per
+## header cell (GitHub only recognises a table when the delimiter row follows
+## the header row and has the same number of cells).
+def render_table_head (format; cf; res):
+  .
+  | (if format != "csv" then [null] else [] end
+     +
+     (cf
+      | map(.header))
+     +
+     (res
+      | map([.header, "σ/μ"])
+      | add))
+    as $columns
+
+  | [ $columns | join(" | ")]
+    +
+    (if format == "github"
+     then [ $columns
+            | map(if . == null then null else "--" end)
+            | join(" | ")]
+     else [] end);
+
+## Input: a processed batch.  Output: array of table lines, header first.
+## NOTE(review): the 'format' passed down is the batch's .format field, which
+## process.sh sets to the data schema name, not to "csv"/"github" -- confirm
+## how the output format is meant to be selected.
+def render_table:
+  .format as $format
+  | .config_specs as $config_specs
+  | .result_specs as $result_specs
+  | render_table_head (.format; .config_specs; .result_specs)
+  + (.configs | map (render_config ($format; $config_specs; $result_specs)));
+
+## Prepend explanatory "Parameters" / "Legend" lines to the input array.
+## 'commits' is only referenced by the commented-out line below.
+def add_header_footer(commits; run_counts; slot_counts):
+  .
+  | ([ "Parameters:\n"
+     , if run_counts | all(. == run_counts[0])
+       then " Every value is mean of \(run_counts[0]) runs,\n"
+       else " Every value is mean of varying amount of runs (\(run_counts)).\n" end
+     , if slot_counts | all(. == slot_counts[0])
+       then " Each run was syncing \(slot_counts[0]) slots, or \(slot_counts[0] / 21600 | floor) epochs over loopback, from a quiescent server.\n"
+       else " Runs were for varying amount of slots (\(slot_counts)).\n" end
+     , "\nLegend:\n"
+     , " wall = total_wall_seconds, total cpu = total_cpu_seconds\n"
+     , " total alloc = allocated_bytes, copied = copied_bytes, max live = max_live_bytes\n"
+     , " See https://github.com/ghc/ghc/blob/master/includes/RtsAPI.h for details.\n"
+     , " Each value is followed by σ/μ, i.e. relative stddev (or CoV).\n"
+     , " Δ% is change from baseline.\n"
+     , "\n\n"]) + .;
+  #| . + ["\n\n\(commits | map(" - \(.branch) / \(.commit) = https://github.com/'${github_user}'/cardano-node/tree/\(.commit)\n") | add)"];
+
+## Input: a processed batch.  Output: the table as one newline-joined string,
+## preceded by the explanatory header when 'header_footer' is true.
+def render(header_footer):
+  .commits as $commits
+| .run_counts as $run_counts
+| .slot_counts as $slot_counts
+| render_table
+| if header_footer == true
+  then add_header_footer($commits; $run_counts; $slot_counts)
+  else . end
+| join("\n");