diff --git a/README.md b/README.md index 024c6a032..0447364ac 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,8 @@ site (hosted at speed.yjit.org) with a simple command: This will move files from `./data/` into a `build` directory, generate all the html files, and start a web server where you can view the site at `localhost:8000`. -Some of the reports are built using `lib/yjit-metrics` and have templates in -`lib/yjit-metrics/report_templates`. +Some of the reports are built using `lib/yjit_metrics` and have templates in +`lib/yjit_metrics/report_templates`. The rest of the files that build the site are found beneath `site`. There are `erb` files to generate additional pages and the script that does all the file rendering in `site/_framework/`. @@ -87,7 +87,7 @@ Then a quick "git diff" in the pages directory can show you what, if anything, c ### Output Format Changes to the JSON format may require a bump to the `version` entry (`lib/yjit-metrics.rb`) -and translation logic in the code that processes the data (`lib/yjit-metrics/bench-results.rb`). +and translation logic in the code that processes the data (`lib/yjit_metrics/result_set.rb`). ### Tests diff --git a/basic_benchmark.rb b/basic_benchmark.rb index 977f6f2eb..4e395e146 100755 --- a/basic_benchmark.rb +++ b/basic_benchmark.rb @@ -20,7 +20,7 @@ require "optparse" require "fileutils" require "etc" -require_relative "lib/yjit-metrics" +require_relative "lib/yjit_metrics" # Default settings for benchmark sampling DEFAULT_WARMUP_ITRS = 15 # Number of un-reported warmup iterations to run before "counting" benchmark runs diff --git a/basic_report.rb b/basic_report.rb index b39577daf..4a7b7d05f 100755 --- a/basic_report.rb +++ b/basic_report.rb @@ -2,7 +2,7 @@ require "json" require "optparse" -require_relative "lib/yjit-metrics" +require_relative "lib/yjit_metrics" RESULT_SET = YJITMetrics::ResultSet.new diff --git a/continuous_reporting/benchmark_and_update.rb b/continuous_reporting/benchmark_and_update.rb index 422c1939a..42bd4b562 100755 --- a/continuous_reporting/benchmark_and_update.rb +++ b/continuous_reporting/benchmark_and_update.rb @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -require_relative "../lib/yjit-metrics" +require_relative "../lib/yjit_metrics" require 'fileutils' require 'net/http' diff --git a/continuous_reporting/create_json_params_file.rb b/continuous_reporting/create_json_params_file.rb index a7bad1d55..b29668483 100644 --- a/continuous_reporting/create_json_params_file.rb +++ b/continuous_reporting/create_json_params_file.rb @@ -3,7 +3,7 @@ require "optparse" require "json" -require_relative "../lib/yjit-metrics" +require_relative "../lib/yjit_metrics" # A particular run through the benchmarking system has a number of important parameters. # Most, though not all, are captured in the JSON file produced here. 
In general, diff --git a/continuous_reporting/file_benchmark_data_into_raw.rb b/continuous_reporting/file_benchmark_data_into_raw.rb index e53e8d4dc..978a9f271 100755 --- a/continuous_reporting/file_benchmark_data_into_raw.rb +++ b/continuous_reporting/file_benchmark_data_into_raw.rb @@ -5,7 +5,7 @@ require "fileutils" require "optparse" -require_relative "../lib/yjit-metrics" +require_relative "../lib/yjit_metrics" YJIT_RAW_DATA_REPO = File.join(YJITMetrics::ContinuousReporting::RAW_BENCHMARK_ROOT, "raw_benchmark_data") diff --git a/continuous_reporting/generate_and_upload_reports.rb b/continuous_reporting/generate_and_upload_reports.rb index 5cfa7408b..20fd72b22 100755 --- a/continuous_reporting/generate_and_upload_reports.rb +++ b/continuous_reporting/generate_and_upload_reports.rb @@ -6,7 +6,7 @@ require "optparse" require "rbconfig" -require_relative "../lib/yjit-metrics" +require_relative "../lib/yjit_metrics" ### Required repos, etc, to build from diff --git a/lib/yjit-metrics.rb b/lib/yjit-metrics.rb deleted file mode 100644 index 57be16432..000000000 --- a/lib/yjit-metrics.rb +++ /dev/null @@ -1,535 +0,0 @@ -# General-purpose benchmark management routines - -require 'benchmark' -require 'fileutils' -require 'tempfile' -require 'json' -require 'csv' -require 'erb' - -require_relative "./yjit-metrics/bench-results" -require_relative "./yjit-metrics/repo-management" -require_relative "./yjit-metrics/cli-interface" -require_relative "./yjit-metrics/continuous_reporting" - -# Require all source files in yjit-metrics/report_types/*.rb -Dir.glob("yjit-metrics/report_types/*.rb", base: __dir__).each do |report_type_file| - require_relative report_type_file -end - -# Require all source files in yjit-metrics/timeline_report_types/*.rb -Dir.glob("yjit-metrics/timeline_report_types/*.rb", base: __dir__).each do |report_type_file| - require_relative report_type_file -end - -module YJITMetrics - include RepoManagement - - extend self # Make methods callable as YJITMetrics.method_name - - HARNESS_PATH = File.expand_path(__dir__ + "/../metrics-harness") - - PLATFORMS = ["x86_64", "aarch64"] - - uname_platform = `uname -m`.chomp.downcase.sub(/^arm(\d+)$/, 'aarch\1') - PLATFORM = PLATFORMS.detect { |platform| uname_platform == platform } - raise("yjit-metrics only supports running on x86_64 and aarch64!") if !PLATFORM - - # This structure is returned by the benchmarking harness from a run. - JSON_RUN_FIELDS = %i(times warmups yjit_stats peak_mem_bytes failures_before_success benchmark_metadata ruby_metadata) - RunData = Struct.new(*JSON_RUN_FIELDS) do - def exit_status - 0 - end - - def success? - true - end - - def times_ms - self.times.map { |v| 1000.0 * v } - end - - def warmups_ms - self.warmups.map { |v| 1000.0 * v } - end - - def to_json - out = { "version": 2 } # Current version of the single-run data file format - JSON_RUN_FIELDS.each { |f| out[f.to_s] = self.send(f) } - out - end - - def self.from_json(json) - unless json["version"] == 2 - raise "This looks like out-of-date single-run data!" - end - - RunData.new(*JSON_RUN_FIELDS.map { |f| json[f.to_s] }) - end - end - - ErrorData = Struct.new(:exit_status, :error, :summary, keyword_init: true) do - def success? - false - end - end - - def chdir(dir, &block) - puts "### cd #{dir}" - Dir.chdir(dir, &block).tap do - puts "### cd #{Dir.pwd}" if block - end - end - - # Checked system - error if the command fails - def check_call(command) - # Use prefix to makes it easier to see in the log. 
- puts("\e[33m## [#{Time.now}] #{command}\e[00m") - - status = nil - Benchmark.realtime do - status = system(command) - end.tap do |time| - printf "\e[34m## (`#{command}` took %.2fs)\e[00m\n", time - end - - unless status - puts "\e[31mCommand #{command.inspect} failed in directory #{Dir.pwd}\e[00m" - raise RuntimeError.new - end - end - - def check_output(command) - output = IO.popen(command) do |io_obj| - io_obj.read - end - unless $?.success? - puts "Command #{command.inspect} failed in directory #{Dir.pwd}" - raise RuntimeError.new - end - output - end - - def run_harness_script_from_string(script, - local_popen: proc { |*args, **kwargs, &block| IO.popen(*args, **kwargs, &block) }, - crash_file_check: true, - do_echo: true) - run_info = {} - - os = os_type - - if crash_file_check - if os == :linux - FileUtils.rm_f("core") - elsif os == :mac - crash_pattern = "#{ENV['HOME']}/Library/Logs/DiagnosticReports/ruby_*.crash" - ruby_crash_files_before = Dir[crash_pattern].to_a - end - end - - tf = Tempfile.new("yjit-metrics-script") - tf.write(script) - tf.flush # Not flushing can result in successfully running an empty script - - script_output = nil - harness_script_pid = nil - worker_pid = nil - - # We basically always want this to sync immediately to console or logfile. - # If the library was run with nohup (or otherwise not connected to a tty) - # that won't happen by default. - $stdout.sync = true - - # Passing -l to bash makes sure to load .bash_profile for chruby. - err_r, err_w = IO.pipe - local_popen.call(["bash", "-l", tf.path], err: err_w) do |script_out_io| - harness_script_pid = script_out_io.pid - script_output = "" - loop do - begin - chunk = script_out_io.readpartial(1024) - - # The harness will print the worker PID before doing anything else. - if (worker_pid.nil? && chunk.include?("HARNESS PID")) - if chunk =~ /HARNESS PID: (\d+) -/ - worker_pid = $1.to_i - else - puts "Failed to read harness PID correctly from chunk: #{chunk.inspect}" - end - end - - print chunk if do_echo - script_output += chunk - rescue EOFError - # Cool, all done. - break - end - end - end - - err_w.close - script_err = err_r.read - print script_err if do_echo - - # This code and the ensure handler need to point to the same - # status structure so that both can make changes (e.g. to crash_files). - # We'd like this structure to be simple and serialisable -- it's - # passed back from the framework, more or less intact. - run_info.merge!({ - failed: !$?.success?, - crash_files: [], - exit_status: $?.exitstatus, - harness_script_pid: harness_script_pid, - worker_pid: worker_pid, - stderr: script_err, - output: script_output - }) - - return run_info - ensure - if(tf) - tf.close - tf.unlink - end - - if crash_file_check - if os == :linux - run_info[:crash_files] = [ "core" ] if File.exist?("core") - elsif os == :mac - # Horrifying realisation: it takes a short time after the segfault for the crash file to be written. - # Matching these up is really hard to do automatically, particularly when/if we're not sure if - # they'll be showing up at all. - sleep(1) if run_info[:failed] - - ruby_crash_files = Dir[crash_pattern].to_a - # If any new ruby_* crash files have appeared, include them. - run_info[:crash_files] = (ruby_crash_files - ruby_crash_files_before).sort - end - end - end - - def os_type - if RUBY_PLATFORM["darwin"] - :mac - elsif RUBY_PLATFORM["win"] - :win - else - :linux - end - end - - def per_os_checks - if os_type == :win - puts "Windows is not supported or tested yet. Best of luck!" 
- return - end - - if os_type == :mac - puts "Mac results are considered less stable for this benchmarking harness." - puts "Please assume you'll need more runs and more time for similar final quality." - return - end - - # Only available on intel systems - if !File.exist?('/sys/devices/system/cpu/intel_pstate/no_turbo') - return - end - - File.open('/sys/devices/system/cpu/intel_pstate/no_turbo', mode='r') do |file| - if file.read.strip != '1' - puts("You forgot to disable turbo: (note: sudo ./setup.sh will do this)") - puts(" sudo sh -c 'echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo'") - exit(-1) - end - end - - if !File.exist?('/sys/devices/system/cpu/intel_pstate/min_perf_pct') - return - end - - File.open('/sys/devices/system/cpu/intel_pstate/min_perf_pct', mode='r') do |file| - if file.read.strip != '100' - puts("You forgot to set the min perf percentage to 100: (note: sudo ./setup.sh will do this)") - puts(" sudo sh -c 'echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct'") - exit(-1) - end - end - end - - class BenchmarkList - attr_reader :yjit_bench_path - - def initialize(name_list:, yjit_bench_path:) - @name_list = name_list - @yjit_bench_path = File.expand_path(yjit_bench_path) - - bench_names = Dir.glob("*", base: "#{@yjit_bench_path}/benchmarks") - legal_bench_names = (bench_names + bench_names.map { |name| name.delete_suffix(".rb") }).uniq - @name_list.map! { |name| name.delete_suffix(".rb") } - - unknown_benchmarks = name_list - legal_bench_names - raise(RuntimeError.new("Unknown benchmarks: #{unknown_benchmarks.inspect}!")) if unknown_benchmarks.size > 0 - bench_names = @name_list if @name_list.size > 0 - raise "No testable benchmarks found!" if bench_names.empty? # This should presumably not happen after the "unknown" check - - @benchmark_script_by_name = {} - bench_names.each do |bench_name| - script_path = "#{@yjit_bench_path}/benchmarks/#{bench_name}" - - # Choose the first of these that exists - real_script_path = [script_path, script_path + ".rb", script_path + "/benchmark.rb"].detect { |path| File.exist?(path) && !File.directory?(path) } - raise "Could not find benchmark file starting from script path #{script_path.inspect}!" unless real_script_path - @benchmark_script_by_name[bench_name] = real_script_path - end - end - - # For now, benchmark_info returns a Hash. At some point it may want to get fancier. - def benchmark_info(name) - raise "Querying unknown benchmark name #{name.inspect}!" unless @benchmark_script_by_name[name] - { - name: name, - script_path: @benchmark_script_by_name[name], - } - end - - def to_a - @benchmark_script_by_name.keys.map { |name| benchmark_info(name) } - end - - # If we call .map, we'll pretend to be an array of benchmark_info hashes - def map - @benchmark_script_by_name.keys.map do |name| - yield benchmark_info(name) - end - end - end - - # Eventually we'd like to do fancy things with interesting settings. - # Before that, let's encapsulate the settings in a simple object so - # we can pass them around easily. - # - # Harness Settings are about how to sample the benchmark repeatedly - - # iteration counts, thresholds, etc. - class HarnessSettings - LEGAL_SETTINGS = [ :warmup_itrs, :min_benchmark_itrs, :min_benchmark_time ] - - def initialize(settings) - illegal_keys = settings.keys - LEGAL_SETTINGS - raise "Illegal settings given to HarnessSettings: #{illegal_keys.inspect}!" unless illegal_keys.empty? 
- @settings = settings - end - - def [](key) - @settings[key] - end - - def to_h - @settings - end - end - - # Shell Settings encapsulate how we run Ruby and the appropriate shellscript - # for each sampling run. That means which Ruby, which Ruby and shell options, - # what env vars to set, whether core dumps are enabled, what to do on error and more. - class ShellSettings - LEGAL_SETTINGS = [ :ruby_opts, :prefix, :chruby, :enable_core_dumps, :on_error, :bundler_version ] - - def initialize(settings) - illegal_keys = settings.keys - LEGAL_SETTINGS - raise "Illegal settings given to ShellSettings: #{illegal_keys.inspect}!" unless illegal_keys.empty? - @settings = settings - end - - def [](key) - @settings[key] - end - - def to_h - @settings - end - end - - # The yjit-metrics harness returns its data as a simple hash for that benchmark: - # - # { - # "times" => [ 2.3, 2.5, 2.7, 2.4, ...], # The benchmark returns times in seconds, not milliseconds - # "benchmark_metadata" => {...}, - # "ruby_metadata" => {...}, - # "yjit_stats" => {...}, # Note: yjit_stats may be empty, but is present. It's a hash, not an array. - # } - # - # This method returns a RunData struct. Note that only a single yjit stats - # hash is returned for all iterations combined, while times and warmups are - # arrays with sizes equal to the number of 'real' and warmup iterations, - # respectively. - # - # If on_error is specified it should be a proc that takes a hash. In case of - # an exception or a failing status returned by the harness script, - # that proc will be called with information about the error that occurred. - # If on_error raises (or re-raises) an exception then the benchmark run will - # stop. If no exception is raised, this method will collect no samples and - # will return nil. - def run_single_benchmark(benchmark_info, harness_settings:, shell_settings:, - run_script: proc { |s| run_harness_script_from_string(s) }) - - out_tempfile = Tempfile.new("yjit-metrics-single-run") - - env_vars = { - OUT_JSON_PATH: out_tempfile.path, - WARMUP_ITRS: harness_settings[:warmup_itrs], - MIN_BENCH_ITRS: harness_settings[:min_benchmark_itrs], - MIN_BENCH_TIME: harness_settings[:min_benchmark_time], - FORCE_BUNDLER_VERSION: shell_settings[:bundler_version], - } - - with_chruby = shell_settings[:chruby] - - script_template = ERB.new File.read(__dir__ + "/../metrics-harness/run_harness.sh.erb") - # These are used in the ERB template - template_settings = { - pre_benchmark_code: (with_chruby ? "chruby && chruby #{with_chruby}" : "") + "\n" + - (shell_settings[:enable_core_dumps] ? "ulimit -c unlimited" : ""), - pre_cmd: shell_settings[:prefix], - env_var_exports: env_vars.map { |key, val| "export #{key}='#{val}'" }.join("\n"), - ruby_opts: "-I#{HARNESS_PATH} " + shell_settings[:ruby_opts].map { |s| '"' + s + '"' }.join(" "), - script_path: benchmark_info[:script_path], - bundler_version: shell_settings[:bundler_version], - } - bench_script = script_template.result(binding) # Evaluate an Erb template with template_settings - - # Do the benchmarking - script_details = run_script.call(bench_script) - - if script_details[:failed] - # We shouldn't normally get a Ruby exception in the parent process. Instead the harness - # process fails and returns an exit status. We'll create an exception for the error - # handler to raise if it decides this is a fatal error. 
- result = ErrorData.new( - exit_status: script_details[:exit_status], - error: "Failure in benchmark test harness, exit status: #{script_details[:exit_status].inspect}", - summary: script_details[:stderr]&.lines&.detect { |l| l.match?(/\S/) }&.sub("#{Dir.pwd}", ".")&.strip, - ) - - STDERR.puts "-----" - STDERR.print bench_script - STDERR.puts "-----" - - if shell_settings[:on_error] - begin - # What should go in here? What should the interface be? Some things will - # be unavailable, depending what stage of the script got an error. - shell_settings[:on_error].call(script_details.merge({ - exception: result.error, - benchmark_name: benchmark_info[:name], - benchmark_path: benchmark_info[:script_path], - harness_settings: harness_settings.to_h, - shell_settings: shell_settings.to_h, - })) - rescue StandardError => error - result.error = error - end - end - - return result - end - - # Read the benchmark data - json_string_data = File.read out_tempfile.path - if json_string_data == "" - # The tempfile exists, so no read error... But no data returned. - raise "No error from benchmark, but no data was returned!" - end - single_bench_data = JSON.load(json_string_data) - obj = RunData.new(*JSON_RUN_FIELDS.map { |field| single_bench_data[field.to_s] }) - obj.yjit_stats = nil if obj.yjit_stats.nil? || obj.yjit_stats.empty? - - # Add per-benchmark metadata from this script to the data returned from the harness. - obj.benchmark_metadata.merge!({ - "benchmark_name" => benchmark_info[:name], - "benchmark_path" => benchmark_info[:script_path], - }) - - obj - ensure - if out_tempfile - out_tempfile.close - out_tempfile.unlink - end - end - - # This method combines run_data objects from multiple benchmark runs. - # - # It returns a benchmark data array of the following form: - # - # { - # "times" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...],[...]] "psych" => [...] }, - # "warmups" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...],[...]] "psych" => [...] }, - # "benchmark_metadata" => { "yaml-load" => {}, "psych" => { ... }, }, - # "ruby_metadata" => {...}, - # "yjit_stats" => { "yaml-load" => [{...}, {...}, ...] }, - # "peak_mem_bytes" => { "yaml-load" => [2343423, 2349341, ...], "psych" => [112234, ...], ... }, - # } - # - # For times, warmups, YJIT stats and benchmark metadata, that means there is a hash inside - # each top-level key for each benchmark name, e.g.: - # - # "times" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...], [...], ...] } - # - # For times, warmups and YJIT stats that means the value of each hash value is an array. - # For times and warmups, the top-level array is the runs, and the sub-arrays are iterations - # in a single run. For YJIT stats, the top-level array is runs and the hash is the gathered - # YJIT stats for that run. - # - # If no valid data was successfully collected (e.g. a single benchmark was to run, but failed) - # then this method will return nil. 
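The comment block above documents the shape of the hash that `merge_benchmark_data` returns: times and warmups are stored in milliseconds, grouped per benchmark as an array of runs, each run being an array of samples. As a rough, standalone sketch of how a consumer of that hash might flatten the runs and average them — the `merged` literal below is invented sample data in that shape, not real benchmark output:

```ruby
# Invented sample data matching the documented merged format (times in ms,
# one sub-array per run).
merged = {
  "times" => {
    "yaml-load" => [[2300.0, 2500.0, 2700.0], [2400.0, 2600.0]],
    "psych"     => [[3100.0, 3050.0, 2990.0]],
  },
}

merged["times"].each do |bench_name, runs|
  samples = runs.flatten                     # combine iterations from all runs
  mean_ms = samples.sum(0.0) / samples.size  # simple arithmetic mean
  printf("%-12s %8.1f ms over %d samples in %d runs\n",
         bench_name, mean_ms, samples.size, runs.size)
end
```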
- def merge_benchmark_data(all_run_data) - bench_data = { "version": 2 } - JSON_RUN_FIELDS.each { |f| bench_data[f.to_s] = {} } - - all_run_data.each do |run_data| - bench_name = run_data.benchmark_metadata["benchmark_name"] - - bench_data["times"][bench_name] ||= [] - bench_data["warmups"][bench_name] ||= [] - bench_data["yjit_stats"][bench_name] ||= [] - bench_data["peak_mem_bytes"][bench_name] ||= [] - bench_data["failures_before_success"][bench_name] ||= [] - - # Return times and warmups in milliseconds, not seconds - bench_data["times"][bench_name].push run_data.times_ms - bench_data["warmups"][bench_name].push run_data.warmups_ms - - bench_data["yjit_stats"][bench_name].push [run_data.yjit_stats] if run_data.yjit_stats - bench_data["peak_mem_bytes"][bench_name].push run_data.peak_mem_bytes - bench_data["failures_before_success"][bench_name].push run_data.failures_before_success - - # Benchmark metadata should be unique per-benchmark. In other words, - # we do *not* want to combine runs with different amounts of warmup, - # iterations, different env/gems, etc, into the same dataset. - bench_data["benchmark_metadata"][bench_name] ||= run_data.benchmark_metadata - if bench_data["benchmark_metadata"][bench_name] != run_data.benchmark_metadata - puts "#{bench_name} metadata 1: #{bench_data["benchmark_metadata"][bench_name].inspect}" - puts "#{bench_name} metadata 2: #{run_data.benchmark_metadata.inspect}" - puts "Benchmark metadata should not change for benchmark #{bench_name} in the same configuration!" - end - - # We don't save individual Ruby metadata for all benchmarks because it - # should be identical for all of them -- we use the same Ruby - # every time. Instead we save one copy of it, but we make sure - # on each subsequent benchmark that it returned exactly the same - # metadata about the Ruby version. - bench_data["ruby_metadata"] = run_data.ruby_metadata if bench_data["ruby_metadata"].empty? - if bench_data["ruby_metadata"] != run_data.ruby_metadata - puts "Ruby metadata 1: #{bench_data["ruby_metadata"].inspect}" - puts "Ruby metadata 2: #{run_data.ruby_metadata.inspect}" - raise "Ruby metadata should not change across a single set of benchmark runs in the same Ruby config!" - end - end - - # With error handlers it's possible that every benchmark had an error so there's no data to return. - return nil if bench_data["times"].empty? - - return bench_data - end -end diff --git a/lib/yjit-metrics/bench-results.rb b/lib/yjit-metrics/bench-results.rb deleted file mode 100644 index f4bf6737f..000000000 --- a/lib/yjit-metrics/bench-results.rb +++ /dev/null @@ -1,856 +0,0 @@ -# frozen_string_literal. - -require_relative "./theme" - -# Make sure YJITMetrics namespace is declared -module YJITMetrics; end - -# Statistical methods -module YJITMetrics::Stats - def sum(values) - return values.sum(0.0) - end - - def sum_or_nil(values) - return nil if values.nil? - sum(values) - end - - def mean(values) - return values.sum(0.0) / values.size - end - - def mean_or_nil(values) - return nil if values.nil? - mean(values) - end - - def geomean(values) - exponent = 1.0 / values.size - values.inject(1.0, &:*) ** exponent - end - - def geomean_or_nil(values) - return nil if values.nil? 
- geomean(values) - end - - def stddev(values) - return 0 if values.size <= 1 - - xbar = mean(values) - diff_sqrs = values.map { |v| (v-xbar)*(v-xbar) } - # Bessel's correction requires dividing by length - 1, not just length: - # https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation - variance = diff_sqrs.sum(0.0) / (values.length - 1) - return Math.sqrt(variance) - end - - def stddev_or_nil(values) - return nil if values.nil? - stddev(values) - end - - def rel_stddev(values) - stddev(values) / mean(values) - end - - def rel_stddev_or_nil(values) - return nil if values.nil? - rel_stddev(values) - end - - def rel_stddev_pct(values) - 100.0 * stddev(values) / mean(values) - end - - def rel_stddev_pct_or_nil(values) - return nil if values.nil? - rel_stddev_pct(values) - end - - # See https://en.wikipedia.org/wiki/Covariance#Definition and/or - # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance (two-pass algorithm) - def covariance(x, y) - raise "Trying to take the covariance of two different-sized arrays!" if x.size != y.size - - x_mean = mean(x) - y_mean = mean(y) - - cov = 0.0 - (0...(x.size)).each do |i| - cov += (x[i] - x_mean) * (y[i] - y_mean) / x.size - end - - cov - end - - # See https://en.wikipedia.org/wiki/Pearson_correlation_coefficient - # I'm not convinced this is correct. It definitely doesn't match the least-squares correlation coefficient below. - def pearson_correlation(x, y) - raise "Trying to take the Pearson correlation of two different-sized arrays!" if x.size != y.size - - ## Some random Ruby guy method - #xx_prod = x.map { |xi| xi * xi } - #yy_prod = y.map { |yi| yi * yi } - #xy_prod = (0...(x.size)).map { |i| x[i] * y[i] } - # - #x_sum = x.sum - #y_sum = y.sum - # - #num = xy_prod.sum - (x_sum * y_sum) / x.size - #den = Math.sqrt(xx_prod.sum - x_sum ** 2.0 / x.size) * (yy_prod.sum - y_sum ** 2.0 / x.size) - # - #num/den - - # Wikipedia translation of the definition - x_mean = mean(x) - y_mean = mean(y) - num = (0...(x.size)).map { |i| (x[i] - x_mean) * (y[i] - y_mean) }.sum - den = Math.sqrt((0...(x.size)).map { |i| (x[i] - x_mean) ** 2.0 }.sum) * - Math.sqrt((0...(x.size)).map { |i| (y[i] - y_mean) ** 2.0 }.sum) - num / den - end - - # See https://mathworld.wolfram.com/LeastSquaresFitting.html - def least_squares_slope_intercept_and_correlation(x, y) - raise "Trying to take the least-squares slope of two different-sized arrays!" if x.size != y.size - - x_mean = mean(x) - y_mean = mean(y) - - xx_sum_of_squares = x.map { |xi| (xi - x_mean)**2.0 }.sum - yy_sum_of_squares = y.map { |yi| (yi - y_mean)**2.0 }.sum - xy_sum_of_squares = (0...(x.size)).map { |i| (x[i] - x_mean) * (y[i] - y_mean) }.sum - - slope = xy_sum_of_squares / xx_sum_of_squares - intercept = y_mean - slope * x_mean - - r_squared = xy_sum_of_squares ** 2.0 / (xx_sum_of_squares * yy_sum_of_squares) - - [slope, intercept, r_squared] - end - - # code taken from https://github.com/clbustos/statsample/blob/master/lib/statsample/regression/simple.rb#L74 - # (StatSample Ruby gem, simple linear regression.) - def simple_regression_slope(x, y) - raise "Trying to take the least-squares slope of two different-sized arrays!" 
if x.size != y.size - - x_mean = mean(x) - y_mean = mean(y) - - num = den = 0.0 - (0...x.size).each do |i| - num += (x[i] - x_mean) * (y[i] - y_mean) - den += (x[i] - x_mean)**2.0 - end - - slope = num / den - #intercept = y_mean - slope * x_mean - - slope - end -end - -# Encapsulate multiple benchmark runs across multiple Ruby configurations. -# Do simple calculations, reporting and file I/O. -# -# Note that a JSON file with many results can be quite large. -# Normally it's appropriate to store raw data as multiple JSON files -# that contain one set of runs each. Large multi-Ruby datasets -# may not be practical to save as full raw data. -class YJITMetrics::ResultSet - include YJITMetrics::Stats - - def initialize - @times = {} - @warmups = {} - @benchmark_metadata = {} - @ruby_metadata = {} - @yjit_stats = {} - @peak_mem = {} - @empty = true - end - - def empty? - @empty - end - - def config_names - @times.keys - end - - def platforms - @ruby_metadata.map { |config, hash| hash["platform"] }.uniq - end - - # "Fragments" are, in effect, a quick human-readable way to summarise a particular - # compile-time-plus-run-time Ruby configuration. Doing this in general would - # require serious AI, but we don't need it in general. We have a few specific - # cases we care about. - # - # Right now we're just checking the config name. It would be better, but harder, - # to actually verify the configuration from the config's Ruby metadata (and other - # metadata?) and make sure the config does what it's labelled as. - CONFIG_NAME_SPECIAL_CASE_FRAGMENTS = { - "prod_ruby_with_yjit" => "YJIT ", - "prev_ruby_yjit" => "YJIT ", - "prod_ruby_with_mjit" => "MJIT", - "ruby_30_with_mjit" => "MJIT-3.0", - "prod_ruby_no_jit" => "CRuby ", - "prev_ruby_no_jit" => "CRuby ", - "truffleruby" => "TruffleRuby", - "yjit_stats" => "YJIT Stats", - } - def table_of_configs_by_fragment(configs) - configs_by_fragment = {} - frag_by_length = CONFIG_NAME_SPECIAL_CASE_FRAGMENTS.keys.sort_by { |k| -k.length } # Sort longest-first - configs.each do |config| - longest_frag = frag_by_length.detect { |k| config.include?(k) } - unless longest_frag - raise "Trying to sort config #{config.inspect} by fragment, but no fragment matches!" - end - configs_by_fragment[longest_frag] ||= [] - configs_by_fragment[longest_frag] << config - end - configs_by_fragment - end - - # Add a table of configurations, distinguished by platform, compile-time config, runtime config and whatever - # else we can determine from config names and/or result data. Only include configurations for which we have - # results. Order by the req_configs order, if supplied, otherwise by order results were added in (internal - # hash table order.) - # NOTE: This is currently only used by variable_warmup_report which discards the actual human names - # (it gets used to select and order the configs). - def configs_with_human_names(req_configs = nil) - # Only use requested configs for which we have data - if req_configs - # Preserve req_configs order - c_n = config_names - only_configs = req_configs.select {|config| c_n.include?(config) } - else - only_configs = config_names() - end - - if only_configs.size == 0 - puts "No requested configurations have any data..." - puts "Requested configurations: #{req_configs.inspect} #{req_configs == nil ? 
"(nil means use all)" : ""}" - puts "Configs we have data for: #{@times.keys.inspect}" - raise("Can't generate human names table without any configurations!") - end - - configs_by_platform = {} - only_configs.each do |config| - config_platform = @ruby_metadata[config]["platform"] - configs_by_platform[config_platform] ||= [] - configs_by_platform[config_platform] << config - end - - # TODO: Get rid of this branch and the next and just use "human_name platform" consistently. - - # If each configuration only exists for a single platform, we'll use the platform names as human-readable names. - if configs_by_platform.values.map(&:size).max == 1 - out = {} - # Order output by req_config - req_configs.each do |config| - platform = configs_by_platform.detect { |platform, plat_configs| plat_configs.include?(config) } - out[platform] = config - end - return out - end - - # If all configurations are on the *same* platform, we'll use names like YJIT and MJIT and MJIT(3.0) - if configs_by_platform.size == 1 - # Sort list of configs by what fragments (Ruby version plus runtime config) they contain - by_fragment = table_of_configs_by_fragment(only_configs) - - # If no two configs have the same Ruby version plus runtime config, then that's how we'll name them. - frags_with_multiple_configs = by_fragment.keys.select { |frag| (by_fragment[frag] || []).length > 1 } - if frags_with_multiple_configs.empty? - out = {} - # Order by req_configs - req_configs.each do |config| - fragment = by_fragment.detect { |frag, configs| configs[0] == config }.first - human_name = insert_version_for_config(CONFIG_NAME_SPECIAL_CASE_FRAGMENTS[fragment], config) - out[human_name] = config - end - return out - end - - unsortable_configs = frags_with_multiple_configs.flat_map { |frag| by_fragment[frag] } - puts "Fragments with multiple configs: #{frags_with_multiple_configs.inspect}" - puts "Configs we can't sort by fragment: #{unsortable_configs.inspect}" - raise "We only have one platform, but we can't sort by fragment... Need finer distinctions!" - end - - # Okay. We have at least two platforms. Now things get stickier. - by_platform_and_fragment = {} - configs_by_platform.each do |platform, configs| - by_platform_and_fragment[platform] = table_of_configs_by_fragment(configs) - end - hard_to_name_configs = by_platform_and_fragment.values.flat_map(&:values).select { |configs| configs.size > 1 }.inject([], &:+).uniq - - # If no configuration shares *both* platform *and* fragment, we can name by platform and fragment. - if hard_to_name_configs.empty? - plat_frag_table = {} - by_platform_and_fragment.each do |platform, frag_table| - CONFIG_NAME_SPECIAL_CASE_FRAGMENTS.each do |fragment, human_name| - next unless frag_table[fragment] - single_config = frag_table[fragment][0] - human_name = insert_version_for_config(human_name, single_config) - plat_frag_table[single_config] = "#{human_name} #{platform}" - end - end - - # Now reorder the table by req_configs - out = {} - req_configs.each do |config| - out[plat_frag_table[config]] = config - end - return out - end - - raise "Complicated case in configs_with_human_names! Hard to distinguish between: #{hard_to_name_configs.inspect}!" - end - - # These objects have absolutely enormous internal data, and we don't want it printed out with - # every exception. - def inspect - "YJITMetrics::ResultSet<#{object_id}>" - end - - # A ResultSet normally expects to see results with this structure: - # - # { - # "times" => { "benchname1" => [ 11.7, 14.5, 16.7, ... ], "benchname2" => [...], ... 
}, - # "benchmark_metadata" => { "benchname1" => {...}, "benchname2" => {...}, ... }, - # "ruby_metadata" => {...}, - # "yjit_stats" => { "benchname1" => [{...}, {...}...], "benchname2" => [{...}, {...}, ...] } - # } - # - # Note that this input structure doesn't represent runs (subgroups of iterations), - # such as when restarting the benchmark and doing, say, 10 groups of 300 - # iterations. To represent that, you would call this method 10 times, once per - # run. Runs will be kept separate internally, but by default are returned as a - # combined single array. - # - # Every benchmark run is assumed to come with a corresponding metadata hash - # and (optional) hash of YJIT stats. However, there should normally only - # be one set of Ruby metadata, not one per benchmark run. Ruby metadata is - # assumed to be constant for a specific compiled copy of Ruby over all runs. - def add_for_config(config_name, benchmark_results, normalize_bench_names: true) - if !benchmark_results.has_key?("version") - puts "No version entry in benchmark results - falling back to version 1 file format." - - benchmark_results["times"].keys.each do |benchmark_name| - # v1 JSON files are always single-run, so wrap them in a one-element array. - benchmark_results["times"][benchmark_name] = [ benchmark_results["times"][benchmark_name] ] - benchmark_results["warmups"][benchmark_name] = [ benchmark_results["warmups"][benchmark_name] ] - benchmark_results["yjit_stats"][benchmark_name] = [ benchmark_results["yjit_stats"][benchmark_name] ] - - # Various metadata is still in the same format for v2. - end - elsif benchmark_results["version"] != 2 - raise "Getting data from JSON in bad format!" - else - # JSON file is marked as version 2, so all's well. - end - - @empty = false - - @times[config_name] ||= {} - benchmark_results["times"].each do |benchmark_name, times| - benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names - @times[config_name][benchmark_name] ||= [] - @times[config_name][benchmark_name].concat(times) - end - - @warmups[config_name] ||= {} - (benchmark_results["warmups"] || {}).each do |benchmark_name, warmups| - benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names - @warmups[config_name][benchmark_name] ||= [] - @warmups[config_name][benchmark_name].concat(warmups) - end - - @yjit_stats[config_name] ||= {} - benchmark_results["yjit_stats"].each do |benchmark_name, stats_array| - next if stats_array.nil? - stats_array.compact! - next if stats_array.empty? - benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names - @yjit_stats[config_name][benchmark_name] ||= [] - @yjit_stats[config_name][benchmark_name].concat(stats_array) - end - - @benchmark_metadata[config_name] ||= {} - benchmark_results["benchmark_metadata"].each do |benchmark_name, metadata_for_benchmark| - benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names - @benchmark_metadata[config_name][benchmark_name] ||= metadata_for_benchmark - if @benchmark_metadata[config_name][benchmark_name] != metadata_for_benchmark - # We don't print this warning only once because it's really bad, and because we'd like to show it for all - # relevant problem benchmarks. But mostly because it's really bad: don't combine benchmark runs with - # different settings into one result set. - $stderr.puts "WARNING: multiple benchmark runs of #{benchmark_name} in #{config_name} have different benchmark metadata!" 
- end - end - - @ruby_metadata[config_name] ||= benchmark_results["ruby_metadata"] - ruby_meta = @ruby_metadata[config_name] - if ruby_meta != benchmark_results["ruby_metadata"] && !@printed_ruby_metadata_warning - print "Ruby metadata is meant to *only* include information that should always be\n" + - " the same for the same Ruby executable. Please verify that you have not added\n" + - " inappropriate Ruby metadata or accidentally used the same name for two\n" + - " different Ruby executables. (Additional mismatches in this result set won't show warnings.)\n" - puts "Metadata 1: #{ruby_meta.inspect}" - puts "Metadata 2: #{benchmark_results["ruby_metadata"].inspect}" - @printed_ruby_metadata_warning = true - end - unless ruby_meta["arch"] - # Our harness didn't record arch until adding ARM64 support. If a collected data file doesn't set it, - # autodetect from RUBY_DESCRIPTION. We only check x86_64 since all older data should only be on x86_64, - # which was all we supported. - if ruby_meta["RUBY_DESCRIPTION"].include?("x86_64") - ruby_meta["arch"] = "x86_64-unknown" - else - raise "No arch provided in data file, and no x86_64 detected in RUBY_DESCRIPTION!" - end - end - recognized_platforms = YJITMetrics::PLATFORMS + ["arm64"] - ruby_meta["platform"] ||= recognized_platforms.detect { |platform| (ruby_meta["uname -a"] || "").downcase.include?(platform) } - ruby_meta["platform"] ||= recognized_platforms.detect { |platform| (ruby_meta["arch"] || "").downcase.include?(platform) } - raise "Uknown platform" if !ruby_meta["platform"] - ruby_meta["platform"].sub!(/^arm(\d+)$/, 'aarch\1') - #@platform ||= ruby_meta["platform"] - - #if @platform != ruby_meta["platform"] - # raise "A single ResultSet may only contain data from one platform, not #{@platform.inspect} AND #{ruby_meta["platform"].inspect}!" - #end - - @full_run ||= benchmark_results["full_run"] - if @full_run != benchmark_results["full_run"] - warn "The 'full_run' data should not change within the same run!" - end - - @peak_mem[config_name] ||= {} - benchmark_results["peak_mem_bytes"].each do |benchmark_name, mem_bytes| - benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names - @peak_mem[config_name][benchmark_name] ||= [] - @peak_mem[config_name][benchmark_name].concat(mem_bytes) - end - end - - # This returns a hash-of-arrays by configuration name - # containing benchmark results (times) per - # benchmark for the specified config. - # - # If in_runs is specified, the array will contain - # arrays (runs) of samples. Otherwise all samples - # from all runs will be combined. - def times_for_config_by_benchmark(config, in_runs: false) - raise("No results for configuration: #{config.inspect}!") if !@times.has_key?(config) || @times[config].empty? - return @times[config] if in_runs - data = {} - @times[config].each do |benchmark_name, runs| - data[benchmark_name] = runs.inject([]) { |arr, piece| arr.concat(piece) } - end - data - end - - # This returns a hash-of-arrays by configuration name - # containing warmup results (times) per - # benchmark for the specified config. - # - # If in_runs is specified, the array will contain - # arrays (runs) of samples. Otherwise all samples - # from all runs will be combined. 
- def warmups_for_config_by_benchmark(config, in_runs: false) - return @warmups[config] if in_runs - data = {} - @warmups[config].each do |benchmark_name, runs| - data[benchmark_name] = runs.inject([]) { |arr, piece| arr.concat(piece) } - end - data - end - - # This returns a hash-of-arrays by config name - # containing YJIT statistics, if gathered, per - # benchmark run for the specified config. For configs - # that don't collect YJIT statistics, the array - # will be empty. - # - # If in_runs is specified, the array will contain - # arrays (runs) of samples. Otherwise all samples - # from all runs will be combined. - def yjit_stats_for_config_by_benchmark(config, in_runs: false) - return @yjit_stats[config] if in_runs - data = {} - @yjit_stats[config].each do |benchmark_name, runs| - data[benchmark_name] ||= [] - runs.each { |run| data[benchmark_name].concat(run) } - end - data - end - - def peak_mem_bytes_for_config_by_benchmark(config) - @peak_mem[config] - end - - # This returns a hash-of-hashes by config name - # containing per-benchmark metadata (parameters) per - # benchmark for the specified config. - def benchmark_metadata_for_config_by_benchmark(config) - @benchmark_metadata[config] - end - - # This returns a hash of metadata for the given config name - def metadata_for_config(config) - @ruby_metadata[config] - end - - def ruby_version_for_config(config) - return unless metadata = @ruby_metadata[config] - - if (match = metadata["RUBY_DESCRIPTION"]&.match(/^(?:ruby\s+)?([0-9.]+\S*)/)) - match[1] - else - metadata["RUBY_VERSION"] - end - end - - def full_run_info - @full_run - end - - def insert_version_for_config(str, config) - str.sub(//, ruby_version_for_config(config)) - end - - # What Ruby configurations does this ResultSet contain data for? - def available_configs - @ruby_metadata.keys - end - - def benchmarks - @benchmark_metadata.values.flat_map(&:keys).uniq - end - - # Sometimes you just want all the yjit_stats fields added up. - # - # This should return a hash-of-hashes where the top level key - # key is the benchmark name and each hash value is the combined stats - # for a single benchmark across whatever number of runs is present. - # - # This may not work as expected if you have full YJIT stats only - # sometimes for a given config - which normally should never be - # the case. - def combined_yjit_stats_for_config_by_benchmark(config) - data = {} - @yjit_stats[config].each do |benchmark_name, runs| - stats = {} - runs.map(&:flatten).map(&:first).each do |run| - raise "Internal error! #{run.class.name} is not a hash!" unless run.is_a?(Hash) - - stats["all_stats"] = run["all_stats"] if run["all_stats"] - (run.keys - ["all_stats"]).each do |key| - if run[key].is_a?(Integer) - stats[key] ||= 0 - stats[key] += run[key] - elsif run[key].is_a?(Float) - stats[key] ||= 0.0 - stats[key] += run[key] - elsif run[key].is_a?(Hash) - stats[key] ||= {} - run[key].each do |subkey, subval| - stats[key][subkey] ||= 0 - stats[key][subkey] += subval - end - else - raise "Unexpected stat type #{run[key].class}!" - end - end - end - data[benchmark_name] = stats - end - data - end - - # Summarize the data by config. If it's a YJIT config with full stats, get the highlights of the exit report too. 
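Before the per-config summary below is built, `combined_yjit_stats_for_config_by_benchmark` (above) folds each benchmark's per-run YJIT stats into a single hash of totals: integer and float counters are summed, and nested hashes are merged key by key. A stripped-down sketch of just the counter-summing idea, using two invented stats hashes rather than real harness output:

```ruby
# Two made-up per-run stats hashes; the real ones contain many more counters.
runs = [
  { "yjit_insns_count" => 1_000, "exit_opt_send_without_block" => 40, "invalidation_count" => 2 },
  { "yjit_insns_count" => 1_200, "exit_opt_send_without_block" => 55, "invalidation_count" => 1 },
]

# Sum each counter across runs (the real method also handles floats and
# nested hashes of counters).
combined = runs.each_with_object(Hash.new(0)) do |run, totals|
  run.each { |key, value| totals[key] += value }
end

combined
# => {"yjit_insns_count"=>2200, "exit_opt_send_without_block"=>95, "invalidation_count"=>3}
```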
- SUMMARY_STATS = [ - "inline_code_size", - "outlined_code_size", - #"exec_instruction", # exec_instruction changed name to yjit_insns_count -- only one of the two will be present in a dataset - "yjit_insns_count", - "vm_insns_count", - "compiled_iseq_count", - "leave_interp_return", - "compiled_block_count", - "invalidation_count", - "constant_state_bumps", - ] - def summary_by_config_and_benchmark - summary = {} - available_configs.each do |config| - summary[config] = {} - - times_by_bench = times_for_config_by_benchmark(config) - times_by_bench.each do |bench, results| - summary[config][bench] = { - "mean" => mean(results), - "stddev" => stddev(results), - "rel_stddev" => rel_stddev(results), - } - end - - mem_by_bench = peak_mem_bytes_for_config_by_benchmark(config) - times_by_bench.keys.each do |bench| - summary[config][bench]["peak_mem_bytes"] = mem_by_bench[bench] - end - - all_stats = combined_yjit_stats_for_config_by_benchmark(config) - all_stats.each do |bench, stats| - summary[config][bench]["yjit_stats"] = stats.slice(*SUMMARY_STATS) - summary[config][bench]["yjit_stats"]["yjit_insns_count"] ||= stats["exec_instruction"] - - # Do we have full YJIT stats? If so, let's add the relevant summary bits - if stats["all_stats"] - out_stats = summary[config][bench]["yjit_stats"] - out_stats["side_exits"] = stats.inject(0) { |total, (k, v)| total + (k.start_with?("exit_") ? v : 0) } - out_stats["total_exits"] = out_stats["side_exits"] + out_stats["leave_interp_return"] - out_stats["retired_in_yjit"] = (out_stats["exec_instruction"] || out_stats["yjit_insns_count"]) - out_stats["side_exits"] - out_stats["avg_len_in_yjit"] = out_stats["retired_in_yjit"].to_f / out_stats["total_exits"] - out_stats["total_insns_count"] = out_stats["retired_in_yjit"] + out_stats["vm_insns_count"] - out_stats["yjit_ratio_pct"] = 100.0 * out_stats["retired_in_yjit"] / out_stats["total_insns_count"] - end - end - end - summary - end - - # What Ruby configurations, if any, have full YJIT statistics available? - def configs_containing_full_yjit_stats - @yjit_stats.keys.select do |config_name| - stats = @yjit_stats[config_name] - - # Every benchmark gets a key/value pair in stats, and every - # value is an array of arrays -- each run gets an array, and - # each measurement in the run gets an array. - - # Even "non-stats" YJITs now have statistics, but not "full" statistics - - # If stats is nil or empty, this isn't a full-yjit-stats config - if stats.nil? || stats.empty? - false - else - # For each benchmark, grab its array of runs - vals = stats.values - - vals.all? { |run_values| } - end - - # Stats is a hash of the form { "30_ifelse" => [ { "all_stats" => true, "inline_code_size" => 5572282, ...}, {...} ], "30k_methods" => [ {}, {} ]} - # We want to make sure every run has an all_stats hash key. - !stats.nil? && - !stats.empty? && - !stats.values.all? { |val| val.nil? || val[0].nil? || val[0][0].nil? || val[0][0]["all_stats"].nil? } - end - end -end - -module YJITMetrics - # FIXME: Do we need this? - # Default settings for Benchmark CI. - # This is used by benchmark_and_update.rb for CI reporting directly. - # It's also used by the VariableWarmupReport when selecting appropriate - # benchmarking settings. This is only for the default yjit-bench benchmarks. - DEFAULT_YJIT_BENCH_CI_SETTINGS = { - # Config names and config-specific settings - "configs" => { - # Each config controls warmup individually. But the number of real iterations needs - # to match across all configs, so it's not set per-config. 
- "x86_64_yjit_stats" => { - max_warmup_itrs: 30, - }, - "x86_64_prod_ruby_no_jit" => { - max_warmup_itrs: 30, - }, - "x86_64_prod_ruby_with_yjit" => { - max_warmup_itrs: 30, - }, - "x86_64_prev_ruby_no_jit" => { - max_warmup_itrs: 30, - }, - "x86_64_prev_ruby_yjit" => { - max_warmup_itrs: 30, - }, - #"x86_64_prod_ruby_with_mjit" => { - # max_warmup_itrs: 75, - # max_warmup_time: 300, # in seconds; we try to let MJIT warm up "enough," but time and iters vary by workload - #}, - "aarch64_yjit_stats" => { - max_warmup_itrs: 30, - }, - "aarch64_prod_ruby_no_jit" => { - max_warmup_itrs: 30, - }, - "aarch64_prod_ruby_with_yjit" => { - max_warmup_itrs: 30, - }, - "aarch64_prev_ruby_no_jit" => { - max_warmup_itrs: 30, - }, - "aarch64_prev_ruby_yjit" => { - max_warmup_itrs: 30, - }, - }, - # Non-config-specific settings - "min_bench_itrs" => 15, - "min_bench_time" => 20, - "min_warmup_itrs" => 5, - "max_warmup_itrs" => 75, - "max_itr_time" => 8 * 3600, # Used to stop at 300 minutes to avoid GHActions 360 min cutoff. Now the 7pm run needs to not overlap the 6am run. - } -end - -# Shared utility methods for reports that use a single "blob" of results -class YJITMetrics::Report - Theme = YJITMetrics::Theme - - include YJITMetrics::Stats - - def self.subclasses - @subclasses ||= [] - @subclasses - end - - def self.inherited(subclass) - YJITMetrics::Report.subclasses.push(subclass) - end - - def self.report_name_hash - out = {} - - @subclasses.select { |s| s.respond_to?(:report_name) }.each do |subclass| - name = subclass.report_name - raise "Duplicated report name: #{name.inspect}!" if out[name] - out[name] = subclass - end - - out - end - - def initialize(config_names, results, benchmarks: []) - raise "No Rubies specified for report!" if config_names.empty? - - bad_configs = config_names - results.available_configs - raise "Unknown configurations in report: #{bad_configs.inspect}!" unless bad_configs.empty? - - @config_names = config_names - @only_benchmarks = benchmarks - @result_set = results - end - - # Child classes can accept params in this way. By default it's a no-op. - def set_extra_info(info) - @extra_info = info - end - - # Do we specifically recognize this extra field? Nope. Child classes can override. - def accepts_field(name) - false - end - - def filter_benchmark_names(names) - return names if @only_benchmarks.empty? - names.select { |bench_name| @only_benchmarks.any? { |bench_spec| bench_name.start_with?(bench_spec) } } - end - - # Take column headings, formats for the percent operator and data, and arrange it - # into a simple ASCII table returned as a string. - def format_as_table(headings, col_formats, data, separator_character: "-", column_spacer: " ") - out = "" - - unless data && data[0] && col_formats && col_formats[0] && headings && headings[0] - $stderr.puts "Error in format_as_table..." - $stderr.puts "Headings: #{headings.inspect}" - $stderr.puts "Col formats: #{col_formats.inspect}" - $stderr.puts "Data: #{data.inspect}" - raise "Invalid data sent to format_as_table" - end - - num_cols = data[0].length - raise "Mismatch between headings and first data row for number of columns!" unless headings.length == num_cols - raise "Data has variable number of columns!" unless data.all? { |row| row.length == num_cols } - raise "Column formats have wrong number of entries!" unless col_formats.length == num_cols - - formatted_data = data.map.with_index do |row, idx| - col_formats.zip(row).map { |fmt, item| item ? 
fmt % item : "" } - end - - col_widths = (0...num_cols).map { |col_num| (formatted_data.map { |row| row[col_num].length } + [ headings[col_num].length ]).max } - - out.concat(headings.map.with_index { |h, idx| "%#{col_widths[idx]}s" % h }.join(column_spacer), "\n") - - separator = col_widths.map { |width| separator_character * width }.join(column_spacer) - out.concat(separator, "\n") - - formatted_data.each do |row| - out.concat (row.map.with_index { |item, idx| " " * (col_widths[idx] - item.size) + item }).join(column_spacer), "\n" - end - - out.concat("\n", separator, "\n") - rescue - $stderr.puts "Error when trying to format table: #{headings.inspect} / #{col_formats.inspect} / #{data[0].inspect}" - raise - end - - def write_to_csv(filename, data) - CSV.open(filename, "wb") do |csv| - data.each { |row| csv << row } - end - end - -end - -# Class for reports that use a longer series of times, each with its own report/data. -class YJITMetrics::TimelineReport - include YJITMetrics::Stats - - def self.subclasses - @subclasses ||= [] - @subclasses - end - - def self.inherited(subclass) - YJITMetrics::TimelineReport.subclasses.push(subclass) - end - - def self.report_name_hash - out = {} - - @subclasses.select { |s| s.respond_to?(:report_name) }.each do |subclass| - name = subclass.report_name - raise "Duplicated report name: #{name.inspect}!" if out[name] - out[name] = subclass - end - - out - end - - def initialize(context) - @context = context - end - - # Look for "PLATFORM_#{name}"; prefer specified platform if present. - def find_config(name, platform: "x86_64") - matches = @context[:configs].select { |c| c.end_with?(name) } - matches.detect { |c| c.start_with?(platform) } || matches.first - end - - # Strip PLATFORM from beginning of name - def platform_of_config(config) - YJITMetrics::PLATFORMS.each do |p| - return p if config.start_with?("#{p}_") - end - raise "Unknown platform in config '#{config}'" - end -end diff --git a/lib/yjit-metrics/repo-management.rb b/lib/yjit-metrics/repo-management.rb deleted file mode 100644 index 264a1a0ea..000000000 --- a/lib/yjit-metrics/repo-management.rb +++ /dev/null @@ -1,70 +0,0 @@ -# For yjit-metrics, we often want to clone various repositories, Ruby and non-Ruby. -# This file is about cloning and managing those repositories, and installing Rubies. 
- -module YJITMetrics; end -module YJITMetrics::RepoManagement - def clone_repo_with(path:, git_url:, git_branch:, do_clean: true) - unless File.exist?(path) - check_call("git clone '#{git_url}' '#{path}'") - end - - chdir(path) do - if do_clean - check_call("git clean -d -f") - check_call("git checkout .") # There's a tendency to have local mods to Gemfile.lock -- get rid of those changes - check_call("git fetch") # Make sure we can see any new branches - "git checkout" can fail with a not-yet-seen branch - check_call("git checkout #{git_branch}") - if git_branch =~ /\A[0-9a-zA-Z]{5}/ - # Don't do a "git pull" on a raw SHA - else - check_call("git pull") - end - else - # If we're not cleaning, we should still make sure we're on the right branch - current_branch = `git rev-parse --abbrev-ref HEAD`.chomp - current_sha = `git rev-parse HEAD`.chomp - - # If the branch name doesn't match and we're not on the same specific SHA, check out the specified branch or revision - if current_branch != git_branch && !current_sha.start_with?(git_branch) - check_call("git fetch") # If we do a checkout, we need to fetch first to make sure we can see it - check_call("git checkout #{git_branch}") - end - end - end - end - - def clone_ruby_repo_with(path:, git_url:, git_branch:, config_opts:, config_env: [], install_to:) - clone_repo_with(path: path, git_url: git_url, git_branch: git_branch) - - chdir(path) do - config_opts += [ "--prefix=#{install_to}" ] - - unless File.exist?("./configure") - check_call("./autogen.sh") - end - - if !File.exist?("./config.status") - should_configure = true - else - # Right now this config check is brittle - if you give it a config_env containing quotes, for - # instance, it will tend to believe it needs to reconfigure. We cut out single-quotes - # because they've caused trouble, but a full fix might need to understand bash quoting. - config_status_output = check_output("./config.status --conf").gsub("'", "").split(" ").sort - desired_config = config_opts.sort.map { |s| s.gsub("'", "") } + config_env - if config_status_output != desired_config - puts "Configuration is wrong, reconfiguring..." - puts "Desired: #{desired_config.inspect}" - puts "Current: #{config_status_output.inspect}" - should_configure = true - end - end - - if should_configure - check_call("#{config_env.join(" ")} ./configure #{ config_opts.join(" ") }") - check_call("make clean") - end - - check_call("make -j16 install") - end - end -end diff --git a/lib/yjit-metrics/report_types/bloggable_speed_report.rb b/lib/yjit-metrics/report_types/bloggable_speed_report.rb deleted file mode 100644 index 39f710303..000000000 --- a/lib/yjit-metrics/report_types/bloggable_speed_report.rb +++ /dev/null @@ -1,1234 +0,0 @@ -# frozen_string_literal: true -require_relative "yjit_stats_reports" -require "yaml" - -# For details-at-a-specific-time reports, we'll want to find individual configs and make sure everything is -# present and accounted for. This is a "single" report in the sense that it's conceptually at a single -# time, even though it can be multiple runs and Rubies. What it is *not* is results over time as YJIT and -# the benchmarks change. 
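The report class that follows loads per-benchmark category metadata from `yjit-bench/benchmarks.yml` and uses it to split benchmarks into headline, micro, and other groups. A self-contained sketch of that lookup — the YAML snippet here is an invented stand-in for the real file, and the category assignments are hypothetical:

```ruby
require "yaml"

# Invented stand-in for yjit-bench/benchmarks.yml; real contents may differ.
yaml = <<~YAML
  railsbench:
    category: headline
  30_ifelse:
    category: micro
  erubi:
    category: other
YAML

# Mirror the original's symbolized-keys metadata table.
metadata = YAML.safe_load(yaml).map { |name, attrs| [name, attrs.transform_keys(&:to_sym)] }.to_h

headline = metadata.select { |_name, m| m[:category] == "headline" }.keys
micro    = metadata.select { |_name, m| m[:category] == "micro" }.keys

p headline  # => ["railsbench"]
p micro     # => ["30_ifelse"]
```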
-class YJITMetrics::BloggableSingleReport < YJITMetrics::YJITStatsReport - REPO_ROOT = File.expand_path("../../../..", __dir__) - - # Benchmarks sometimes go into multiple categories, based on the category field - BENCHMARK_METADATA = YAML.load_file(File.join(REPO_ROOT, "yjit-bench/benchmarks.yml")).map do |name, metadata| - [name, metadata.transform_keys(&:to_sym)] - end.to_h - - def headline_benchmarks - @benchmark_names.select { |bench| BENCHMARK_METADATA[bench] && BENCHMARK_METADATA[bench][:category] == "headline" } - end - - def micro_benchmarks - @benchmark_names.select { |bench| BENCHMARK_METADATA[bench] && BENCHMARK_METADATA[bench][:category] == "micro" } - end - - def benchmark_category_index(bench_name) - return 0 if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:category] == "headline" - return 2 if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:category] == "micro" - return 1 - end - - def exactly_one_config_with_name(configs, substring, description, none_okay: false) - matching_configs = configs.select { |name| name.include?(substring) } - raise "We found more than one candidate #{description} config (#{matching_configs.inspect}) in this result set!" if matching_configs.size > 1 - raise "We didn't find any #{description} config among #{configs.inspect}!" if matching_configs.empty? && !none_okay - matching_configs[0] - end - - # Include Truffle data only if we can find it, use MJIT 3.0 and/or 3.1 depending on what's available. - # YJIT and No-JIT are mandatory. - def look_up_data_by_ruby(only_platforms: YJITMetrics::PLATFORMS, in_runs: false) - only_platforms = [only_platforms].flatten - # Filter config names by given platform(s) - config_names = @config_names.select { |name| only_platforms.any? { |plat| name.include?(plat) } } - raise "No data files for platform(s) #{only_platforms.inspect} in #{@config_names}!" if config_names.empty? - - @with_yjit_config = exactly_one_config_with_name(config_names, "prod_ruby_with_yjit", "with-YJIT") - @prev_no_jit_config = exactly_one_config_with_name(config_names, "prev_ruby_no_jit", "prev-CRuby", none_okay: true) - @prev_yjit_config = exactly_one_config_with_name(config_names, "prev_ruby_yjit", "prev-YJIT", none_okay: true) - @with_mjit30_config = exactly_one_config_with_name(config_names, "ruby_30_with_mjit", "with-MJIT3.0", none_okay: true) - @with_mjit_latest_config = exactly_one_config_with_name(config_names, "prod_ruby_with_mjit", "with-MJIT", none_okay: true) - @no_jit_config = exactly_one_config_with_name(config_names, "prod_ruby_no_jit", "no-JIT") - @truffle_config = exactly_one_config_with_name(config_names, "truffleruby", "Truffle", none_okay: true) - - # Prefer previous CRuby if present otherwise current CRuby. 
- @baseline_config = @prev_no_jit_config || @no_jit_config - - # Order matters here - we push No-JIT, then MJIT(s), then YJIT and finally TruffleRuby when present - @configs_with_human_names = [ - ["CRuby ", @prev_no_jit_config], - ["CRuby ", @no_jit_config], - ["MJIT3.0", @with_mjit30_config], - ["MJIT", @with_mjit_latest_config], - ["YJIT ", @prev_yjit_config], - ["YJIT ", @with_yjit_config], - ["Truffle", @truffle_config], - ].map do |(name, config)| - [@result_set.insert_version_for_config(name, config), config] if config - end.compact - - # Grab relevant data from the ResultSet - @times_by_config = {} - @warmups_by_config = {} - @ruby_metadata_by_config = {} - @bench_metadata_by_config = {} - @peak_mem_by_config = {} - @yjit_stats = {} - @configs_with_human_names.map { |name, config| config }.each do |config| - @times_by_config[config] = @result_set.times_for_config_by_benchmark(config, in_runs: in_runs) - @warmups_by_config[config] = @result_set.warmups_for_config_by_benchmark(config, in_runs: in_runs) - @ruby_metadata_by_config[config] = @result_set.metadata_for_config(config) - @bench_metadata_by_config[config] = @result_set.benchmark_metadata_for_config_by_benchmark(config) - @peak_mem_by_config[config] = @result_set.peak_mem_bytes_for_config_by_benchmark(config) - end - - @yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config, in_runs: in_runs) - @benchmark_names = filter_benchmark_names(@times_by_config[@with_yjit_config].keys) - - @times_by_config.each do |config_name, config_results| - if config_results.nil? || config_results.empty? - raise("No results for configuration #{config_name.inspect} in #{self.class}!") - end - no_result_benchmarks = @benchmark_names.select { |bench_name| config_results[bench_name].nil? || config_results[bench_name].empty? } - unless no_result_benchmarks.empty? - # We allow MJIT latest ONLY to have some benchmarks skipped... (empty is also fine) - if config_name == @with_mjit_latest_config - @mjit_is_incomplete = true - else - raise("No results in config #{config_name.inspect} for benchmark(s) #{no_result_benchmarks.inspect} in #{self.class}!") - end - end - end - - no_stats_benchmarks = @benchmark_names.select { |bench_name| !@yjit_stats[bench_name] || !@yjit_stats[bench_name][0] || @yjit_stats[bench_name][0].empty? } - unless no_stats_benchmarks.empty? - raise "No YJIT stats found for benchmarks: #{no_stats_benchmarks.inspect}" - end - end - - def calc_speed_stats_by_config - @mean_by_config = {} - @rsd_pct_by_config = {} - @speedup_by_config = {} - @total_time_by_config = {} - - @configs_with_human_names.map { |name, config| config }.each do |config| - @mean_by_config[config] = [] - @rsd_pct_by_config[config] = [] - @total_time_by_config[config] = 0.0 - @speedup_by_config[config] = [] - end - - @yjit_ratio = [] - - @benchmark_names.each do |benchmark_name| - @configs_with_human_names.each do |name, config| - this_config_times = @times_by_config[config][benchmark_name] - this_config_mean = mean_or_nil(this_config_times) # When nil? When a benchmark didn't happen for this config. - @mean_by_config[config].push this_config_mean - @total_time_by_config[config] += this_config_times.nil? ? 
0.0 : sum(this_config_times) - this_config_rel_stddev_pct = rel_stddev_pct_or_nil(this_config_times) - @rsd_pct_by_config[config].push this_config_rel_stddev_pct - end - - baseline_mean = @mean_by_config[@baseline_config][-1] # Last pushed -- the one for this benchmark - baseline_rel_stddev_pct = @rsd_pct_by_config[@baseline_config][-1] - baseline_rel_stddev = baseline_rel_stddev_pct / 100.0 # Get ratio, not percent - @configs_with_human_names.each do |name, config| - this_config_mean = @mean_by_config[config][-1] - - if this_config_mean.nil? - @speedup_by_config[config].push [nil, nil] - else - this_config_rel_stddev_pct = @rsd_pct_by_config[config][-1] - # Use (baseline / this) so that the bar goes up as the value (test duration) goes down. - speed_ratio = baseline_mean / this_config_mean - - # For non-baseline we add the rsd for the config to the rsd - # for the baseline to determine the full variance bounds. - # For just the baseline we don't need to add anything. - speed_rsd = if config == @baseline_config - this_config_rel_stddev_pct - else - this_config_rel_stddev = this_config_rel_stddev_pct / 100.0 # Get ratio, not percent - # Because we are dividing the baseline mean by this mean - # to get a ratio we need to add the variance of each (the - # baseline and this config) to determine the full error bounds. - speed_rel_stddev = Math.sqrt(baseline_rel_stddev * baseline_rel_stddev + this_config_rel_stddev * this_config_rel_stddev) - speed_rel_stddev * 100.0 - end - - @speedup_by_config[config].push [speed_ratio, speed_rsd] - end - - end - - # A benchmark run may well return multiple sets of YJIT stats per benchmark name/type. - # For these calculations we just add all relevant counters together. - this_bench_stats = combined_stats_data_for_benchmarks([benchmark_name]) - - total_exits = total_exit_count(this_bench_stats) - retired_in_yjit = (this_bench_stats["exec_instruction"] || this_bench_stats["yjit_insns_count"]) - total_exits - total_insns_count = retired_in_yjit + this_bench_stats["vm_insns_count"] - yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count - @yjit_ratio.push yjit_ratio_pct - end - end - - def calc_mem_stats_by_config - @peak_mb_by_config = {} - @peak_mb_relative_by_config = {} - @configs_with_human_names.map { |name, config| config }.each do |config| - @peak_mb_by_config[config] = [] - @peak_mb_relative_by_config[config] = [] - end - @mem_overhead_factor_by_benchmark = [] - - @inline_mem_used = [] - @outline_mem_used = [] - - one_mib = 1024 * 1024.0 # As a float - - @benchmark_names.each.with_index do |benchmark_name, idx| - @configs_with_human_names.each do |name, config| - if @peak_mem_by_config[config][benchmark_name].nil? - @peak_mb_by_config[config].push nil - @peak_mb_relative_by_config[config].push [nil, nil] - else - this_config_bytes = mean(@peak_mem_by_config[config][benchmark_name]) - @peak_mb_by_config[config].push(this_config_bytes / one_mib) - end - end - - baseline_mean = @peak_mb_by_config[@baseline_config][-1] - baseline_rsd = rel_stddev(@peak_mem_by_config[@baseline_config][benchmark_name]) - @configs_with_human_names.each do |name, config| - if @peak_mem_by_config[config][benchmark_name].nil? - @peak_mb_relative_by_config[config].push [nil] - else - values = @peak_mem_by_config[config][benchmark_name] - this_config_mean_mb = mean(values) / one_mib - # For baseline use rsd. For other configs we need to add the baseline rsd to this rsd. - # (See comments for speedup calculations). 
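Concretely, the error propagation referenced in these comments treats the speedup as a ratio of two independent measurements, so the relative standard deviations combine in quadrature. A worked example with made-up numbers:

```ruby
# Worked example of the speedup RSD calculation above, with made-up numbers.
# Dividing the baseline mean by this config's mean gives the speedup; assuming
# the two measurements are independent, their relative errors add in quadrature.
baseline_mean    = 200.0   # ms, hypothetical
baseline_rsd_pct = 2.0     # percent
this_mean        = 100.0   # ms, hypothetical
this_rsd_pct     = 3.0     # percent

speed_ratio = baseline_mean / this_mean                     # => 2.0x
speed_rsd_pct = Math.sqrt((baseline_rsd_pct / 100.0)**2 +
                          (this_rsd_pct / 100.0)**2) * 100.0
# => ~3.61% -- wider error bars than either input alone
```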
- rsd = if config == @baseline_config - baseline_rsd - else - Math.sqrt(baseline_rsd ** 2 + rel_stddev(values) ** 2) - end - # Use (this / baseline) so that bar goes up as value (mem usage) of *this* goes up. - @peak_mb_relative_by_config[config].push [this_config_mean_mb / baseline_mean, rsd] - end - end - - # Here we use @with_yjit_config and @no_jit_config directly (not @baseline_config) - # to compare the memory difference of yjit vs no_jit on the same version. - - yjit_mem_usage = @peak_mem_by_config[@with_yjit_config][benchmark_name].sum - no_jit_mem_usage = @peak_mem_by_config[@no_jit_config][benchmark_name].sum - @mem_overhead_factor_by_benchmark[idx] = (yjit_mem_usage.to_f / no_jit_mem_usage) - 1.0 - - # Round MiB upward, even with a single byte used, since we crash if the block isn't allocated. - inline_mib = ((@yjit_stats[benchmark_name][0]["inline_code_size"] + (one_mib - 1))/one_mib).to_i - outline_mib = ((@yjit_stats[benchmark_name][0]["outlined_code_size"] + (one_mib - 1))/one_mib).to_i - - @inline_mem_used.push inline_mib - @outline_mem_used.push outline_mib - end - end -end - -# This report is to compare YJIT's speedup versus other Rubies for a single run or block of runs, -# with a single YJIT head-of-master. -class YJITMetrics::SpeedDetailsReport < YJITMetrics::BloggableSingleReport - # This report requires a platform name and can't be auto-instantiated by basic_report.rb. - # Instead, its child report(s) can instantiate it for a specific platform. - #def self.report_name - # "blog_speed_details" - #end - - def self.report_extensions - [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ] - end - - def initialize(orig_config_names, results, platform:, benchmarks: []) - # Dumb hack for subclasses until we refactor everything. - return super(orig_config_names, results, benchmarks: benchmarks) unless self.class == YJITMetrics::SpeedDetailsReport - - unless YJITMetrics::PLATFORMS.include?(platform) - raise "Invalid platform for #{self.class.name}: #{platform.inspect}!" - end - @platform = platform - - # Permit non-same-platform stats config - config_names = orig_config_names.select { |name| name.start_with?(platform) || name.include?("yjit_stats") } - raise("Can't find any stats configuration in #{orig_config_names.inspect}!") if config_names.empty? - - # Set up the parent class, look up relevant data - super(config_names, results, benchmarks: benchmarks) - return if @inactive # Can't get stats? Bail out. - - look_up_data_by_ruby - - # Sort benchmarks by headline/micro category, then alphabetically - @benchmark_names.sort_by! { |bench_name| - [ benchmark_category_index(bench_name), - bench_name ] } - - @headings = [ "bench" ] + - @configs_with_human_names.flat_map { |name, config| [ "#{name} (ms)", "#{name} RSD" ] } + - @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : [ "#{name} spd", "#{name} spd RSD" ] } + - [ "% in YJIT" ] - # Col formats are only used when formatting entries for a text table, not for CSV - @col_formats = [ "%s" ] + # Benchmark name - [ "%.1f", "%.2f%%" ] * @configs_with_human_names.size + # Mean and RSD per-Ruby - [ "%.2fx", "%.2f%%" ] * (@configs_with_human_names.size - 1) + # Speedups per-Ruby - [ "%.2f%%" ] # YJIT ratio - - @col_formats[13] = "%.2fx" # Boldface the YJIT speedup column. 
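The memory breakdown above reports YJIT's inline and outlined code sizes rounded up to whole MiB via ceiling division. A tiny example with a hypothetical byte count:

```ruby
# The "round MiB upward" step above is plain ceiling division; even a single
# used byte counts as a full MiB. The byte count here is a made-up example.
one_mib = 1024 * 1024.0
inline_code_size = 3_500_000                               # hypothetical bytes
inline_mib = ((inline_code_size + (one_mib - 1)) / one_mib).to_i
# => 4 (3,500,000 bytes is a bit over 3 MiB, so it rounds up to 4)
```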
- - calc_speed_stats_by_config - end - - # Printed to console - def report_table_data - @benchmark_names.map.with_index do |bench_name, idx| - [ bench_name ] + - @configs_with_human_names.flat_map { |name, config| [ @mean_by_config[config][idx], @rsd_pct_by_config[config][idx] ] } + - @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : @speedup_by_config[config][idx] } + - [ @yjit_ratio[idx] ] - end - end - - # Listed on the details page - def details_report_table_data - @benchmark_names.map.with_index do |bench_name, idx| - bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)" - if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file] - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb" - else - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb" - end - [ "#{bench_name}" ] + - @configs_with_human_names.flat_map { |name, config| [ @mean_by_config[config][idx], @rsd_pct_by_config[config][idx] ] } + - @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : @speedup_by_config[config][idx] } + - [ @yjit_ratio[idx] ] - end - end - - def to_s - # This is just used to print the table to the console - format_as_table(@headings, @col_formats, report_table_data) + - "\nRSD is relative standard deviation (stddev / mean), expressed as a percent.\n" + - "Spd is the speed (iters/second) of the optimised implementation -- 2.0x would be twice as many iters per second.\n" - end - - # For the SVG, we calculate ratios from 0 to 1 for how far across the graph area a coordinate is. - # Then we convert them here to the actual size of the graph. - def ratio_to_x(ratio) - (ratio * 1000).to_s - end - - def ratio_to_y(ratio) - (ratio * 600.0).to_s - end - - def svg_object(relative_values_by_config_and_benchmark, benchmarks: @benchmark_names) - svg = Victor::SVG.new :template => :minimal, - :viewBox => "0 0 1000 600", - :xmlns => "http://www.w3.org/2000/svg", - "xmlns:xlink" => "http://www.w3.org/1999/xlink" # background: '#ddd' - - # Reserve some width on the left for the axis. Include a bit of right-side whitespace. - left_axis_width = 0.05 - right_whitespace = 0.01 - - # Reserve some height for the legend and bottom height for x-axis labels - bottom_key_height = 0.17 - top_whitespace = 0.05 - - plot_left_edge = left_axis_width - plot_top_edge = top_whitespace - plot_bottom_edge = 1.0 - bottom_key_height - plot_width = 1.0 - left_axis_width - right_whitespace - plot_height = 1.0 - bottom_key_height - top_whitespace - plot_right_edge = 1.0 - right_whitespace - - svg.rect x: ratio_to_x(plot_left_edge), y: ratio_to_y(plot_top_edge), - width: ratio_to_x(plot_width), height: ratio_to_y(plot_height), - stroke: Theme.axis_color, - fill: Theme.background_color - - - # Basic info on Ruby configs and benchmarks - ruby_configs = @configs_with_human_names.map { |name, config| config } - ruby_human_names = @configs_with_human_names.map(&:first) - ruby_config_bar_colour = Hash[ruby_configs.zip(Theme.bar_chart_colors)] - baseline_colour = ruby_config_bar_colour[@baseline_config] - baseline_strokewidth = 2 - n_configs = ruby_configs.size - n_benchmarks = benchmarks.size - - - # How high do ratios go? 
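For reference, the SVG helpers above keep all layout math in fractions of the drawing area and convert to viewBox units only when coordinates are emitted. A small sketch using the margin constants defined above:

```ruby
# Sketch of the coordinate convention used above: positions are computed as
# fractions of the drawing area and converted to viewBox units ("0 0 1000 600")
# only at emission time.
def ratio_to_x(ratio) = (ratio * 1000).to_s
def ratio_to_y(ratio) = (ratio * 600.0).to_s

left_axis_width = 0.05   # fraction reserved for y-axis labels
top_whitespace  = 0.05   # fraction reserved above the plot
ratio_to_x(left_axis_width)  # => "50.0" -- x coordinate of the plot's left edge
ratio_to_y(top_whitespace)   # => "30.0" -- y coordinate of the plot's top edge
```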
- max_value = benchmarks.map do |bench_name| - bench_idx = @benchmark_names.index(bench_name) - relative_values_by_config_and_benchmark.values.map { |by_bench| by_bench[bench_idx][0] }.compact.max - end.max - - if max_value.nil? - $stderr.puts "Error finding Y axis. Benchmarks: #{benchmarks.inspect}." - $stderr.puts "data: #{relative_values_by_config_and_benchmark.inspect}" - raise "Error finding axis Y scale for benchmarks: #{benchmarks.inspect}" - end - - # Now let's calculate some widths... - - # Within each benchmark's horizontal span we'll want 3 or 4 bars plus a bit of whitespace. - # And we'll reserve 5% of the plot's width for whitespace on the far left and again on the far right. - plot_padding_ratio = 0.05 - plot_effective_width = plot_width * (1.0 - 2 * plot_padding_ratio) - plot_effective_left = plot_left_edge + plot_width * plot_padding_ratio - - # And some heights... - plot_top_whitespace = 0.15 * plot_height - plot_effective_top = plot_top_edge + plot_top_whitespace - plot_effective_height = plot_height - plot_top_whitespace - - # Add axis markers down the left side - tick_length = 0.008 - font_size = "small" - # This is the largest power-of-10 multiple of the no-JIT mean that we'd see on the axis. Often it's 1 (ten to the zero.) - largest_power_of_10 = 10.0 ** Math.log10(max_value).to_i - # Let's get some nice even numbers for possible distances between ticks - candidate_division_values = - [ largest_power_of_10 * 5, largest_power_of_10 * 2, largest_power_of_10, largest_power_of_10 / 2, largest_power_of_10 / 5, - largest_power_of_10 / 10, largest_power_of_10 / 20 ] - # We'll try to show between about 4 and 10 ticks along the axis, at nice even-numbered spots. - division_value = candidate_division_values.detect do |div_value| - divs_shown = (max_value / div_value).to_i - divs_shown >= 4 && divs_shown <= 10 - end - raise "Error figuring out axis scale with max ratio: #{max_value.inspect} (pow10: #{largest_power_of_10.inspect})!" if division_value.nil? 
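The tick-spacing search above starts from a power of ten derived from the magnitude of the maximum value, tries a fixed ladder of "nice" multiples, and keeps the first spacing that yields between 4 and 10 ticks. A compact sketch with a hypothetical maximum:

```ruby
# Sketch of the y-axis tick spacing choice above, with a made-up max_value.
max_value = 3.7   # e.g. the largest speedup ratio shown on this chart
largest_power_of_10 = 10.0 ** Math.log10(max_value).to_i          # => 1.0
candidates = [5, 2, 1, 0.5, 0.2, 0.1, 0.05].map { |m| largest_power_of_10 * m }
division_value = candidates.detect do |div|
  ticks = (max_value / div).to_i
  ticks >= 4 && ticks <= 10
end
# => 0.5, which puts ticks at 0.0, 0.5, 1.0, ... up to 3.5
```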
- division_ratio_per_value = plot_effective_height / max_value - - # Now find all the y-axis tick locations - divisions = [] - cur_div = 0.0 - loop do - divisions.push cur_div - cur_div += division_value - break if cur_div > max_value - end - - divisions.each do |div_value| - tick_distance_from_zero = div_value / max_value - tick_y = plot_effective_top + (1.0 - tick_distance_from_zero) * plot_effective_height - svg.line x1: ratio_to_x(plot_left_edge - tick_length), y1: ratio_to_y(tick_y), - x2: ratio_to_x(plot_left_edge), y2: ratio_to_y(tick_y), - stroke: Theme.axis_color - svg.text ("%.1f" % div_value), - x: ratio_to_x(plot_left_edge - 3 * tick_length), y: ratio_to_y(tick_y), - text_anchor: "end", - font_weight: "bold", - font_size: font_size, - fill: Theme.text_color - end - - # Set up the top legend with coloured boxes and Ruby config names - top_legend_box_height = 0.032 - top_legend_box_width = 0.12 - text_height = 0.015 - - top_legend_item_width = plot_effective_width / n_configs - n_configs.times do |config_idx| - item_center_x = plot_effective_left + top_legend_item_width * (config_idx + 0.5) - item_center_y = plot_top_edge + 0.025 - legend_text_color = Theme.text_on_bar_color - if @configs_with_human_names[config_idx][1] == @baseline_config - legend_text_color = Theme.axis_color - left = item_center_x - 0.5 * top_legend_box_width - y = item_center_y - 0.5 * top_legend_box_height + top_legend_box_height - svg.line \ - x1: ratio_to_x(left), - y1: ratio_to_y(y), - x2: ratio_to_x(left + top_legend_box_width), - y2: ratio_to_y(y), - stroke: baseline_colour, - "stroke-width": 2 - else - svg.rect \ - x: ratio_to_x(item_center_x - 0.5 * top_legend_box_width), - y: ratio_to_y(item_center_y - 0.5 * top_legend_box_height), - width: ratio_to_x(top_legend_box_width), - height: ratio_to_y(top_legend_box_height), - fill: ruby_config_bar_colour[ruby_configs[config_idx]], - **Theme.legend_box_attrs - end - svg.text @configs_with_human_names[config_idx][0], - x: ratio_to_x(item_center_x), - y: ratio_to_y(item_center_y + 0.5 * text_height), - font_size: font_size, - text_anchor: "middle", - font_weight: "bold", - fill: legend_text_color, - **(legend_text_color == Theme.text_on_bar_color ? Theme.legend_text_attrs : {}) - end - - baseline_y = plot_effective_top + (1.0 - (1.0 / max_value)) * plot_effective_height - - bar_data = [] - - # Okay. Now let's plot a lot of boxes and whiskers. - benchmarks.each.with_index do |bench_name, bench_short_idx| - bar_data << {label: bench_name.delete_suffix('.rb'), bars: []} - bench_idx = @benchmark_names.index(bench_name) - - ruby_configs.each.with_index do |config, config_idx| - human_name = ruby_human_names[config_idx] - - relative_value, rsd_pct = relative_values_by_config_and_benchmark[config][bench_idx] - - if config == @baseline_config - # Sanity check. - raise "Unexpected relative value for baseline config" if relative_value != 1.0 - end - - # If relative_value is nil, there's no such benchmark in this specific case. - if relative_value != nil - rsd_ratio = rsd_pct / 100.0 - bar_height_ratio = relative_value / max_value - - # The calculated number is rel stddev and is scaled by bar height. 
- stddev_ratio = bar_height_ratio * rsd_ratio - - tooltip_text = "#{"%.2f" % relative_value}x baseline (#{human_name})" - - if config == @baseline_config - next - end - - bar_data.last[:bars] << { - value: bar_height_ratio, - fill: ruby_config_bar_colour[config], - label: sprintf("%.2f", relative_value), - tooltip: tooltip_text, - stddev_ratio: stddev_ratio, - } - end - end - end - - geomeans = ruby_configs.each_with_object({}) do |config, h| - next unless relative_values_by_config_and_benchmark[config] - values = benchmarks.map { |bench| relative_values_by_config_and_benchmark[config][ @benchmark_names.index(bench) ]&.first }.compact - h[config] = geomean(values) - end - - bar_data << { - label: "geomean*", - label_attrs: {font_style: "italic"}, - bars: ruby_configs.map.with_index do |config, index| - next if config == @baseline_config - value = geomeans[config] - { - value: value / max_value, - fill: ruby_config_bar_colour[config], - label: sprintf("%.2f", value), - tooltip: sprintf("%.2fx baseline (%s)", value, ruby_human_names[index]), - } - end.compact, - } - - # Determine bar width by counting the bars and adding the number of groups - # for bar-sized space before each group, plus one for the right side of the graph. - num_groups = bar_data.size - bar_width = plot_width / (num_groups + bar_data.map { |x| x[:bars].size }.sum + 1) - - bar_labels = [] - - # Start at the y-axis. - left = plot_left_edge - bar_data.each.with_index do |data, group_index| - data[:bars].each.with_index do |bar, bar_index| - # Move position one width over to place this bar. - left += bar_width - - bar_left = left - bar_center = bar_left + 0.5 * bar_width - bar_right = bar_left + bar_width - bar_top = plot_effective_top + (1.0 - bar[:value]) * plot_effective_height - bar_height = bar[:value] * plot_effective_height - - svg.rect \ - x: ratio_to_x(bar_left), - y: ratio_to_y(bar_top), - width: ratio_to_x(bar_width), - height: ratio_to_y(bar_height), - fill: bar[:fill], - data_tooltip: bar[:tooltip] - - if bar[:label] - bar_labels << { - x: bar_left + 0.002, - y: bar_top - 0.0125, - text: bar[:label], - } - end - - if bar[:stddev_ratio]&.nonzero? - # Whiskers should be centered around the top of the bar, at a distance of one stddev. - stddev_top = bar_top - bar[:stddev_ratio] * plot_effective_height - stddev_bottom = bar_top + bar[:stddev_ratio] * plot_effective_height - - svg.line \ - x1: ratio_to_x(bar_left), - y1: ratio_to_y(stddev_top), - x2: ratio_to_x(bar_right), - y2: ratio_to_y(stddev_top), - **Theme.stddev_marker_attrs - svg.line \ - x1: ratio_to_x(bar_left), - y1: ratio_to_y(stddev_bottom), - x2: ratio_to_x(bar_right), - y2: ratio_to_y(stddev_bottom), - **Theme.stddev_marker_attrs - svg.line \ - x1: ratio_to_x(bar_center), - y1: ratio_to_y(stddev_top), - x2: ratio_to_x(bar_center), - y2: ratio_to_y(stddev_bottom), - **Theme.stddev_marker_attrs - end - end - - # Place a tick on the x-axis in the middle of the group and print label. 
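The bar width formula above reserves one bar-width of blank space before each group and one more at the right edge of the plot. A quick worked example with hypothetical counts:

```ruby
# Worked example of the bar-width formula above (counts are hypothetical).
plot_width     = 0.94   # fraction of the SVG width used for the plot area
num_groups     = 11     # e.g. 10 benchmarks plus the trailing "geomean*" group
bars_per_group = 2      # e.g. YJIT and prev-YJIT; the baseline is drawn as a line
total_bars = num_groups * bars_per_group
bar_width = plot_width / (num_groups + total_bars + 1)
# => ~0.0276 -- one bar-width of space before each group, plus one at the right edge
```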
- group_right = left + bar_width - group_left = (group_right - (bar_width * data[:bars].size)) - middle = group_left + (group_right - group_left) / 2 - svg.line \ - x1: ratio_to_x(middle), - y1: ratio_to_y(plot_bottom_edge), - x2: ratio_to_x(middle), - y2: ratio_to_y(plot_bottom_edge + tick_length), - stroke: Theme.axis_color - - text_end_x = middle - text_end_y = plot_bottom_edge + tick_length * 3 - svg.text data[:label], - x: ratio_to_x(text_end_x), - y: ratio_to_y(text_end_y), - fill: Theme.text_color, - font_size: font_size, - text_anchor: "end", - transform: "rotate(-60, #{ratio_to_x(text_end_x)}, #{ratio_to_y(text_end_y)})", - **data.fetch(:label_attrs, {}) - - # After a group of bars leave the space of one bar width before the next group. - left += bar_width - end - - # Horizontal line for baseline of CRuby at 1.0. - svg.line x1: ratio_to_x(plot_left_edge), y1: ratio_to_y(baseline_y), x2: ratio_to_x(plot_right_edge), y2: ratio_to_y(baseline_y), stroke: baseline_colour, "stroke-width": baseline_strokewidth - - # Do value labels last so that they are above bars, variance whiskers, etc. - bar_labels.each do |label| - font_size = "0.5em" # xx-small is equivalent to 9px or 0.5625em at the default browser font size. - label_text_height = text_height * 0.8 - text_length = 0.0175 - transform = "rotate(-60, #{ratio_to_x(label[:x] + (bar_width * 0.5))}, #{ratio_to_y(label[:y])})" - - svg.rect \ - x: ratio_to_x(label[:x] - text_length * 0.01), - y: ratio_to_y(label[:y] - 0.925 * label_text_height), - width: ratio_to_x(text_length * 1.02), - height: ratio_to_y(label_text_height), - transform: transform, - **Theme.bar_text_background_attrs - - svg.text label[:text], - x: ratio_to_x(label[:x]), - y: ratio_to_y(label[:y]), - fill: Theme.text_color, - font_size: font_size, - text_anchor: "start", - textLength: ratio_to_x(text_length), - transform: transform, - **Theme.bar_text_attrs - end - - svg - end - - def tripwires - tripwires = {} - micro = micro_benchmarks - @benchmark_names.each_with_index do |bench_name, idx| - tripwires[bench_name] = { - mean: @mean_by_config[@with_yjit_config][idx], - rsd_pct: @rsd_pct_by_config[@with_yjit_config][idx], - micro: micro.include?(bench_name), - } - end - tripwires - end - - def html_template_path - File.expand_path("../report_templates/blog_speed_details.html.erb", __dir__) - end - - def relative_values_by_config_and_benchmark - @speedup_by_config - end - - def write_file(filename) - if @inactive - # Can't get stats? Write an empty file. - self.class.report_extensions.each do |ext| - File.open(filename + ".#{@platform}.#{ext}", "w") { |f| f.write("") } - end - return - end - - require "victor" - - head_bench = headline_benchmarks - micro_bench = micro_benchmarks - back_bench = @benchmark_names - head_bench - micro_bench - - if head_bench.empty? - puts "Warning: when writing file #{filename.inspect}, headlining benchmark list is empty!" - end - if micro_bench.empty? - puts "Warning: when writing file #{filename.inspect}, micro benchmark list is empty!" - end - if back_bench.empty? - puts "Warning: when writing file #{filename.inspect}, miscellaneous benchmark list is empty!" - end - - [ - [ @benchmark_names, ".svg" ], - [ head_bench, ".head.svg" ], - [ micro_bench, ".micro.svg" ], - [ back_bench, ".back.svg" ], - ].each do |bench_names, extension| - if bench_names.empty? - contents = "" - else - contents = svg_object(relative_values_by_config_and_benchmark, benchmarks: bench_names).render - end - - File.open(filename + "." 
+ @platform + extension, "w") { |f| f.write(contents) } - end - - # First the 'regular' details report, with tables and text descriptions - script_template = ERB.new File.read(html_template_path) - html_output = script_template.result(binding) - File.open(filename + ".#{@platform}.html", "w") { |f| f.write(html_output) } - - # The Tripwire report is used to tell when benchmark performance drops suddenly - json_data = tripwires - File.open(filename + ".#{@platform}.tripwires.json", "w") { |f| f.write JSON.pretty_generate json_data } - - write_to_csv(filename + ".#{@platform}.csv", [@headings] + report_table_data) - end -end - -class YJITMetrics::SpeedDetailsMultiplatformReport < YJITMetrics::Report - def self.report_name - "blog_speed_details" - end - - def self.single_report_class - ::YJITMetrics::SpeedDetailsReport - end - - # Report-extensions tries to be data-agnostic. That doesn't work very well here. - # It turns out that the platforms in the result set determine a lot of the - # files we generate. So we approximate by generating (sometimes-empty) indicator - # files. That way we still rebuild all the platform-specific files if they have - # been removed or a new type is added. - def self.report_extensions - single_report_class.report_extensions - end - - def initialize(config_names, results, benchmarks: []) - # We need to instantiate N sub-reports for N platforms - @platforms = results.platforms - @sub_reports = {} - @platforms.each do |platform| - platform_config_names = config_names.select { |name| name.start_with?(platform) } - - # If we can't find a config with stats for this platform, is there one in x86_64? - unless platform_config_names.detect { |config| config.include?("yjit_stats") } - x86_stats_config = config_names.detect { |config| config.start_with?("x86_64") && config.include?("yjit_stats") } - puts "Can't find #{platform} stats config, falling back to using x86_64 stats" - platform_config_names << x86_stats_config if x86_stats_config - end - - raise("Can't find a stats config for this platform in #{config_names.inspect}!") if platform_config_names.empty? - @sub_reports[platform] = self.class.single_report_class.new(platform_config_names, results, platform: platform, benchmarks: benchmarks) - if @sub_reports[platform].inactive - puts "Platform config names: #{platform_config_names.inspect}" - puts "All config names: #{config_names.inspect}" - raise "Unable to produce stats-capable report for platform #{platform.inspect} in SpeedDetailsMultiplatformReport!" - end - end - end - - def write_file(filename) - # First, write out per-platform reports - @sub_reports.values.each do |report| - # Each sub-report will add the platform name for itself - report.write_file(filename) - end - - # extensions: - - # For each of these types, we'll just include for each platform and we can switch display - # in the Jekyll site. They exist, but there's no combined multiplatform version. - # We'll create an empty 'tracker' file for the combined version. - self.class.report_extensions.each do |ext| - outfile = "#{filename}.#{ext}" - File.open(outfile, "w") { |f| f.write("") } - end - end -end - -# This report is to compare YJIT's memory usage versus other Rubies for a single run or block of runs, -# with a single YJIT head-of-master. -class YJITMetrics::MemoryDetailsReport < YJITMetrics::SpeedDetailsReport - # This report requires a platform name and can't be auto-instantiated by basic_report.rb. - # Instead, its child report(s) can instantiate it for a specific platform. 
- #def self.report_name - # "blog_memory_details" - #end - - def self.report_extensions - [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ] - end - - def initialize(config_names, results, platform:, benchmarks: []) - unless YJITMetrics::PLATFORMS.include?(platform) - raise "Invalid platform for #{self.class.name}: #{platform.inspect}!" - end - @platform = platform - - # Set up the parent class, look up relevant data - # Permit non-same-platform stats config - config_names = config_names.select { |name| name.start_with?(platform) || name.include?("yjit_stats") } - # FIXME: Drop the platform: platform when we stop inheriting from SpeedDetailsReport. - super(config_names, results, platform: platform, benchmarks: benchmarks) - return if @inactive - - look_up_data_by_ruby - - # Sort benchmarks by headline/micro category, then alphabetically - @benchmark_names.sort_by! { |bench_name| - [ benchmark_category_index(bench_name), - #-@yjit_stats[bench_name][0]["compiled_iseq_count"], - bench_name ] } - - @headings = [ "bench" ] + - @configs_with_human_names.map { |name, config| "#{name} mem (MiB)"} + - [ "Inline Code", "Outlined Code", "YJIT Mem overhead" ] - #@configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : [ "#{name} mem ratio" ] } - # Col formats are only used when formatting entries for a text table, not for CSV - @col_formats = [ "%s" ] + # Benchmark name - [ "%d" ] * @configs_with_human_names.size + # Mem usage per-Ruby - [ "%d", "%d", "%.1f%%" ] # YJIT mem breakdown - #[ "%.2fx" ] * (@configs_with_human_names.size - 1) # Mem ratio per-Ruby - - calc_mem_stats_by_config - end - - # Printed to console - def report_table_data - @benchmark_names.map.with_index do |bench_name, idx| - [ bench_name ] + - @configs_with_human_names.map { |name, config| @peak_mb_by_config[config][idx] } + - [ @inline_mem_used[idx], @outline_mem_used[idx] ] - #[ "#{"%d" % (@peak_mb_by_config[@with_yjit_config][idx] - 256)} + #{@inline_mem_used[idx]}/128 + #{@outline_mem_used[idx]}/128" ] - end - end - - # Listed on the details page - def details_report_table_data - @benchmark_names.map.with_index do |bench_name, idx| - bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)" - if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file] - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb" - else - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb" - end - [ "#{bench_name}" ] + - @configs_with_human_names.map { |name, config| @peak_mb_by_config[config][idx] } + - [ @inline_mem_used[idx], @outline_mem_used[idx], @mem_overhead_factor_by_benchmark[idx] * 100.0 ] - #[ "#{"%d" % (@peak_mb_by_config[@with_yjit_config][idx] - 256)} + #{@inline_mem_used[idx]}/128 + #{@outline_mem_used[idx]}/128" ] - end - end - - def to_s - # This is just used to print the table to the console - format_as_table(@headings, @col_formats, report_table_data) + - "\nMemory usage is in MiB (mebibytes,) rounded. Ratio is versus interpreted baseline CRuby.\n" - end - - def html_template_path - File.expand_path("../report_templates/blog_memory_details.html.erb", __dir__) - end - - def relative_values_by_config_and_benchmark - @peak_mb_relative_by_config - end - - # FIXME: We aren't reporting on the tripwires currently, but it makes sense to implement it and report on it. 
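The tripwire data produced by the speed details report above pairs each benchmark with its with-YJIT mean, the RSD of that mean, and a micro-benchmark flag; the memory report below currently returns an empty hash. A hypothetical example of the structure, with the benchmark name reused from above and the numbers invented:

```ruby
# Hypothetical shape of the tripwires data written by the speed details report
# above; the values are invented for illustration.
require "json"

tripwires = {
  "railsbench" => {
    mean: 1234.5,    # mean with-YJIT iteration time for this benchmark
    rsd_pct: 0.8,    # relative standard deviation of those iterations, in percent
    micro: false,    # true when the benchmark is categorized as a micro-benchmark
  },
}
JSON.pretty_generate(tripwires)   # this is what gets written to the *.tripwires.json file
```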
- def tripwires - {} - end -end - -class YJITMetrics::MemoryDetailsMultiplatformReport < YJITMetrics::SpeedDetailsMultiplatformReport - def self.report_name - "blog_memory_details" - end - - def self.single_report_class - ::YJITMetrics::MemoryDetailsReport - end -end - -# Count up number of iterations and warmups for each Ruby and benchmark configuration. -# As we vary these, we need to make sure people can see what settings we're using for each Ruby. -class YJITMetrics::IterationCountReport < YJITMetrics::BloggableSingleReport - def self.report_name - "iteration_count" - end - - def self.report_extensions - ["html"] - end - - def initialize(config_names, results, benchmarks: []) - # This report will only work with one platform at - # a time, so if we have yjit_stats for x86 prefer that one. - platform = "x86_64" - if results.configs_containing_full_yjit_stats.any? { |c| c.start_with?(platform) } - config_names = config_names.select { |c| c.start_with?(platform) } - else - platform = results.platforms.first - end - - # Set up the parent class, look up relevant data - super - - return if @inactive - - # This report can just run with one platform's data and everything's fine. - # The iteration counts should be identical on other platforms. - look_up_data_by_ruby only_platforms: [platform] - - # Sort benchmarks by headline/micro category, then alphabetically - @benchmark_names.sort_by! { |bench_name| - [ benchmark_category_index(bench_name), - bench_name ] } - - @headings = [ "bench" ] + - @configs_with_human_names.flat_map { |name, config| [ "#{name} warmups", "#{name} iters" ] } - # Col formats are only used when formatting entries for a text table, not for CSV - @col_formats = [ "%s" ] + # Benchmark name - [ "%d", "%d" ] * @configs_with_human_names.size # Iterations per-Ruby-config - end - - # Listed on the details page - def iterations_report_table_data - @benchmark_names.map do |bench_name| - [ bench_name ] + - @configs_with_human_names.flat_map do |_, config| - if @times_by_config[config][bench_name] - [ - @warmups_by_config[config][bench_name].size, - @times_by_config[config][bench_name].size, - ] - else - # If we didn't run this benchmark for this config, we'd like the columns to be blank. - [ nil, nil ] - end - end - end - end - - def write_file(filename) - if @inactive - # Can't get stats? Write an empty file. - self.class.report_extensions.each do |ext| - File.open(filename + ".#{ext}", "w") { |f| f.write("") } - end - return - end - - # Memory details report, with tables and text descriptions - script_template = ERB.new File.read(__dir__ + "/../report_templates/iteration_count.html.erb") - html_output = script_template.result(binding) - File.open(filename + ".html", "w") { |f| f.write(html_output) } - end -end - - -# This report is to compare YJIT's speedup versus other Rubies for a single run or block of runs, -# with a single YJIT head-of-master. -class YJITMetrics::BlogYJITStatsReport < YJITMetrics::BloggableSingleReport - def self.report_name - "blog_yjit_stats" - end - - def self.report_extensions - ["html"] - end - - def set_extra_info(info) - super - - if info[:timestamps] - @timestamps = info[:timestamps] - if @timestamps.size != 1 - raise "WE REQUIRE A SINGLE TIMESTAMP FOR THIS REPORT RIGHT NOW!" 
- end - @timestamp_str = @timestamps[0].strftime("%Y-%m-%d-%H%M%S") - end - end - - def initialize(config_names, results, benchmarks: []) - # Set up the parent class, look up relevant data - super - return if @inactive - - # This report can just run with one platform's data and everything's fine. - # The stats data should be basically identical on other platforms. - look_up_data_by_ruby only_platforms: results.platforms[0] - - # Sort benchmarks by headline/micro category, then alphabetically - @benchmark_names.sort_by! { |bench_name| - [ benchmark_category_index(bench_name), - bench_name ] } - - @headings_with_tooltips = { - "bench" => "Benchmark name", - "Exit Report" => "Link to a generated YJIT-stats-style exit report", - "Inline" => "Bytes of inlined code generated", - "Outlined" => "Bytes of outlined code generated", - "Comp iSeqs" => "Number of compiled iSeqs (methods)", - "Comp Blocks" => "Number of compiled blocks", - "Inval" => "Number of methods or blocks invalidated", - "Inval Ratio" => "Number of blocks invalidated over number of blocks compiled", - "Bind Alloc" => "Number of Ruby bindings allocated", - "Bind Set" => "Number of variables set via bindings", - "Const Bumps" => "Number of times Ruby clears its internal constant cache", - } - - # Col formats are only used when formatting entries for a text table, not for CSV - @col_formats = @headings_with_tooltips.keys.map { "%s" } - end - - # Listed on the details page - def details_report_table_data - @benchmark_names.map.with_index do |bench_name, idx| - bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)" - bench_desc = bench_desc.gsub('"' , """) - if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file] - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb" - else - bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb" - end - - exit_report_url = "/reports/benchmarks/blog_exit_reports_#{@timestamp_str}.#{bench_name}.txt" - - bench_stats = @yjit_stats[bench_name][0] - - fmt_inval_ratio = "?" - if bench_stats["invalidation_count"] && bench_stats["compiled_block_count"] - inval_ratio = bench_stats["invalidation_count"].to_f / bench_stats["compiled_block_count"] - fmt_inval_ratio = "%d%%" % (inval_ratio * 100.0).to_i - end - - [ "#{bench_name}", - "(click)", - bench_stats["inline_code_size"], - bench_stats["outlined_code_size"], - bench_stats["compiled_iseq_count"], - bench_stats["compiled_block_count"], - bench_stats["invalidation_count"], - fmt_inval_ratio, - bench_stats["binding_allocations"], - bench_stats["binding_set"], - bench_stats["constant_state_bumps"], - ] - - end - end - - def write_file(filename) - if @inactive - # Can't get stats? Write an empty file. - self.class.report_extensions.each do |ext| - File.open(filename + ".#{ext}", "w") { |f| f.write("") } - end - return - end - - # Memory details report, with tables and text descriptions - script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_yjit_stats.html.erb") - html_output = script_template.result(binding) - File.open(filename + ".html", "w") { |f| f.write(html_output) } - end - -end - -class BlogStatsExitReports < YJITMetrics::BloggableSingleReport - def self.report_name - "blog_exit_reports" - end - - def self.report_extensions - ["bench_list.txt"] - end - - def write_file(filename) - if @inactive - # Can't get stats? Write an empty file. 
- self.class.report_extensions.each do |ext| - File.open(filename + ".#{ext}", "w") { |f| f.write("") } - end - return - end - - @benchmark_names.each do |bench_name| - File.open("#{filename}.#{bench_name}.txt", "w") { |f| f.puts exit_report_for_benchmarks([bench_name]) } - end - - # This is a file with a known name that we can look for when generating. - File.open("#{filename}.bench_list.txt", "w") { |f| f.puts @benchmark_names.join("\n") } - end -end - -# This very small report is to give the quick headlines and summary for a YJIT comparison. -class YJITMetrics::SpeedHeadlineReport < YJITMetrics::BloggableSingleReport - def self.report_name - "blog_speed_headline" - end - - def self.report_extensions - ["html"] - end - - def format_speedup(ratio) - if ratio >= 1.01 - "%.1f%% faster than" % ((ratio - 1.0) * 100) - elsif ratio < 0.99 - "%.1f%% slower than" % ((1.0 - ratio) * 100) - else - "the same speed as" - end - end - - def platforms - @result_set.platforms - end - - def yjit_bench_file_url(path) - "https://github.com/Shopify/yjit-bench/blob/#{@result_set.full_run_info&.dig("git_versions", "yjit_bench") || "main"}/#{path}" - end - - def ruby_version(config) - @result_set.ruby_version_for_config(config) - end - - X86_ONLY = ENV['ALLOW_ARM_ONLY_REPORTS'] != '1' - - def initialize(config_names, results, benchmarks: []) - # Give the headline data for x86 processors, not ARM64. - # No x86 data? Then no headline. - x86_configs = config_names.select { |name| name.include?("x86_64") } - if x86_configs.empty? - if X86_ONLY - @no_data = true - puts "WARNING: no x86_64 data for data: #{config_names.inspect}" - return - end - else - config_names = x86_configs - end - - # Set up the parent class, look up relevant data - super - return if @inactive # Can't get stats? Bail out. - - platform = "x86_64" - if !X86_ONLY && !results.platforms.include?(platform) - platform = results.platforms[0] - end - look_up_data_by_ruby(only_platforms: [platform]) - - # Report the headlining speed comparisons versus current prerelease MJIT if available, or fall back to MJIT - if @mjit_is_incomplete - @with_mjit_config = @with_mjit30_config - else - @with_mjit_config = @with_mjit_latest_config || @with_mjit30_config - end - @mjit_name = "MJIT" - @mjit_name = "MJIT (3.0)" if @with_mjit_config == @with_mjit30_config - - # Sort benchmarks by headline/micro category, then alphabetically - @benchmark_names.sort_by! { |bench_name| - [ benchmark_category_index(bench_name), - #-@yjit_stats[bench_name][0]["compiled_iseq_count"], - bench_name ] } - - calc_speed_stats_by_config - - # For these ratios we compare current yjit and no_jit directly (not @baseline_config). 
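The headline figure computed just below is the geometric mean of per-benchmark (no-JIT mean over YJIT mean) ratios restricted to the headline benchmarks, rather than a ratio of total times. A sketch with invented means, assuming `geomean` is the usual n-th root of the product:

```ruby
# Sketch of the headline number: geometric mean of per-benchmark speedups,
# computed from invented [no-JIT mean, YJIT mean] times for two benchmarks.
def geomean(values)
  exponent = 1.0 / values.size
  values.reduce(1.0) { |prod, v| prod * (v ** exponent) }
end

headline = { "railsbench" => [1000.0, 800.0], "liquid-render" => [50.0, 35.0] }
ratios = headline.values.map { |no_jit, yjit| no_jit / yjit }   # => [1.25, ~1.43]
yjit_vs_cruby_ratio = geomean(ratios)                           # => ~1.34

# format_speedup(yjit_vs_cruby_ratio)  # => "33.6% faster than" (helper defined above)
```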
- - # "Ratio of total times" method - #@yjit_vs_cruby_ratio = @total_time_by_config[@no_jit_config] / @total_time_by_config[@with_yjit_config] - - headline_runtimes = headline_benchmarks.map do |bench_name| - bench_idx = @benchmark_names.index(bench_name) - - bench_no_jit_mean = @mean_by_config[@no_jit_config][bench_idx] - bench_yjit_mean = @mean_by_config[@with_yjit_config][bench_idx] - prev_yjit_mean = @mean_by_config.dig(@prev_yjit_config, bench_idx) - - [ bench_yjit_mean, bench_no_jit_mean, prev_yjit_mean ] - end - # Geometric mean of headlining benchmarks only - @yjit_vs_cruby_ratio = geomean headline_runtimes.map { |yjit_mean, no_jit_mean, _| no_jit_mean / yjit_mean } - - if @prev_yjit_config - @yjit_vs_prev_yjit_ratio = geomean headline_runtimes.map { |yjit_mean, _, prev_yjit| prev_yjit / yjit_mean } - end - - @railsbench_idx = @benchmark_names.index("railsbench") - if @railsbench_idx - @yjit_vs_cruby_railsbench_ratio = @mean_by_config[@no_jit_config][@railsbench_idx] / @mean_by_config[@with_yjit_config][@railsbench_idx] - @yjit_vs_prev_yjit_railsbench_ratio = @mean_by_config[@prev_yjit_config][@railsbench_idx] / @mean_by_config[@with_yjit_config][@railsbench_idx] - end - end - - def to_s - return "(This run had no x86 results)" if @no_data - script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_speed_headline.html.erb") - script_template.result(binding) # Evaluate an Erb template with template_settings - end - - def write_file(filename) - if @inactive || @no_data - # Can't get stats? Write an empty file. - self.class.report_extensions.each do |ext| - File.open(filename + ".#{ext}", "w") { |f| f.write("") } - end - return - end - - html_output = self.to_s - File.open(filename + ".html", "w") { |f| f.write(html_output) } - end -end diff --git a/lib/yjit-metrics/report_types/per_bench_ruby_compare_report.rb b/lib/yjit-metrics/report_types/per_bench_ruby_compare_report.rb deleted file mode 100644 index f63bf10a9..000000000 --- a/lib/yjit-metrics/report_types/per_bench_ruby_compare_report.rb +++ /dev/null @@ -1,78 +0,0 @@ -# We'd like to be able to create a quick columnar report, often for one -# Ruby config versus another, and load/dump it as JSON or CSV. This isn't a -# report class that is all things to all people -- it's specifically -# a comparison of two or more configurations per-benchmark for yjit-bench. -# -# The first configuration given is assumed to be the baseline against -# which the other configs are measured. -class YJITMetrics::PerBenchRubyComparison < YJITMetrics::Report - def self.report_name - "per_bench_compare" - end - - def initialize(config_names, results, benchmarks: []) - super - - @headings = [ "bench" ] + config_names.flat_map { |config| [ "#{config}", "RSD" ] } + alt_configs.map { |config| "#{config}/#{base_config}" } - @col_formats = [ "%s" ] + config_names.flat_map { [ "%.1fms", "%.1f%%" ] } + alt_configs.map { "%.2f" } - - @report_data = [] - times_by_config = {} - config_names.each { |config| times_by_config[config] = results.times_for_config_by_benchmark(config) } - - benchmark_names = times_by_config[config_names[0]].keys - - times_by_config.each do |config_name, results| - raise("No results for configuration #{config_name.inspect} in PerBenchRubyComparison!") if results.nil? - end - - benchmark_names.each do |benchmark_name| - # Only run benchmarks if there is no list of "only run these" benchmarks, or if the benchmark name starts with one of the list elements - unless @only_benchmarks.empty? - next unless @only_benchmarks.any? 
{ |bench_spec| benchmark_name.start_with?(bench_spec) } - end - row = [ benchmark_name ] - config_names.each do |config| - unless times_by_config[config][benchmark_name] - raise("Configuration #{config.inspect} has no results for #{benchmark_name.inspect} even though #{config_names[0]} does in the same dataset!") - end - config_times = times_by_config[config][benchmark_name] - config_mean = mean(config_times) - row.push config_mean - row.push 100.0 * stddev(config_times) / config_mean - end - - base_config_mean = mean(times_by_config[base_config][benchmark_name]) - alt_configs.each do |config| - config_mean = mean(times_by_config[config][benchmark_name]) - row.push config_mean / base_config_mean - end - - @report_data.push row - end - end - - def base_config - @config_names[0] - end - - def alt_configs - @config_names[1..-1] - end - - def to_s - format_as_table(@headings, @col_formats, @report_data) + config_legend_text - end - - def config_legend_text - [ - "", - "Legend:", - alt_configs.map do |config| - "- #{config}/#{base_config}: ratio of mean(#{config} times)/mean(#{base_config} times). >1 means #{base_config} is faster." - end, - "RSD is relative standard deviation (percent).", - "" - ].join("\n") - end -end diff --git a/lib/yjit-metrics/report_types/variable_warmup_report.rb b/lib/yjit-metrics/report_types/variable_warmup_report.rb deleted file mode 100644 index 373caca95..000000000 --- a/lib/yjit-metrics/report_types/variable_warmup_report.rb +++ /dev/null @@ -1,181 +0,0 @@ -# And here is where we get into... cleverness :-/ - -# This report intends to look over the most recent results for a specific benchmark and Ruby configuration -# and determine how much warmup is really required or useful. Where possible we should be a bit conservative -# and run additional warmups, and we should check to see if we might be inadequately warming up a particular -# combination. - -# We don't want to let warmup or number of iterations get so high that we run over the GitHub Actions -# maximum job duration. - -class YJITMetrics::VariableWarmupReport < YJITMetrics::Report - def self.report_name - "variable_warmup" - end - - def self.report_extensions - "warmup_settings.json" - end - - # The internal state of these is huge - reduce the size of debug output when calling a bad - # method... - def inspect - "VariableWarmupReport<#{self.object_id}>" - end - - CORRELATION_THRESHOLD = 0.1 - - def look_up_data_by_ruby - # Order matters here - we push No-JIT, then MJIT(s), then YJIT. For each one we sort by platform name. - # It matters because we want the output reports to be stable with no churn in Git. 
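For the per-benchmark comparison report above, each row carries the mean and RSD for every configuration plus the ratio of each alternate config's mean to the baseline's mean, where a ratio above 1 means the baseline was faster. A small worked example with invented timings; the sample-stddev helper stands in for the project's own stats code, which may compute it slightly differently:

```ruby
# Invented timings (ms) for one benchmark under two configs, showing how a row
# of the per_bench_compare report above is assembled.
def stddev(values)
  mean = values.sum / values.size
  Math.sqrt(values.map { |v| (v - mean)**2 }.sum / (values.size - 1))
end

base_times = [100.0, 102.0, 98.0]
alt_times  = [ 80.0,  81.0, 79.0]

base_mean = base_times.sum / base_times.size      # => 100.0
alt_mean  = alt_times.sum / alt_times.size        # => 80.0
ratio     = alt_mean / base_mean                  # => 0.8 -- below 1, so the alt config is faster
rsd_pct   = 100.0 * stddev(base_times) / base_mean   # => ~2.0%
```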
- bench_configs = YJITMetrics::DEFAULT_YJIT_BENCH_CI_SETTINGS["configs"] - configs = @result_set.config_names - config_order = [] - config_order += configs.select { |c| c["prev_ruby_no_jit"] }.sort # optional - config_order += configs.select { |c| c["prod_ruby_no_jit"] }.sort - config_order += configs.select { |c| c["prod_ruby_with_mjit"] }.sort # MJIT is optional, may be empty - config_order += configs.select { |c| c["prev_ruby_yjit"] }.sort # optional - config_order += configs.select { |c| c["prod_ruby_with_yjit"] }.sort - config_order += configs.select { |c| c["yjit_stats"] }.sort # Stats configs *also* take time to run - @configs_with_human_names = @result_set.configs_with_human_names(config_order) - - # Grab relevant data from the ResultSet - @warmups_by_config = {} - @times_by_config = {} - @iters_by_config = {} - @ruby_metadata_by_config = {} - @bench_metadata_by_config = {} - - @configs_with_human_names.map { |name, config| config }.each do |config| - @warmups_by_config[config] = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) - @times_by_config[config] = @result_set.times_for_config_by_benchmark(config, in_runs: true) - - @warmups_by_config[config].keys.each do |bench_name| - @iters_by_config[config] ||= {} - # For each run, add its warmups to its timed iterations in a single array. - runs = @warmups_by_config[config][bench_name].zip(@times_by_config[config][bench_name]).map { |a, b| a + b } - @iters_by_config[config][bench_name] = runs - end - - @ruby_metadata_by_config[config] = @result_set.metadata_for_config(config) - @bench_metadata_by_config[config] = @result_set.benchmark_metadata_for_config_by_benchmark(config) - end - - all_bench_names = @times_by_config[config_order[-1]].keys - @benchmark_names = filter_benchmark_names(all_bench_names) - - @times_by_config.each do |config_name, config_results| - if config_results.nil? || config_results.empty? - raise("No results for configuration #{config_name.inspect} in #{self.class}!") - end - end - end - - def initialize(config_names, results, - default_yjit_bench_settings: ::YJITMetrics::DEFAULT_YJIT_BENCH_CI_SETTINGS, benchmarks: []) - - # Set up the parent class, look up relevant data - super(config_names, results, benchmarks: benchmarks) - - @default_yjit_bench_settings = default_yjit_bench_settings - - look_up_data_by_ruby - end - - # Figure out how many iterations, warmup and non-, for each Ruby config and benchmark - def iterations_for_configs_and_benchmarks(default_settings) - # Note: default_configs are config *roots*, not full configurations - default_configs = default_settings["configs"].keys.sort - - warmup_settings = default_configs.to_h do |config| - [ config, @benchmark_names.to_h do |bench_name| - [ bench_name, - { - # Conservative defaults - sometimes these are for Ruby configs we know nothing about, - # because they're not present in recent-at-the-time benchmark data. - warmup_itrs: default_settings["min_warmup_itrs"], - min_bench_itrs: default_settings["min_bench_itrs"], - min_bench_time: 0, - } - ] - end - ] - end - - @benchmark_names.each do |bench_name| - idx = @benchmark_names.index(bench_name) - - # Number of iterations is chosen per-benchmark, but stays the same across all configs. - # Find the fastest mean iteration across all configs. 
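The sizing logic that follows applies two floors to the per-benchmark iteration count (a minimum iteration count, and a minimum total bench time divided by the fastest per-iteration mean across configs) and caps per-config warmup by a maximum warmup time. A worked sketch with invented settings:

```ruby
# Worked example (invented numbers) of the iteration/warmup sizing logic below.
min_bench_itrs = 10
min_bench_time = 20.0      # seconds of real benchmarking wanted
fastest_itr_ms = 150.0     # fastest mean iteration across configs, in ms

min_itrs_needed = [(min_bench_time * 1000.0 / fastest_itr_ms).to_i, min_bench_itrs].max
# => 133 -- the time floor dominates for a fast benchmark

max_warmup_itrs = 30
max_warmup_time = 5.0      # seconds
itr_time_ms     = 400.0    # this config's mean iteration, in ms
warmup_itrs = [max_warmup_itrs, max_warmup_time * 1000.0 / itr_time_ms].min
# => 12.5 -- the time cap wins, so this config gets fewer warmup iterations
```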
- summary = @result_set.summary_by_config_and_benchmark - fastest_itr_time_ms = default_configs.map do |config| - summary.dig(config, bench_name, "mean") - end.compact.min || 10_000_000.0 - - min_itrs_needed = (default_settings["min_bench_time"] * 1000.0 / fastest_itr_time_ms).to_i - min_itrs_needed = [ min_itrs_needed, default_settings["min_bench_itrs"] ].max - - default_configs.each do |config| - config_settings = default_settings["configs"][config] - - itr_time_ms = summary.dig(config, bench_name, "mean") - ws = warmup_settings[config][bench_name] - raise "No warmup settings found for #{config.inspect}/#{bench_name.inspect}!" if ws.nil? - - ws[:min_bench_itrs] = min_itrs_needed - - # Do we have an estimate of how long this takes per iteration? If so, include it. - ws[:itr_time_ms] = ("%.2f" % [ws[:itr_time_ms], itr_time_ms].compact.max) unless itr_time_ms.nil? - - # Warmup is chosen per-config to reduce unneeded warmup for low-warmup configs - ws[:warmup_itrs] = config_settings[:max_warmup_itrs] - if config_settings[:max_warmup_time] && itr_time_ms - # itr_time_ms is in milliseconds, while max_warmup_time is in seconds - max_allowed_warmup = config_settings[:max_warmup_time] * 1000.0 / itr_time_ms - # Choose the tighter of the two warmup limits - ws[:warmup_itrs] = max_allowed_warmup if ws[:warmup_itrs] > max_allowed_warmup - end - - if itr_time_ms - itrs = ws[:warmup_itrs] + ws[:min_bench_itrs] - est_time_ms = itrs * (itr_time_ms || 0.0) - ws[:estimated_time] = ((est_time_ms + 999.0) / 1000).to_i # Round up for elapsed time - else - ws[:estimated_time] = 0 unless ws[:estimated_time] - end - #puts "Est time #{config.inspect} #{bench_name.inspect}: #{itrs} * #{"%.1f" % (itr_time_ms || 0.0)}ms = #{ws[:estimated_time].inspect}sec" - end - end - - platform_configs = {} - @configs_with_human_names.values.each do |config| - config_platform = YJITMetrics::PLATFORMS.detect { |platform| config.start_with?(platform) } - platform_configs[config_platform] ||= [] - platform_configs[config_platform] << config - end - - # How much total time have we allocated to running benchmarks per platform? - platform_configs.each do |platform, configs| - est_time = configs.map do |config| - warmup_settings[config].values.map { |s| s[:estimated_time] || 0.0 }.sum - end.sum - warmup_settings["#{platform}_estimated_time"] = est_time - - # Do we need to reduce the time taken? - if est_time > default_settings["max_itr_time"] - warn "Maximum allowed time: #{default_settings["max_itr_time"].inspect}sec" - warn "Estimated run time on #{platform}: #{est_time.inspect}sec" - warn "This is where logic to do something statistical and clever would go!" - end - end - - warmup_settings - end - - def write_file(filename) - settings = iterations_for_configs_and_benchmarks(@default_yjit_bench_settings) - - puts "Writing file: #{filename}.warmup_settings.json" - File.open(filename + ".warmup_settings.json", "w") { |f| f.puts JSON.pretty_generate settings } - end -end diff --git a/lib/yjit-metrics/report_types/warmup_report.rb b/lib/yjit-metrics/report_types/warmup_report.rb deleted file mode 100644 index bde58fb29..000000000 --- a/lib/yjit-metrics/report_types/warmup_report.rb +++ /dev/null @@ -1,198 +0,0 @@ -# This is intended to be a simple warmup report, showing how long it takes -# one or more Ruby implementations to reach full performance, per-benchmark. 
-class YJITMetrics::WarmupReport < YJITMetrics::Report - def self.report_name - "warmup" - end - - def initialize(config_names, results, benchmarks: []) - raise "Not yet updated for multi-platform!" - - super - - @headings_by_config = {} - @col_formats_by_config = {} - @report_data_by_config = {} - - @config_names.each do |config| - times = @result_set.times_for_config_by_benchmark(config, in_runs: true) - warmups = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) - - # Combine times and warmups for each run, for each benchmark - all_iters = {} - times.keys.each do |benchmark_name| - all_iters[benchmark_name] = warmups[benchmark_name].zip(times[benchmark_name]).map { |warmups, real_iters| warmups + real_iters } - end - - benchmark_names = filter_benchmark_names(times.keys) - raise "No benchmarks found for config #{config.inspect}!" if benchmark_names.empty? - - # For every benchmark, check the fewest iterations/run. - min_iters_per_benchmark = benchmark_names.map { |bn| all_iters[bn].map { |run| run.size }.min } - - most_cols_of_benchmarks = min_iters_per_benchmark.max - - showcased_iters = [1, 5, 10, 50, 100, 200, 500, 1000, 5000, 10_000, 50_000, 100_000].select { |i| i <= most_cols_of_benchmarks } - - @headings_by_config[config] = - [ "bench", "samples" ] + - showcased_iters.map { |iter| "iter ##{iter}" } + - showcased_iters.map { |iter| "RSD ##{iter}" } - @col_formats_by_config[config] = - [ "%s", "%d" ] + - showcased_iters.map { "%.1fms" } + - showcased_iters.map { "%.2f%%" } - @report_data_by_config[config] = [] - - benchmark_names.each do |benchmark_name| - # We assume that for each config/benchmark combo we have the same number of warmup runs as timed runs - all_runs = all_iters[benchmark_name] - num_runs = all_runs.size - min_iters = all_runs.map { |run| run.size }.min - - iters_present = showcased_iters.select { |i| i <= min_iters } - end_nils = [nil] * (showcased_iters.size - iters_present.size) - - iter_N_mean = [] - iter_N_rsd = [] - - iters_present.each do |iter_num| - iter_series = all_runs.map { |run| run[iter_num - 1] } # Human-readable "iteration #10" is array index 9 - iter_N_mean.push mean(iter_series) - iter_N_rsd.push rel_stddev_pct(iter_series) - end - - @report_data_by_config[config].push([benchmark_name, num_runs] + iter_N_mean + end_nils + iter_N_rsd + end_nils) - end - end - end - - def to_s - output = "" - - @config_names.each do |config_name| - output.concat("Warmup for #{config_name.capitalize}:\n\n") - - output.concat(format_as_table(@headings_by_config[config_name], - @col_formats_by_config[config_name], - @report_data_by_config[config_name])) - - output.concat("Each iteration is a set of samples of that iteration in a series.\n") - output.concat("RSD is relative standard deviation - the standard deviation divided by the mean of the series.\n") - output.concat("Samples is the number of runs (samples taken) for each specific iteration number.\n") - output.concat("\n\n") - end - - output - end - - def write_file(filename) - @config_names.each do |config_name| - headings = @headings_by_config[config_name] - report_data = @report_data_by_config[config_name] - write_to_csv("#{filename}_#{config_name}.csv", [headings] + report_data) - end - end - - -end - - -# This is intended to show the total time taken to get to -# a particular iteration, to help understand warmup -class YJITMetrics::TotalToIterReport < YJITMetrics::Report - def self.report_name - "total_to_iter" - end - - def initialize(config_names, results, benchmarks: []) - raise "Not yet 
updated for multi-platform!" - - super - - @headings_by_config = {} - @col_formats_by_config = {} - @report_data_by_config = {} - - @config_names.each do |config| - times = @result_set.times_for_config_by_benchmark(config, in_runs: true) - warmups = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) - - # Combine times and warmups for each run, for each benchmark - all_iters = {} - times.keys.each do |benchmark_name| - all_iters[benchmark_name] = warmups[benchmark_name].zip(times[benchmark_name]).map { |warmups, real_iters| warmups + real_iters } - end - - benchmark_names = filter_benchmark_names(times.keys) - raise "No benchmarks found for config #{config.inspect}!" if benchmark_names.empty? - max_num_runs = benchmark_names.map { |bn| times[bn].size }.max - - # For every benchmark, check the fewest iterations/run. - min_iters_per_benchmark = benchmark_names.map { |bn| all_iters[bn].map { |run| run.size }.min } - - most_cols_of_benchmarks = min_iters_per_benchmark.max - - showcased_iters = [1, 5, 10, 50, 100, 200, 500, 1000, 5000, 10_000, 50_000, 100_000].select { |i| i <= most_cols_of_benchmarks } - - @headings_by_config[config] = - [ "bench", "samples" ] + - showcased_iters.map { |iter| "iter ##{iter}" } + - showcased_iters.map { |iter| "RSD ##{iter}" } - @col_formats_by_config[config] = - [ "%s", "%d" ] + - showcased_iters.map { "%.1fms" } + - showcased_iters.map { "%.2f%%" } - @report_data_by_config[config] = [] - - benchmark_names.each do |benchmark_name| - # We assume that for each config/benchmark combo we have the same number of warmup runs as timed runs - all_runs = all_iters[benchmark_name] - num_runs = all_runs.size - min_iters = all_runs.map { |run| run.size }.min - - iters_present = showcased_iters.select { |i| i <= min_iters } - end_nils = [nil] * (showcased_iters.size - iters_present.size) - - iter_N_mean = [] - iter_N_rsd = [] - - iters_present.each do |iter_num| - # For this report, we want the *total* non-harness time to get to an iteration number - iter_series = all_runs.map { |run| (0..(iter_num - 1)).map { |idx| run[idx] }.sum } - iter_N_mean.push mean(iter_series) - iter_N_rsd.push rel_stddev_pct(iter_series) - end - - @report_data_by_config[config].push([benchmark_name, num_runs] + iter_N_mean + end_nils + iter_N_rsd + end_nils) - end - end - end - - def to_s - output = "" - - @config_names.each do |config_name| - output.concat("Total Time to Iteration N for #{config_name.capitalize}:\n\n") - - output.concat(format_as_table(@headings_by_config[config_name], - @col_formats_by_config[config_name], - @report_data_by_config[config_name])) - - output.concat("Each iteration is a set of samples of that iteration in a series.\n") - output.concat("RSD is relative standard deviation - the standard deviation divided by the mean of the series.\n") - output.concat("Samples is the number of runs (samples taken) for each specific iteration number.\n") - output.concat("\n\n") - end - - output - end - - def write_file(filename) - @config_names.each do |config_name| - headings = @headings_by_config[config_name] - report_data = @report_data_by_config[config_name] - write_to_csv("#{filename}_#{config_name}.csv", [headings] + report_data) - end - end -end diff --git a/lib/yjit-metrics/report_types/yjit_stats_reports.rb b/lib/yjit-metrics/report_types/yjit_stats_reports.rb deleted file mode 100644 index 0c69bfe43..000000000 --- a/lib/yjit-metrics/report_types/yjit_stats_reports.rb +++ /dev/null @@ -1,278 +0,0 @@ -class YJITMetrics::YJITStatsReport < YJITMetrics::Report - 
attr_reader :stats_config - - # The report only runs on benchmarks that match the ones specified *and* that are present in - # the data files. This is that final list of benchmarks. - attr_reader :benchmark_names - - # If we can't get stats data, we can't usefully run this report. - attr_reader :inactive - - def initialize(stats_configs, results, benchmarks: []) - raise("No stats configs given for YJIT stats report!") if stats_configs.empty? - - super - - bad_configs = stats_configs - results.available_configs - raise "Unknown configurations in report: #{bad_configs.inspect}!" unless bad_configs.empty? - - # Take the specified reporting configurations and filter by which ones contain YJIT stats. The result should - # be a single configuration to report on. - filtered_stats_configs = results.configs_containing_full_yjit_stats & stats_configs - - # The surrounding code seems to be from before we started running multiple platforms, - # so if that's what we have (multiple platforms) just limit to one so we can get the report. - if filtered_stats_configs.size > 1 - # If the configs are the same but for different platforms, pick one. - # This regexp should be a constant but when this file is loaded - # the PLATFORMS constant hasn't been defined yet. - if filtered_stats_configs.map { |c| c.sub(/^#{Regexp.union(YJITMetrics::PLATFORMS)}_/, '') }.uniq.size == 1 - x86 = filtered_stats_configs.select { |c| c.start_with?("x86_64") } - filtered_stats_configs = x86 unless x86.empty? - end - end - - @inactive = false - if filtered_stats_configs.empty? - puts "We didn't find any config with YJIT stats among #{stats_configs.inspect}!" if filtered_stats_configs.empty? - @inactive = true - return - elsif filtered_stats_configs.size > 1 - puts "We found more than one config with YJIT stats (#{filtered_stats_configs.inspect}) in this result set!" - @inactive = true - return - end - @stats_config = filtered_stats_configs.first - - @result_set = results - @only_benchmarks = benchmarks - - bench_yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config) - raise("Config #{@stats_config.inspect} collected no YJIT stats!") if bench_yjit_stats.nil? || bench_yjit_stats.values.all?(&:empty?) - - # Only run benchmarks if there is no list of "only run these" benchmarks, or if the benchmark name starts with one of the list elements - @benchmark_names = filter_benchmark_names(bench_yjit_stats.keys) - end - - # Pretend that all these listed benchmarks ran inside a single Ruby process. Combine their statistics, as though you were - # about to print an exit report. - def combined_stats_data_for_benchmarks(benchmark_names) - raise("Can't query stats for an inactive stats-based report!") if @inactive - - unless benchmark_names.all? { |benchmark_name| @benchmark_names.include?(benchmark_name) } - raise "No data found for benchmark #{benchmark_name.inspect}!" - end - - all_yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config) - relevant_stats = benchmark_names.flat_map { |benchmark_name| all_yjit_stats[benchmark_name] }.select { |data| !data.empty? } - - if relevant_stats.empty? - raise "No YJIT stats data found for benchmarks: #{benchmark_names.inspect}!" - end - - # For each key in the YJIT statistics, add up the value for that key in all datasets. Note: all_stats is a non-numeric key. 
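# A hedged illustration of the combining rules implemented just below, using hypothetical
# stat keys and made-up values rather than real YJIT stats output:
#   dataset A: { "some_counter" => 10, "some_time" => 1.5, "some_exits" => { "x" => 2 } }
#   dataset B: { "some_counter" =>  4, "some_time" => 0.5, "some_exits" => { "x" => 1, "y" => 3 } }
#   combined:  { "some_counter" => 14, "some_time" => 2.0, "some_exits" => { "x" => 3, "y" => 3 } }
# Integer and Float values are summed; hash-valued counters are summed per sub-key.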
- yjit_stats_keys = relevant_stats[0].keys - ["all_stats"] - yjit_data = {} - yjit_stats_keys.each do |stats_key| - # Unknown keys default to 0 - entries = relevant_stats.map { |dataset| dataset[stats_key] }.compact - if entries[0].is_a?(Integer) - yjit_data[stats_key] = entries.sum(0) - elsif entries[0].is_a?(Float) - yjit_data[stats_key] = entries.sum(0.0) - elsif entries[0].is_a?(Hash) && entries[0].empty? || entries[0].values[0].is_a?(Integer) - yjit_data[stats_key] = {} - sub_keys = entries.flat_map(&:keys).uniq - sub_keys.each do |sub_key| - yjit_data[stats_key][sub_key] = entries.sum(0) { |entry| entry[sub_key] } - end - else - raise "Failing for #{stats_key.inspect} with unknown entry type #{entries[0].class}!" - end - end - yjit_data - end - - def total_exit_count(stats, prefix: "exit_") - total = 0 - stats.each do |k,v| - total += v if k.start_with?(prefix) - end - total - end - - def exit_report_for_benchmarks(benchmarks) - # Bindings for use inside ERB report template - stats = combined_stats_data_for_benchmarks(benchmarks) - side_exits = total_exit_count(stats) - total_exits = side_exits + stats["leave_interp_return"] - - # Number of instructions that finish executing in YJIT - retired_in_yjit = (stats["exec_instruction"] || stats["yjit_insns_count"]) - side_exits - - # Average length of instruction sequences executed by YJIT - avg_len_in_yjit = retired_in_yjit.to_f / total_exits - - # Proportion of instructions that retire in YJIT - total_insns_count = retired_in_yjit + stats["vm_insns_count"] - yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count - - report_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_exit.erb") - report_template.result(binding) # Evaluate with the local variables right here - end - - def sorted_exit_counts(stats, prefix:, how_many: 20, left_pad: 4) - prefix_text = "" - - exits = [] - stats.each do |k, v| - if k.start_with?(prefix) - exits.push [k.to_s.delete_prefix(prefix), v] - end - end - - exits = exits.sort_by { |name, count| [-count, name] }[0...how_many] - side_exits = total_exit_count(stats) - - top_n_total = exits.map { |name, count| count }.sum - top_n_exit_pct = 100.0 * top_n_total / side_exits - - prefix_text = "Top-#{how_many} most frequent exit ops (#{"%.1f" % top_n_exit_pct}% of exits):\n" - - longest_insn_name_len = exits.map { |name, count| name.length }.max - prefix_text + exits.map do |name, count| - padding = longest_insn_name_len + left_pad - padded_name = "%#{padding}s" % name - padded_count = "%10d" % count - percent = 100.0 * count / side_exits - formatted_percent = "%.1f" % percent - "#{padded_name}: #{padded_count} (#{formatted_percent})" - end.join("\n") - end - - def counters_section(counters, prefix:, prompt:) - text = prompt + "\n" - - counters = counters.filter { |key, _| key.start_with?(prefix) } - counters.filter! { |_, value| value != 0 } - counters.transform_keys! { |key| key.to_s.delete_prefix(prefix) } - - if counters.empty? - text.concat(" (all relevant counters are zero)") - return text - end - - counters = counters.to_a - counters.sort_by! 
{ |_, counter_value| counter_value } - longest_name_length = counters.max_by { |name, _| name.length }.first.length - total = counters.sum { |_, counter_value| counter_value } - - counters.reverse_each do |name, value| - percentage = value.to_f * 100 / total - text.concat(" %*s %10d (%4.1f%%)\n" % [longest_name_length, name, value, percentage]) - end - - text - end - -end - -# This is intended to match the exit report printed by debug YJIT when stats are turned on. -# Note that this is somewhat complex to keep up to date. We don't store literal YJIT exit -# reports. In fact, exit reports are often meant to mimic a situation that never existed, -# where multiple runs are combined and then a hypothetical exit report is printed for them. -# So we don't store a real, literal exit report, which sometimes never happened. -# -# Instead we periodically update the logic and templates for the exit reports to match -# the current YJIT stats data. Keep in mind that older YJIT stats data often has different -# stats -- including renamed stats, or stats not collected for years, etc. So that means -# the code for exit reports may need to be more robust than the code from YJIT, which -# only has to deal with stats from its own exact YJIT version. -# -# Despite that, the logic here intentionally follows the structure of YJIT's own exit -# reports so that it's not too difficult to update. Make sure to rebuild all the old -# exit reports when you update this to ensure that you don't have any that crash because -# of missing or renamed stats. -class YJITMetrics::YJITStatsExitReport < YJITMetrics::YJITStatsReport - def self.report_name - "yjit_stats_default" - end - - def to_s - exit_report_for_benchmarks(@benchmark_names) - end - - def write_file(filename) - text_output = self.to_s - File.open(filename + ".txt", "w") { |f| f.write(text_output) } - end -end - -# Note: this is now unused in normal operation, but is still in unit tests for reporting. -# This report is to compare YJIT's time-in-JIT versus its speedup for various benchmarks. -class YJITMetrics::YJITStatsMultiRubyReport < YJITMetrics::YJITStatsReport - def self.report_name - "yjit_stats_multi" - end - - def initialize(config_names, results, benchmarks: []) - # Set up the YJIT stats parent class - super - - # We've figured out which config is the YJIT stats. Now which one is production stats with YJIT turned on? - alt_configs = config_names - [ @stats_config ] - with_yjit_configs = alt_configs.select { |name| name.end_with?("prod_ruby_with_yjit") } - raise "We found more than one candidate with-YJIT config (#{with_yjit_configs.inspect}) in this result set!" if with_yjit_configs.size > 1 - raise "We didn't find any config that looked like a with-YJIT config among #{config_names.inspect}!" if with_yjit_configs.empty? - @with_yjit_config = with_yjit_configs[0] - - alt_configs -= with_yjit_configs - no_yjit_configs = alt_configs.select { |name| name.end_with?("prod_ruby_no_jit") } - raise "We found more than one candidate no-YJIT config (#{no_yjit_configs.inspect}) in this result set!" if no_yjit_configs.size > 1 - raise "We didn't find any config that looked like a no-YJIT config among #{config_names.inspect}!" if no_yjit_configs.empty? 
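# Worked example of the speedup calculation further down, using invented timings
# (not measured data): if the no-JIT config averages 250 ms per iteration and the
# with-YJIT config averages 200 ms, then yjit_ratio = 250.0 / 200.0 = 1.25 and
# yjit_speedup_pct = (1.25 - 1.0) * 100.0 = 25.0, i.e. a 25% speedup.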
- @no_yjit_config = no_yjit_configs[0] - - # Let's calculate some report data - times_by_config = {} - [ @with_yjit_config, @no_yjit_config ].each { |config| times_by_config[config] = results.times_for_config_by_benchmark(config) } - @headings = [ "bench", @with_yjit_config + " (ms)", "speedup (%)", "% in YJIT" ] - @col_formats = [ "%s", "%.1f", "%.2f", "%.2f" ] - - @benchmark_names = filter_benchmark_names(times_by_config[@no_yjit_config].keys) - - times_by_config.each do |config_name, results| - raise("No results for configuration #{config_name.inspect} in PerBenchRubyComparison!") if results.nil? || results.empty? - end - - stats = results.yjit_stats_for_config_by_benchmark(@stats_config) - - @report_data = @benchmark_names.map do |benchmark_name| - no_yjit_config_times = times_by_config[@no_yjit_config][benchmark_name] - no_yjit_mean = mean(no_yjit_config_times) - with_yjit_config_times = times_by_config[@with_yjit_config][benchmark_name] - with_yjit_mean = mean(with_yjit_config_times) - yjit_ratio = no_yjit_mean / with_yjit_mean - yjit_speedup_pct = (yjit_ratio - 1.0) * 100.0 - - # A benchmark run may well return multiple sets of YJIT stats per benchmark name/type. - # For these calculations we just add all relevant counters together. - this_bench_stats = combined_stats_data_for_benchmarks([benchmark_name]) - - side_exits = total_exit_count(this_bench_stats) - retired_in_yjit = (this_bench_stats["exec_instruction"] || this_bench_stats["yjit_insns_count"]) - side_exits - total_insns_count = retired_in_yjit + this_bench_stats["vm_insns_count"] - yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count - - [ benchmark_name, with_yjit_mean, yjit_speedup_pct, yjit_ratio_pct ] - end - end - - def to_s - format_as_table(@headings, @col_formats, @report_data) - end - - def write_file(filename) - text_output = self.to_s - File.open(filename + ".txt", "w") { |f| f.write(text_output) } - end -end diff --git a/lib/yjit-metrics/timeline_report_types/all_timeline_reports_lib.rb b/lib/yjit-metrics/timeline_report_types/all_timeline_reports_lib.rb deleted file mode 100644 index 0dc3cbc4c..000000000 --- a/lib/yjit-metrics/timeline_report_types/all_timeline_reports_lib.rb +++ /dev/null @@ -1,36 +0,0 @@ -# Ruby code for all timeline report types - -# This is the Munin palette from Shutterstock Rickshaw -MUNIN_PALETTE = [ - '#00cc00', - '#0066b3', - '#ff8000', - '#ffcc00', - '#330099', - '#990099', - '#ccff00', - '#ff0000', - '#808080', - '#008f00', - '#00487d', - '#b35a00', - '#b38f00', - '#6b006b', - '#8fb300', - '#b30000', - '#bebebe', - '#80ff80', - '#80c9ff', - '#ffc080', - '#ffe680', - '#aa80ff', - '#ee00cc', - '#ff8080', - '#666600', - '#ffbfff', - '#00ffcc', - '#cc6699', - '#999900', - # If we add one colour we get 29 entries, it's not divisible by the number of platforms and won't get weird repeats - '#003399', -]; diff --git a/lib/yjit-metrics/timeline_report_types/blog_timeline.rb b/lib/yjit-metrics/timeline_report_types/blog_timeline.rb deleted file mode 100644 index f75b0352b..000000000 --- a/lib/yjit-metrics/timeline_report_types/blog_timeline.rb +++ /dev/null @@ -1,123 +0,0 @@ -class BlogTimelineReport < YJITMetrics::TimelineReport - def self.report_name - "blog_timeline" - end - - def self.report_extensions - [ "html", "recent.html" ] - end - - # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. 
- def inspect - "BlogTimelineReport<#{object_id}>" - end - - NUM_RECENT=100 - def initialize(context) - super - - yjit_config_root = "prod_ruby_with_yjit" - - # This should match the JS parser in the template file - time_format = "%Y %m %d %H %M %S" - - @series = {} - YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } - - @context[:benchmark_order].each.with_index do |benchmark, idx| - color = MUNIN_PALETTE[idx % MUNIN_PALETTE.size] - YJITMetrics::PLATFORMS.each do |platform| - config = "#{platform}_#{yjit_config_root}" - points = @context[:timestamps].map do |ts| - this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) - if this_point - this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" - # These fields are from the ResultSet summary - [ ts.strftime(time_format), this_point["mean"], this_point["stddev"], this_ruby_desc ] - else - nil - end - end - points.compact! - next if points.empty? - - visible = @context[:selected_benchmarks].include?(benchmark) - - s_all_time = { config: config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, color: color, data: points } - s_recent = s_all_time.dup - s_recent[:data] = s_recent[:data].last(NUM_RECENT) - - @series[platform][:recent].push s_recent - @series[platform][:all_time].push s_all_time - end - end - end - - def write_files(out_dir) - [:recent, :all_time].each do |duration| - YJITMetrics::PLATFORMS.each do |platform| - begin - @data_series = @series[platform][duration] - - script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_timeline_data_template.js.erb") - text = script_template.result(binding) - File.open("#{out_dir}/reports/timeline/blog_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } - rescue - puts "Error writing data file for #{platform} #{duration} data!" - raise - end - end - end - - script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_timeline_d3_template.html.erb") - html_output = script_template.result(binding) # Evaluate an Erb template with template_settings - File.open("#{out_dir}/_includes/reports/blog_timeline.html", "w") { |f| f.write(html_output) } - end -end - -class MiniTimelinesReport < YJITMetrics::TimelineReport - def self.report_name - "mini_timelines" - end - - # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. - def inspect - "MiniTimelinesReport<#{object_id}>" - end - - def initialize(context) - super - - config = find_config("prod_ruby_with_yjit") - platform = platform_of_config(config) - - # This should match the JS parser in the template file - time_format = "%Y %m %d %H %M %S" - - @series = [] - - @context[:selected_benchmarks].each do |benchmark| - points = @context[:timestamps].map do |ts| - this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) - if this_point - this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" - # These fields are from the ResultSet summary - [ ts.strftime(time_format), this_point["mean"], this_ruby_desc ] - else - nil - end - end - points.compact! - next if points.empty? - - @series.push({ config: config, benchmark: benchmark, name: "#{config}-#{benchmark}", platform: platform, data: points }) - end - #@series.sort_by! 
{ |s| s[:name] } - end - - def write_files(out_dir) - script_template = ERB.new File.read(__dir__ + "/../report_templates/mini_timeline_d3_template.html.erb") - html_output = script_template.result(binding) # Evaluate an Erb template with template_settings - File.open("#{out_dir}/_includes/reports/mini_timelines.html", "w") { |f| f.write(html_output) } - end -end diff --git a/lib/yjit-metrics/timeline_report_types/memory_timeline_report.rb b/lib/yjit-metrics/timeline_report_types/memory_timeline_report.rb deleted file mode 100644 index 492dceed0..000000000 --- a/lib/yjit-metrics/timeline_report_types/memory_timeline_report.rb +++ /dev/null @@ -1,86 +0,0 @@ -class MemoryTimelineReport < YJITMetrics::TimelineReport - def self.report_name - "memory_timeline" - end - - def self.report_extensions - [ "html", "recent.html" ] - end - - # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. - def inspect - "MemoryTimelineReport<#{object_id}>" - end - - CONFIG_NAMES = { - "no-jit" => "prod_ruby_no_jit", - "yjit" => "prod_ruby_with_yjit", - } - CONFIG_ROOTS = CONFIG_NAMES.values - NUM_RECENT=100 - def initialize(context) - super - - ruby_config_roots = CONFIG_NAMES.values - - # This should match the JS parser in the template file - time_format = "%Y %m %d %H %M %S" - - @series = {} - YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } - - color_idx = 0 - @context[:benchmark_order].each.with_index do |benchmark, idx| - CONFIG_NAMES.each do |config_human_name, config_root| - color = MUNIN_PALETTE[color_idx % MUNIN_PALETTE.size] - color_idx += 1 - - YJITMetrics::PLATFORMS.each do |platform| - config = "#{platform}_#{config_root}" - points = @context[:timestamps].map do |ts| - this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) - if this_point - this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" - # These fields are from the ResultSet summary - peak_mem_bytes is an array because multiple runs are possible - [ ts.strftime(time_format), this_point["peak_mem_bytes"].max, this_ruby_desc ] - else - nil - end - end - points.compact! - next if points.empty? - - visible = @context[:selected_benchmarks].include?(benchmark) - - s_all_time = { config: config, config_human_name: config_human_name, benchmark: benchmark, name: "#{config_root}-#{benchmark}", platform: platform, visible: visible, color: color, data: points } - s_recent = s_all_time.dup - s_recent[:data] = s_recent[:data].last(NUM_RECENT) - - @series[platform][:recent].push s_recent - @series[platform][:all_time].push s_all_time - end - end - end - end - - def write_files(out_dir) - [:recent, :all_time].each do |duration| - YJITMetrics::PLATFORMS.each do |platform| - begin - @data_series = @series[platform][duration].select { |s| CONFIG_ROOTS.any? { |config_root| s[:config].include?(config_root) } } - - script_template = ERB.new File.read(__dir__ + "/../report_templates/memory_timeline_data_template.js.erb") - text = script_template.result(binding) - File.open("#{out_dir}/reports/timeline/memory_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } - rescue - puts "Error writing data file for #{platform} #{duration} data!" 
- raise - end - end - end - - script_template = ERB.new File.read(__dir__ + "/../report_templates/memory_timeline_d3_template.html.erb") - html_output = script_template.result(binding) # Evaluate an Erb template with template_settings - File.open("#{out_dir}/_includes/reports/memory_timeline.html", "w") { |f| f.write(html_output) } - end -end diff --git a/lib/yjit-metrics/timeline_report_types/yjit_stats_timeline.rb b/lib/yjit-metrics/timeline_report_types/yjit_stats_timeline.rb deleted file mode 100644 index 3f07c3b24..000000000 --- a/lib/yjit-metrics/timeline_report_types/yjit_stats_timeline.rb +++ /dev/null @@ -1,168 +0,0 @@ -class YJITSpeedupTimelineReport < YJITMetrics::TimelineReport - def self.report_name - "yjit_stats_timeline" - end - - # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. - def inspect - "YJITSpeedupTimelineReport<#{object_id}>" - end - - NUM_RECENT=100 - def initialize(context) - super - - yjit_config_root = "prod_ruby_with_yjit" - stats_config_root = "yjit_stats" - no_jit_config_root = "prod_ruby_no_jit" - x86_stats_config = "x86_64_#{stats_config_root}" - - # This should match the JS parser in the template file - time_format = "%Y %m %d %H %M %S" - - @series = {} - YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } - - @context[:benchmark_order].each do |benchmark| - YJITMetrics::PLATFORMS.each do |platform| - yjit_config = "#{platform}_#{yjit_config_root}" - stats_config = "#{platform}_#{stats_config_root}" - no_jit_config = "#{platform}_#{no_jit_config_root}" - points = @context[:timestamps_with_stats].map do |ts| - this_point_yjit = @context[:summary_by_timestamp].dig(ts, yjit_config, benchmark) - this_point_cruby = @context[:summary_by_timestamp].dig(ts, no_jit_config, benchmark) - # If no same-platform stats, fall back to x86_64 stats if available - this_point_stats = @context[:summary_by_timestamp].dig(ts, stats_config, benchmark) || - @context[:summary_by_timestamp].dig(ts, x86_stats_config, benchmark) - if this_point_yjit && this_point_stats - this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] || "unknown" - # These fields are from the ResultSet summary - out = { - time: ts.strftime(time_format), - yjit_speedup: this_point_cruby["mean"] / this_point_yjit["mean"], - ratio_in_yjit: this_point_stats["yjit_stats"]["yjit_ratio_pct"], - side_exits: this_point_stats["yjit_stats"]["side_exits"], - invalidation_count: this_point_stats["yjit_stats"]["invalidation_count"] || 0, - ruby_desc: this_ruby_desc, - } - if out[:ratio_in_yjit].nil? || out[:side_exits].nil? || out[:invalidation_count].nil? - puts "Problem location: Benchmark #{benchmark.inspect} platform #{platform.inspect} timestamp #{ts.inspect}" - puts "Stats config(s): #{stats_config.inspect} / #{x86_stats_config.inspect}" - puts "Bad output sample: #{out.inspect}" - puts "Stats array: #{this_point_stats["yjit_stats"]}" - raise("Found point with nil as summary!") - end - out - else - nil - end - end - points.compact! - next if points.empty? 
- - visible = @context[:selected_benchmarks].include?(benchmark) - - s = { config: yjit_config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, data: points } - s_recent = { config: yjit_config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, data: points.last(NUM_RECENT) } - @series[platform][:all_time].push s - @series[platform][:recent].push s_recent - end - end - - # Grab the stats fields from the first stats point (for the first platform that has data). - @stats_fields = @series.values.reject { |v| v[:all_time].empty? }[0][:all_time][0][:data][0].keys - [:time, :ruby_desc] - - # Calculate overall yjit speedup, yjit ratio, etc. over all benchmarks per-platform - YJITMetrics::PLATFORMS.each do |platform| - yjit_config = "#{platform}_#{yjit_config_root}" - # No Ruby desc for this? If so, that means no results for this platform - next unless @context[:ruby_desc_by_config_and_timestamp][yjit_config] - - data_mean = [] - data_geomean = [] - @context[:timestamps_with_stats].map.with_index do |ts, t_idx| - # No Ruby desc for this platform/timestamp combo? If so, that means no results for this platform and timestamp. - next unless @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] - - ruby_desc = @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] || "unknown" - point_mean = { - time: ts.strftime(time_format), - ruby_desc: ruby_desc, - } - point_geomean = point_mean.dup - @stats_fields.each do |field| - begin - points = @context[:benchmark_order].map.with_index do |bench, b_idx| - t_str = ts.strftime(time_format) - this_bench_data = @series[platform][:all_time][b_idx] - if this_bench_data - t_in_series = this_bench_data[:data].detect { |point_info| point_info[:time] == t_str } - t_in_series ? t_in_series[field] : nil - else - nil - end - end - rescue - STDERR.puts "Error in yjit_stats_timeline calculating field #{field} for TS #{ts.inspect} for all #{platform} benchmarks" - raise - end - points.compact! - raise("No data points for stat #{field.inspect} for TS #{ts.inspect}") if points.empty? - point_mean[field] = mean(points) - point_geomean[field] = geomean(points) - end - - data_mean.push(point_mean) - data_geomean.push(point_geomean) - end - overall_mean = { config: yjit_config, benchmark: "overall-mean", name: "#{yjit_config_root}-overall-mean", platform: platform, visible: true, data: data_mean } - overall_geomean = { config: yjit_config, benchmark: "overall-geomean", name: "#{yjit_config_root}-overall-geomean", platform: platform, visible: true, data: data_geomean } - overall_mean_recent = { config: yjit_config, benchmark: "overall-mean", name: "#{yjit_config_root}-overall-mean", platform: platform, visible: true, data: data_mean.last(NUM_RECENT) } - overall_geomean_recent = { config: yjit_config, benchmark: "overall-geomean", name: "#{yjit_config_root}-overall-geomean", platform: platform, visible: true, data: data_geomean.last(NUM_RECENT) } - - @series[platform][:all_time].prepend overall_geomean - @series[platform][:all_time].prepend overall_mean - @series[platform][:recent].prepend overall_geomean_recent - @series[platform][:recent].prepend overall_mean_recent - end - - # Recent and all-time series have different numbers of benchmarks. To keep everybody in sync, we set - # the colours here in Ruby and pass them through. 
- color_by_benchmark = {} - (["overall-mean", "overall-geomean"] + @context[:benchmark_order]).each.with_index do |bench, idx| - color_by_benchmark[bench] = MUNIN_PALETTE[idx % MUNIN_PALETTE.size] - end - @series.each do |platform, hash| - hash.each do |duration, all_series| - all_series.each.with_index do |series, idx| - series[:color] = color_by_benchmark[series[:benchmark]] - if series[:color].nil? - raise "Error for #{platform} #{duration} w/ bench #{series[:benchmark].inspect}!" - end - end - end - end - end - - def write_files(out_dir) - [:recent, :all_time].each do |duration| - YJITMetrics::PLATFORMS.each do |platform| - begin - @data_series = @series[platform][duration] - - script_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_timeline_data_template.js.erb") - text = script_template.result(binding) - File.open("#{out_dir}/reports/timeline/yjit_stats_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } - rescue - puts "Error writing data file for #{platform} #{duration} data!" - raise - end - end - end - - script_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_timeline_d3_template.html.erb") - #File.write("/tmp/erb_template.txt", script_template.src) - html_output = script_template.result(binding) # Evaluate an Erb template with template_settings - File.open("#{out_dir}/_includes/reports/yjit_stats_timeline.html", "w") { |f| f.write(html_output) } - end -end diff --git a/lib/yjit_metrics.rb b/lib/yjit_metrics.rb new file mode 100644 index 000000000..d53b34113 --- /dev/null +++ b/lib/yjit_metrics.rb @@ -0,0 +1,535 @@ +# frozen_string_literal: true +# General-purpose benchmark management routines + +require 'benchmark' +require 'fileutils' +require 'tempfile' +require 'json' +require 'csv' +require 'erb' + +require_relative "./yjit_metrics/cli" +require_relative "./yjit_metrics/continuous_reporting" +require_relative "./yjit_metrics/defaults" +require_relative "./yjit_metrics/repo_management" +require_relative "./yjit_metrics/result_set" + +Dir.glob("yjit_metrics/reports/*.rb", base: __dir__).each do |report_type_file| + require_relative report_type_file +end + +Dir.glob("yjit_metrics/timeline_reports/*.rb", base: __dir__).each do |report_type_file| + require_relative report_type_file +end + +module YJITMetrics + include RepoManagement + + extend self # Make methods callable as YJITMetrics.method_name + + HARNESS_PATH = File.expand_path(__dir__ + "/../metrics-harness") + + PLATFORMS = ["x86_64", "aarch64"] + + uname_platform = `uname -m`.chomp.downcase.sub(/^arm(\d+)$/, 'aarch\1') + PLATFORM = PLATFORMS.detect { |platform| uname_platform == platform } + raise("yjit-metrics only supports running on x86_64 and aarch64!") if !PLATFORM + + # This structure is returned by the benchmarking harness from a run. + JSON_RUN_FIELDS = %i(times warmups yjit_stats peak_mem_bytes failures_before_success benchmark_metadata ruby_metadata) + RunData = Struct.new(*JSON_RUN_FIELDS) do + def exit_status + 0 + end + + def success? + true + end + + def times_ms + self.times.map { |v| 1000.0 * v } + end + + def warmups_ms + self.warmups.map { |v| 1000.0 * v } + end + + def to_json + out = { "version": 2 } # Current version of the single-run data file format + JSON_RUN_FIELDS.each { |f| out[f.to_s] = self.send(f) } + out + end + + def self.from_json(json) + unless json["version"] == 2 + raise "This looks like out-of-date single-run data!" 
+ end + + RunData.new(*JSON_RUN_FIELDS.map { |f| json[f.to_s] }) + end + end + + ErrorData = Struct.new(:exit_status, :error, :summary, keyword_init: true) do + def success? + false + end + end + + def chdir(dir, &block) + puts "### cd #{dir}" + Dir.chdir(dir, &block).tap do + puts "### cd #{Dir.pwd}" if block + end + end + + # Checked system - error if the command fails + def check_call(command) + # Use prefix to makes it easier to see in the log. + puts("\e[33m## [#{Time.now}] #{command}\e[00m") + + status = nil + Benchmark.realtime do + status = system(command) + end.tap do |time| + printf "\e[34m## (`#{command}` took %.2fs)\e[00m\n", time + end + + unless status + puts "\e[31mCommand #{command.inspect} failed in directory #{Dir.pwd}\e[00m" + raise RuntimeError.new + end + end + + def check_output(command) + output = IO.popen(command) do |io_obj| + io_obj.read + end + unless $?.success? + puts "Command #{command.inspect} failed in directory #{Dir.pwd}" + raise RuntimeError.new + end + output + end + + def run_harness_script_from_string(script, + local_popen: proc { |*args, **kwargs, &block| IO.popen(*args, **kwargs, &block) }, + crash_file_check: true, + do_echo: true) + run_info = {} + + os = os_type + + if crash_file_check + if os == :linux + FileUtils.rm_f("core") + elsif os == :mac + crash_pattern = "#{ENV['HOME']}/Library/Logs/DiagnosticReports/ruby_*.crash" + ruby_crash_files_before = Dir[crash_pattern].to_a + end + end + + tf = Tempfile.new("yjit-metrics-script") + tf.write(script) + tf.flush # Not flushing can result in successfully running an empty script + + script_output = nil + harness_script_pid = nil + worker_pid = nil + + # We basically always want this to sync immediately to console or logfile. + # If the library was run with nohup (or otherwise not connected to a tty) + # that won't happen by default. + $stdout.sync = true + + # Passing -l to bash makes sure to load .bash_profile for chruby. + err_r, err_w = IO.pipe + local_popen.call(["bash", "-l", tf.path], err: err_w) do |script_out_io| + harness_script_pid = script_out_io.pid + script_output = "" + loop do + begin + chunk = script_out_io.readpartial(1024) + + # The harness will print the worker PID before doing anything else. + if (worker_pid.nil? && chunk.include?("HARNESS PID")) + if chunk =~ /HARNESS PID: (\d+) -/ + worker_pid = $1.to_i + else + puts "Failed to read harness PID correctly from chunk: #{chunk.inspect}" + end + end + + print chunk if do_echo + script_output += chunk + rescue EOFError + # Cool, all done. + break + end + end + end + + err_w.close + script_err = err_r.read + print script_err if do_echo + + # This code and the ensure handler need to point to the same + # status structure so that both can make changes (e.g. to crash_files). + # We'd like this structure to be simple and serialisable -- it's + # passed back from the framework, more or less intact. + run_info.merge!({ + failed: !$?.success?, + crash_files: [], + exit_status: $?.exitstatus, + harness_script_pid: harness_script_pid, + worker_pid: worker_pid, + stderr: script_err, + output: script_output + }) + + return run_info + ensure + if(tf) + tf.close + tf.unlink + end + + if crash_file_check + if os == :linux + run_info[:crash_files] = [ "core" ] if File.exist?("core") + elsif os == :mac + # Horrifying realisation: it takes a short time after the segfault for the crash file to be written. + # Matching these up is really hard to do automatically, particularly when/if we're not sure if + # they'll be showing up at all. 
+ sleep(1) if run_info[:failed] + + ruby_crash_files = Dir[crash_pattern].to_a + # If any new ruby_* crash files have appeared, include them. + run_info[:crash_files] = (ruby_crash_files - ruby_crash_files_before).sort + end + end + end + + def os_type + if RUBY_PLATFORM["darwin"] + :mac + elsif RUBY_PLATFORM["win"] + :win + else + :linux + end + end + + def per_os_checks + if os_type == :win + puts "Windows is not supported or tested yet. Best of luck!" + return + end + + if os_type == :mac + puts "Mac results are considered less stable for this benchmarking harness." + puts "Please assume you'll need more runs and more time for similar final quality." + return + end + + # Only available on intel systems + if !File.exist?('/sys/devices/system/cpu/intel_pstate/no_turbo') + return + end + + File.open('/sys/devices/system/cpu/intel_pstate/no_turbo', mode='r') do |file| + if file.read.strip != '1' + puts("You forgot to disable turbo: (note: sudo ./setup.sh will do this)") + puts(" sudo sh -c 'echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo'") + exit(-1) + end + end + + if !File.exist?('/sys/devices/system/cpu/intel_pstate/min_perf_pct') + return + end + + File.open('/sys/devices/system/cpu/intel_pstate/min_perf_pct', mode='r') do |file| + if file.read.strip != '100' + puts("You forgot to set the min perf percentage to 100: (note: sudo ./setup.sh will do this)") + puts(" sudo sh -c 'echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct'") + exit(-1) + end + end + end + + class BenchmarkList + attr_reader :yjit_bench_path + + def initialize(name_list:, yjit_bench_path:) + @name_list = name_list + @yjit_bench_path = File.expand_path(yjit_bench_path) + + bench_names = Dir.glob("*", base: "#{@yjit_bench_path}/benchmarks") + legal_bench_names = (bench_names + bench_names.map { |name| name.delete_suffix(".rb") }).uniq + @name_list.map! { |name| name.delete_suffix(".rb") } + + unknown_benchmarks = name_list - legal_bench_names + raise(RuntimeError.new("Unknown benchmarks: #{unknown_benchmarks.inspect}!")) if unknown_benchmarks.size > 0 + bench_names = @name_list if @name_list.size > 0 + raise "No testable benchmarks found!" if bench_names.empty? # This should presumably not happen after the "unknown" check + + @benchmark_script_by_name = {} + bench_names.each do |bench_name| + script_path = "#{@yjit_bench_path}/benchmarks/#{bench_name}" + + # Choose the first of these that exists + real_script_path = [script_path, script_path + ".rb", script_path + "/benchmark.rb"].detect { |path| File.exist?(path) && !File.directory?(path) } + raise "Could not find benchmark file starting from script path #{script_path.inspect}!" unless real_script_path + @benchmark_script_by_name[bench_name] = real_script_path + end + end + + # For now, benchmark_info returns a Hash. At some point it may want to get fancier. + def benchmark_info(name) + raise "Querying unknown benchmark name #{name.inspect}!" unless @benchmark_script_by_name[name] + { + name: name, + script_path: @benchmark_script_by_name[name], + } + end + + def to_a + @benchmark_script_by_name.keys.map { |name| benchmark_info(name) } + end + + # If we call .map, we'll pretend to be an array of benchmark_info hashes + def map + @benchmark_script_by_name.keys.map do |name| + yield benchmark_info(name) + end + end + end + + # Eventually we'd like to do fancy things with interesting settings. + # Before that, let's encapsulate the settings in a simple object so + # we can pass them around easily. 
+ # + # Harness Settings are about how to sample the benchmark repeatedly - + # iteration counts, thresholds, etc. + class HarnessSettings + LEGAL_SETTINGS = [ :warmup_itrs, :min_benchmark_itrs, :min_benchmark_time ] + + def initialize(settings) + illegal_keys = settings.keys - LEGAL_SETTINGS + raise "Illegal settings given to HarnessSettings: #{illegal_keys.inspect}!" unless illegal_keys.empty? + @settings = settings + end + + def [](key) + @settings[key] + end + + def to_h + @settings + end + end + + # Shell Settings encapsulate how we run Ruby and the appropriate shellscript + # for each sampling run. That means which Ruby, which Ruby and shell options, + # what env vars to set, whether core dumps are enabled, what to do on error and more. + class ShellSettings + LEGAL_SETTINGS = [ :ruby_opts, :prefix, :chruby, :enable_core_dumps, :on_error, :bundler_version ] + + def initialize(settings) + illegal_keys = settings.keys - LEGAL_SETTINGS + raise "Illegal settings given to ShellSettings: #{illegal_keys.inspect}!" unless illegal_keys.empty? + @settings = settings + end + + def [](key) + @settings[key] + end + + def to_h + @settings + end + end + + # The yjit-metrics harness returns its data as a simple hash for that benchmark: + # + # { + # "times" => [ 2.3, 2.5, 2.7, 2.4, ...], # The benchmark returns times in seconds, not milliseconds + # "benchmark_metadata" => {...}, + # "ruby_metadata" => {...}, + # "yjit_stats" => {...}, # Note: yjit_stats may be empty, but is present. It's a hash, not an array. + # } + # + # This method returns a RunData struct. Note that only a single yjit stats + # hash is returned for all iterations combined, while times and warmups are + # arrays with sizes equal to the number of 'real' and warmup iterations, + # respectively. + # + # If on_error is specified it should be a proc that takes a hash. In case of + # an exception or a failing status returned by the harness script, + # that proc will be called with information about the error that occurred. + # If on_error raises (or re-raises) an exception then the benchmark run will + # stop. If no exception is raised, this method will collect no samples and + # will return nil. + def run_single_benchmark(benchmark_info, harness_settings:, shell_settings:, + run_script: proc { |s| run_harness_script_from_string(s) }) + + out_tempfile = Tempfile.new("yjit-metrics-single-run") + + env_vars = { + OUT_JSON_PATH: out_tempfile.path, + WARMUP_ITRS: harness_settings[:warmup_itrs], + MIN_BENCH_ITRS: harness_settings[:min_benchmark_itrs], + MIN_BENCH_TIME: harness_settings[:min_benchmark_time], + FORCE_BUNDLER_VERSION: shell_settings[:bundler_version], + } + + with_chruby = shell_settings[:chruby] + + script_template = ERB.new File.read(__dir__ + "/../metrics-harness/run_harness.sh.erb") + # These are used in the ERB template + template_settings = { + pre_benchmark_code: (with_chruby ? "chruby && chruby #{with_chruby}" : "") + "\n" + + (shell_settings[:enable_core_dumps] ? 
"ulimit -c unlimited" : ""), + pre_cmd: shell_settings[:prefix], + env_var_exports: env_vars.map { |key, val| "export #{key}='#{val}'" }.join("\n"), + ruby_opts: "-I#{HARNESS_PATH} " + shell_settings[:ruby_opts].map { |s| '"' + s + '"' }.join(" "), + script_path: benchmark_info[:script_path], + bundler_version: shell_settings[:bundler_version], + } + bench_script = script_template.result(binding) # Evaluate an Erb template with template_settings + + # Do the benchmarking + script_details = run_script.call(bench_script) + + if script_details[:failed] + # We shouldn't normally get a Ruby exception in the parent process. Instead the harness + # process fails and returns an exit status. We'll create an exception for the error + # handler to raise if it decides this is a fatal error. + result = ErrorData.new( + exit_status: script_details[:exit_status], + error: "Failure in benchmark test harness, exit status: #{script_details[:exit_status].inspect}", + summary: script_details[:stderr]&.lines&.detect { |l| l.match?(/\S/) }&.sub("#{Dir.pwd}", ".")&.strip, + ) + + STDERR.puts "-----" + STDERR.print bench_script + STDERR.puts "-----" + + if shell_settings[:on_error] + begin + # What should go in here? What should the interface be? Some things will + # be unavailable, depending what stage of the script got an error. + shell_settings[:on_error].call(script_details.merge({ + exception: result.error, + benchmark_name: benchmark_info[:name], + benchmark_path: benchmark_info[:script_path], + harness_settings: harness_settings.to_h, + shell_settings: shell_settings.to_h, + })) + rescue StandardError => error + result.error = error + end + end + + return result + end + + # Read the benchmark data + json_string_data = File.read out_tempfile.path + if json_string_data == "" + # The tempfile exists, so no read error... But no data returned. + raise "No error from benchmark, but no data was returned!" + end + single_bench_data = JSON.load(json_string_data) + obj = RunData.new(*JSON_RUN_FIELDS.map { |field| single_bench_data[field.to_s] }) + obj.yjit_stats = nil if obj.yjit_stats.nil? || obj.yjit_stats.empty? + + # Add per-benchmark metadata from this script to the data returned from the harness. + obj.benchmark_metadata.merge!({ + "benchmark_name" => benchmark_info[:name], + "benchmark_path" => benchmark_info[:script_path], + }) + + obj + ensure + if out_tempfile + out_tempfile.close + out_tempfile.unlink + end + end + + # This method combines run_data objects from multiple benchmark runs. + # + # It returns a benchmark data array of the following form: + # + # { + # "times" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...],[...]] "psych" => [...] }, + # "warmups" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...],[...]] "psych" => [...] }, + # "benchmark_metadata" => { "yaml-load" => {}, "psych" => { ... }, }, + # "ruby_metadata" => {...}, + # "yjit_stats" => { "yaml-load" => [{...}, {...}, ...] }, + # "peak_mem_bytes" => { "yaml-load" => [2343423, 2349341, ...], "psych" => [112234, ...], ... }, + # } + # + # For times, warmups, YJIT stats and benchmark metadata, that means there is a hash inside + # each top-level key for each benchmark name, e.g.: + # + # "times" => { "yaml-load" => [[ 2.3, 2.5, 2.7, 2.4, ...], [...], ...] } + # + # For times, warmups and YJIT stats that means the value of each hash value is an array. + # For times and warmups, the top-level array is the runs, and the sub-arrays are iterations + # in a single run. 
For YJIT stats, the top-level array is runs and the hash is the gathered + # YJIT stats for that run. + # + # If no valid data was successfully collected (e.g. a single benchmark was to run, but failed) + # then this method will return nil. + def merge_benchmark_data(all_run_data) + bench_data = { "version": 2 } + JSON_RUN_FIELDS.each { |f| bench_data[f.to_s] = {} } + + all_run_data.each do |run_data| + bench_name = run_data.benchmark_metadata["benchmark_name"] + + bench_data["times"][bench_name] ||= [] + bench_data["warmups"][bench_name] ||= [] + bench_data["yjit_stats"][bench_name] ||= [] + bench_data["peak_mem_bytes"][bench_name] ||= [] + bench_data["failures_before_success"][bench_name] ||= [] + + # Return times and warmups in milliseconds, not seconds + bench_data["times"][bench_name].push run_data.times_ms + bench_data["warmups"][bench_name].push run_data.warmups_ms + + bench_data["yjit_stats"][bench_name].push [run_data.yjit_stats] if run_data.yjit_stats + bench_data["peak_mem_bytes"][bench_name].push run_data.peak_mem_bytes + bench_data["failures_before_success"][bench_name].push run_data.failures_before_success + + # Benchmark metadata should be unique per-benchmark. In other words, + # we do *not* want to combine runs with different amounts of warmup, + # iterations, different env/gems, etc, into the same dataset. + bench_data["benchmark_metadata"][bench_name] ||= run_data.benchmark_metadata + if bench_data["benchmark_metadata"][bench_name] != run_data.benchmark_metadata + puts "#{bench_name} metadata 1: #{bench_data["benchmark_metadata"][bench_name].inspect}" + puts "#{bench_name} metadata 2: #{run_data.benchmark_metadata.inspect}" + puts "Benchmark metadata should not change for benchmark #{bench_name} in the same configuration!" + end + + # We don't save individual Ruby metadata for all benchmarks because it + # should be identical for all of them -- we use the same Ruby + # every time. Instead we save one copy of it, but we make sure + # on each subsequent benchmark that it returned exactly the same + # metadata about the Ruby version. + bench_data["ruby_metadata"] = run_data.ruby_metadata if bench_data["ruby_metadata"].empty? + if bench_data["ruby_metadata"] != run_data.ruby_metadata + puts "Ruby metadata 1: #{bench_data["ruby_metadata"].inspect}" + puts "Ruby metadata 2: #{run_data.ruby_metadata.inspect}" + raise "Ruby metadata should not change across a single set of benchmark runs in the same Ruby config!" + end + end + + # With error handlers it's possible that every benchmark had an error so there's no data to return. + return nil if bench_data["times"].empty? 
+ + return bench_data + end +end diff --git a/lib/yjit-metrics/cli-interface.rb b/lib/yjit_metrics/cli.rb similarity index 94% rename from lib/yjit-metrics/cli-interface.rb rename to lib/yjit_metrics/cli.rb index 8798663ff..9ddf1a2a2 100644 --- a/lib/yjit-metrics/cli-interface.rb +++ b/lib/yjit_metrics/cli.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module YJITMetrics module CLI def self.human_string_to_boolean(str) diff --git a/lib/yjit-metrics/continuous_reporting.rb b/lib/yjit_metrics/continuous_reporting.rb similarity index 97% rename from lib/yjit-metrics/continuous_reporting.rb rename to lib/yjit_metrics/continuous_reporting.rb index 9db063bdc..3916542f6 100644 --- a/lib/yjit-metrics/continuous_reporting.rb +++ b/lib/yjit_metrics/continuous_reporting.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require "fileutils" module YJITMetrics diff --git a/lib/yjit_metrics/defaults.rb b/lib/yjit_metrics/defaults.rb new file mode 100644 index 000000000..77aa5cb3b --- /dev/null +++ b/lib/yjit_metrics/defaults.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true +module YJITMetrics + # FIXME: Do we need this? + # Default settings for Benchmark CI. + # This is used by benchmark_and_update.rb for CI reporting directly. + # It's also used by the VariableWarmupReport when selecting appropriate + # benchmarking settings. This is only for the default yjit-bench benchmarks. + DEFAULT_YJIT_BENCH_CI_SETTINGS = { + # Config names and config-specific settings + "configs" => { + # Each config controls warmup individually. But the number of real iterations needs + # to match across all configs, so it's not set per-config. + "x86_64_yjit_stats" => { + max_warmup_itrs: 30, + }, + "x86_64_prod_ruby_no_jit" => { + max_warmup_itrs: 30, + }, + "x86_64_prod_ruby_with_yjit" => { + max_warmup_itrs: 30, + }, + "x86_64_prev_ruby_no_jit" => { + max_warmup_itrs: 30, + }, + "x86_64_prev_ruby_yjit" => { + max_warmup_itrs: 30, + }, + #"x86_64_prod_ruby_with_mjit" => { + # max_warmup_itrs: 75, + # max_warmup_time: 300, # in seconds; we try to let MJIT warm up "enough," but time and iters vary by workload + #}, + "aarch64_yjit_stats" => { + max_warmup_itrs: 30, + }, + "aarch64_prod_ruby_no_jit" => { + max_warmup_itrs: 30, + }, + "aarch64_prod_ruby_with_yjit" => { + max_warmup_itrs: 30, + }, + "aarch64_prev_ruby_no_jit" => { + max_warmup_itrs: 30, + }, + "aarch64_prev_ruby_yjit" => { + max_warmup_itrs: 30, + }, + }, + # Non-config-specific settings + "min_bench_itrs" => 15, + "min_bench_time" => 20, + "min_warmup_itrs" => 5, + "max_warmup_itrs" => 75, + "max_itr_time" => 8 * 3600, # Used to stop at 300 minutes to avoid GHActions 360 min cutoff. Now the 7pm run needs to not overlap the 6am run. 
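# A hedged example of how these settings interact, with a hypothetical benchmark rather
# than real data: given "min_bench_time" => 20 (seconds) and "min_bench_itrs" => 15, a
# benchmark averaging about 500 ms per iteration needs max((20 * 1000 / 500), 15) = 40
# timed iterations, while one averaging 2000 ms per iteration keeps the 15-iteration floor.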
+ } +end diff --git a/lib/yjit_metrics/repo_management.rb b/lib/yjit_metrics/repo_management.rb new file mode 100644 index 000000000..7590d2244 --- /dev/null +++ b/lib/yjit_metrics/repo_management.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true +module YJITMetrics + module RepoManagement + def clone_repo_with(path:, git_url:, git_branch:, do_clean: true) + unless File.exist?(path) + check_call("git clone '#{git_url}' '#{path}'") + end + + chdir(path) do + if do_clean + check_call("git clean -d -f") + check_call("git checkout .") # There's a tendency to have local mods to Gemfile.lock -- get rid of those changes + check_call("git fetch") # Make sure we can see any new branches - "git checkout" can fail with a not-yet-seen branch + check_call("git checkout #{git_branch}") + if git_branch =~ /\A[0-9a-zA-Z]{5}/ + # Don't do a "git pull" on a raw SHA + else + check_call("git pull") + end + else + # If we're not cleaning, we should still make sure we're on the right branch + current_branch = `git rev-parse --abbrev-ref HEAD`.chomp + current_sha = `git rev-parse HEAD`.chomp + + # If the branch name doesn't match and we're not on the same specific SHA, check out the specified branch or revision + if current_branch != git_branch && !current_sha.start_with?(git_branch) + check_call("git fetch") # If we do a checkout, we need to fetch first to make sure we can see it + check_call("git checkout #{git_branch}") + end + end + end + end + + def clone_ruby_repo_with(path:, git_url:, git_branch:, config_opts:, config_env: [], install_to:) + clone_repo_with(path: path, git_url: git_url, git_branch: git_branch) + + chdir(path) do + config_opts += [ "--prefix=#{install_to}" ] + + unless File.exist?("./configure") + check_call("./autogen.sh") + end + + if !File.exist?("./config.status") + should_configure = true + else + # Right now this config check is brittle - if you give it a config_env containing quotes, for + # instance, it will tend to believe it needs to reconfigure. We cut out single-quotes + # because they've caused trouble, but a full fix might need to understand bash quoting. + config_status_output = check_output("./config.status --conf").gsub("'", "").split(" ").sort + desired_config = config_opts.sort.map { |s| s.gsub("'", "") } + config_env + if config_status_output != desired_config + puts "Configuration is wrong, reconfiguring..." + puts "Desired: #{desired_config.inspect}" + puts "Current: #{config_status_output.inspect}" + should_configure = true + end + end + + if should_configure + check_call("#{config_env.join(" ")} ./configure #{ config_opts.join(" ") }") + check_call("make clean") + end + + check_call("make -j16 install") + end + end + end +end diff --git a/lib/yjit_metrics/report.rb b/lib/yjit_metrics/report.rb new file mode 100644 index 000000000..defe2c004 --- /dev/null +++ b/lib/yjit_metrics/report.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true +require_relative "./stats" +require_relative "./theme" + +# Shared utility methods for reports that use a single "blob" of results +module YJITMetrics + class Report + Theme = YJITMetrics::Theme + + include YJITMetrics::Stats + + def self.subclasses + @subclasses ||= [] + @subclasses + end + + def self.inherited(subclass) + YJITMetrics::Report.subclasses.push(subclass) + end + + def self.report_name_hash + out = {} + + @subclasses.select { |s| s.respond_to?(:report_name) }.each do |subclass| + name = subclass.report_name + + raise "Duplicated report name: #{name.inspect}!" 
if out[name] + + out[name] = subclass + end + + out + end + + def initialize(config_names, results, benchmarks: []) + raise "No Rubies specified for report!" if config_names.empty? + + bad_configs = config_names - results.available_configs + raise "Unknown configurations in report: #{bad_configs.inspect}!" unless bad_configs.empty? + + @config_names = config_names + @only_benchmarks = benchmarks + @result_set = results + end + + # Child classes can accept params in this way. By default it's a no-op. + def set_extra_info(info) + @extra_info = info + end + + # Do we specifically recognize this extra field? Nope. Child classes can override. + def accepts_field(name) + false + end + + def filter_benchmark_names(names) + return names if @only_benchmarks.empty? + names.select { |bench_name| @only_benchmarks.any? { |bench_spec| bench_name.start_with?(bench_spec) } } + end + + # Take column headings, formats for the percent operator and data, and arrange it + # into a simple ASCII table returned as a string. + def format_as_table(headings, col_formats, data, separator_character: "-", column_spacer: " ") + out = "" + + unless data && data[0] && col_formats && col_formats[0] && headings && headings[0] + $stderr.puts "Error in format_as_table..." + $stderr.puts "Headings: #{headings.inspect}" + $stderr.puts "Col formats: #{col_formats.inspect}" + $stderr.puts "Data: #{data.inspect}" + raise "Invalid data sent to format_as_table" + end + + num_cols = data[0].length + raise "Mismatch between headings and first data row for number of columns!" unless headings.length == num_cols + raise "Data has variable number of columns!" unless data.all? { |row| row.length == num_cols } + raise "Column formats have wrong number of entries!" unless col_formats.length == num_cols + + formatted_data = data.map.with_index do |row, idx| + col_formats.zip(row).map { |fmt, item| item ? 
fmt % item : "" } + end + + col_widths = (0...num_cols).map { |col_num| (formatted_data.map { |row| row[col_num].length } + [ headings[col_num].length ]).max } + + out.concat(headings.map.with_index { |h, idx| "%#{col_widths[idx]}s" % h }.join(column_spacer), "\n") + + separator = col_widths.map { |width| separator_character * width }.join(column_spacer) + out.concat(separator, "\n") + + formatted_data.each do |row| + out.concat (row.map.with_index { |item, idx| " " * (col_widths[idx] - item.size) + item }).join(column_spacer), "\n" + end + + out.concat("\n", separator, "\n") + rescue + $stderr.puts "Error when trying to format table: #{headings.inspect} / #{col_formats.inspect} / #{data[0].inspect}" + raise + end + + def write_to_csv(filename, data) + CSV.open(filename, "wb") do |csv| + data.each { |row| csv << row } + end + end + end +end diff --git a/lib/yjit-metrics/report_templates/blog_memory_details.html.erb b/lib/yjit_metrics/report_templates/blog_memory_details.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_memory_details.html.erb rename to lib/yjit_metrics/report_templates/blog_memory_details.html.erb diff --git a/lib/yjit-metrics/report_templates/blog_speed_details.html.erb b/lib/yjit_metrics/report_templates/blog_speed_details.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_speed_details.html.erb rename to lib/yjit_metrics/report_templates/blog_speed_details.html.erb diff --git a/lib/yjit-metrics/report_templates/blog_speed_headline.html.erb b/lib/yjit_metrics/report_templates/blog_speed_headline.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_speed_headline.html.erb rename to lib/yjit_metrics/report_templates/blog_speed_headline.html.erb diff --git a/lib/yjit-metrics/report_templates/blog_timeline_d3_template.html.erb b/lib/yjit_metrics/report_templates/blog_timeline_d3_template.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_timeline_d3_template.html.erb rename to lib/yjit_metrics/report_templates/blog_timeline_d3_template.html.erb diff --git a/lib/yjit-metrics/report_templates/blog_timeline_data_template.js.erb b/lib/yjit_metrics/report_templates/blog_timeline_data_template.js.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_timeline_data_template.js.erb rename to lib/yjit_metrics/report_templates/blog_timeline_data_template.js.erb diff --git a/lib/yjit-metrics/report_templates/blog_yjit_stats.html.erb b/lib/yjit_metrics/report_templates/blog_yjit_stats.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/blog_yjit_stats.html.erb rename to lib/yjit_metrics/report_templates/blog_yjit_stats.html.erb diff --git a/lib/yjit-metrics/report_templates/iteration_count.html.erb b/lib/yjit_metrics/report_templates/iteration_count.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/iteration_count.html.erb rename to lib/yjit_metrics/report_templates/iteration_count.html.erb diff --git a/lib/yjit-metrics/report_templates/memory_timeline_d3_template.html.erb b/lib/yjit_metrics/report_templates/memory_timeline_d3_template.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/memory_timeline_d3_template.html.erb rename to lib/yjit_metrics/report_templates/memory_timeline_d3_template.html.erb diff --git a/lib/yjit-metrics/report_templates/memory_timeline_data_template.js.erb b/lib/yjit_metrics/report_templates/memory_timeline_data_template.js.erb similarity 
index 100% rename from lib/yjit-metrics/report_templates/memory_timeline_data_template.js.erb rename to lib/yjit_metrics/report_templates/memory_timeline_data_template.js.erb diff --git a/lib/yjit-metrics/report_templates/mini_timeline_d3_template.html.erb b/lib/yjit_metrics/report_templates/mini_timeline_d3_template.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/mini_timeline_d3_template.html.erb rename to lib/yjit_metrics/report_templates/mini_timeline_d3_template.html.erb diff --git a/lib/yjit-metrics/report_templates/yjit_stats_exit.erb b/lib/yjit_metrics/report_templates/yjit_stats_exit.erb similarity index 100% rename from lib/yjit-metrics/report_templates/yjit_stats_exit.erb rename to lib/yjit_metrics/report_templates/yjit_stats_exit.erb diff --git a/lib/yjit-metrics/report_templates/yjit_stats_timeline_d3_template.html.erb b/lib/yjit_metrics/report_templates/yjit_stats_timeline_d3_template.html.erb similarity index 100% rename from lib/yjit-metrics/report_templates/yjit_stats_timeline_d3_template.html.erb rename to lib/yjit_metrics/report_templates/yjit_stats_timeline_d3_template.html.erb diff --git a/lib/yjit-metrics/report_templates/yjit_stats_timeline_data_template.js.erb b/lib/yjit_metrics/report_templates/yjit_stats_timeline_data_template.js.erb similarity index 100% rename from lib/yjit-metrics/report_templates/yjit_stats_timeline_data_template.js.erb rename to lib/yjit_metrics/report_templates/yjit_stats_timeline_data_template.js.erb diff --git a/lib/yjit_metrics/reports/blog_stats_exit_reports.rb b/lib/yjit_metrics/reports/blog_stats_exit_reports.rb new file mode 100644 index 000000000..32dea4352 --- /dev/null +++ b/lib/yjit_metrics/reports/blog_stats_exit_reports.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true +require_relative "./bloggable_single_report" + +module YJITMetrics + class BlogStatsExitReports < BloggableSingleReport + def self.report_name + "blog_exit_reports" + end + + def self.report_extensions + ["bench_list.txt"] + end + + def write_file(filename) + if @inactive + # Can't get stats? Write an empty file. + self.class.report_extensions.each do |ext| + File.open(filename + ".#{ext}", "w") { |f| f.write("") } + end + return + end + + @benchmark_names.each do |bench_name| + File.open("#{filename}.#{bench_name}.txt", "w") { |f| f.puts exit_report_for_benchmarks([bench_name]) } + end + + # This is a file with a known name that we can look for when generating. + File.open("#{filename}.bench_list.txt", "w") { |f| f.puts @benchmark_names.join("\n") } + end + end +end diff --git a/lib/yjit_metrics/reports/blog_yjit_stats_report.rb b/lib/yjit_metrics/reports/blog_yjit_stats_report.rb new file mode 100644 index 000000000..87bd8c531 --- /dev/null +++ b/lib/yjit_metrics/reports/blog_yjit_stats_report.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true +require_relative "./bloggable_single_report" + +# This report is to compare YJIT's speedup versus other Rubies for a single run or block of runs, +# with a single YJIT head-of-master. +module YJITMetrics + class BlogYJITStatsReport < BloggableSingleReport + def self.report_name + "blog_yjit_stats" + end + + def self.report_extensions + ["html"] + end + + def set_extra_info(info) + super + + if info[:timestamps] + @timestamps = info[:timestamps] + if @timestamps.size != 1 + raise "WE REQUIRE A SINGLE TIMESTAMP FOR THIS REPORT RIGHT NOW!" 
+        end
+        @timestamp_str = @timestamps[0].strftime("%Y-%m-%d-%H%M%S")
+      end
+    end
+
+    def initialize(config_names, results, benchmarks: [])
+      # Set up the parent class, look up relevant data
+      super
+      return if @inactive
+
+      # This report can just run with one platform's data and everything's fine.
+      # The stats data should be basically identical on other platforms.
+      look_up_data_by_ruby only_platforms: results.platforms[0]
+
+      # Sort benchmarks by headline/micro category, then alphabetically
+      @benchmark_names.sort_by! { |bench_name|
+        [ benchmark_category_index(bench_name),
+          bench_name ] }
+
+      @headings_with_tooltips = {
+        "bench" => "Benchmark name",
+        "Exit Report" => "Link to a generated YJIT-stats-style exit report",
+        "Inline" => "Bytes of inlined code generated",
+        "Outlined" => "Bytes of outlined code generated",
+        "Comp iSeqs" => "Number of compiled iSeqs (methods)",
+        "Comp Blocks" => "Number of compiled blocks",
+        "Inval" => "Number of methods or blocks invalidated",
+        "Inval Ratio" => "Number of blocks invalidated over number of blocks compiled",
+        "Bind Alloc" => "Number of Ruby bindings allocated",
+        "Bind Set" => "Number of variables set via bindings",
+        "Const Bumps" => "Number of times Ruby clears its internal constant cache",
+      }
+
+      # Col formats are only used when formatting entries for a text table, not for CSV
+      @col_formats = @headings_with_tooltips.keys.map { "%s" }
+    end
+
+    # Listed on the details page
+    def details_report_table_data
+      @benchmark_names.map.with_index do |bench_name, idx|
+        bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)"
+        bench_desc = bench_desc.gsub('"', "&quot;")
+        if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file]
+          bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb"
+        else
+          bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb"
+        end
+
+        exit_report_url = "/reports/benchmarks/blog_exit_reports_#{@timestamp_str}.#{bench_name}.txt"
+
+        bench_stats = @yjit_stats[bench_name][0]
+
+        fmt_inval_ratio = "?"
+        if bench_stats["invalidation_count"] && bench_stats["compiled_block_count"]
+          inval_ratio = bench_stats["invalidation_count"].to_f / bench_stats["compiled_block_count"]
+          fmt_inval_ratio = "%d%%" % (inval_ratio * 100.0).to_i
+        end
+
+        [ "<a href=\"#{bench_url}\" title=\"#{bench_desc}\">#{bench_name}</a>",
+          "<a href=\"#{exit_report_url}\">(click)</a>",
+          bench_stats["inline_code_size"],
+          bench_stats["outlined_code_size"],
+          bench_stats["compiled_iseq_count"],
+          bench_stats["compiled_block_count"],
+          bench_stats["invalidation_count"],
+          fmt_inval_ratio,
+          bench_stats["binding_allocations"],
+          bench_stats["binding_set"],
+          bench_stats["constant_state_bumps"],
+        ]
+
+      end
+    end
+
+    def write_file(filename)
+      if @inactive
+        # Can't get stats? Write an empty file.
+ self.class.report_extensions.each do |ext| + File.open(filename + ".#{ext}", "w") { |f| f.write("") } + end + return + end + + # Memory details report, with tables and text descriptions + script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_yjit_stats.html.erb") + html_output = script_template.result(binding) + File.open(filename + ".html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/reports/bloggable_single_report.rb b/lib/yjit_metrics/reports/bloggable_single_report.rb new file mode 100644 index 000000000..c64003648 --- /dev/null +++ b/lib/yjit_metrics/reports/bloggable_single_report.rb @@ -0,0 +1,244 @@ +# frozen_string_literal: true +require "yaml" + +require_relative "./yjit_stats_report" + +# For details-at-a-specific-time reports, we'll want to find individual configs and make sure everything is +# present and accounted for. This is a "single" report in the sense that it's conceptually at a single +# time, even though it can be multiple runs and Rubies. What it is *not* is results over time as YJIT and +# the benchmarks change. +module YJITMetrics + class BloggableSingleReport < YJITStatsReport + REPO_ROOT = File.expand_path("../../../..", __dir__) + + # Benchmarks sometimes go into multiple categories, based on the category field + BENCHMARK_METADATA = YAML.load_file(File.join(REPO_ROOT, "yjit-bench/benchmarks.yml")).map do |name, metadata| + [name, metadata.transform_keys(&:to_sym)] + end.to_h + + def headline_benchmarks + @benchmark_names.select { |bench| BENCHMARK_METADATA[bench] && BENCHMARK_METADATA[bench][:category] == "headline" } + end + + def micro_benchmarks + @benchmark_names.select { |bench| BENCHMARK_METADATA[bench] && BENCHMARK_METADATA[bench][:category] == "micro" } + end + + def benchmark_category_index(bench_name) + return 0 if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:category] == "headline" + return 2 if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:category] == "micro" + return 1 + end + + def exactly_one_config_with_name(configs, substring, description, none_okay: false) + matching_configs = configs.select { |name| name.include?(substring) } + + raise "We found more than one candidate #{description} config (#{matching_configs.inspect}) in this result set!" if matching_configs.size > 1 + raise "We didn't find any #{description} config among #{configs.inspect}!" if matching_configs.empty? && !none_okay + + matching_configs[0] + end + + # Include Truffle data only if we can find it, use MJIT 3.0 and/or 3.1 depending on what's available. + # YJIT and No-JIT are mandatory. + def look_up_data_by_ruby(only_platforms: YJITMetrics::PLATFORMS, in_runs: false) + only_platforms = [only_platforms].flatten + # Filter config names by given platform(s) + config_names = @config_names.select { |name| only_platforms.any? { |plat| name.include?(plat) } } + raise "No data files for platform(s) #{only_platforms.inspect} in #{@config_names}!" if config_names.empty? 
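+      # Each lookup below selects at most one config per Ruby variant by substring match
+      # (for example, a name like "x86_64_prod_ruby_with_yjit" -- illustrative, not verbatim);
+      # most variants are optional (none_okay: true), but with-YJIT and no-JIT are required.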
+ + @with_yjit_config = exactly_one_config_with_name(config_names, "prod_ruby_with_yjit", "with-YJIT") + @prev_no_jit_config = exactly_one_config_with_name(config_names, "prev_ruby_no_jit", "prev-CRuby", none_okay: true) + @prev_yjit_config = exactly_one_config_with_name(config_names, "prev_ruby_yjit", "prev-YJIT", none_okay: true) + @with_mjit30_config = exactly_one_config_with_name(config_names, "ruby_30_with_mjit", "with-MJIT3.0", none_okay: true) + @with_mjit_latest_config = exactly_one_config_with_name(config_names, "prod_ruby_with_mjit", "with-MJIT", none_okay: true) + @no_jit_config = exactly_one_config_with_name(config_names, "prod_ruby_no_jit", "no-JIT") + @truffle_config = exactly_one_config_with_name(config_names, "truffleruby", "Truffle", none_okay: true) + + # Prefer previous CRuby if present otherwise current CRuby. + @baseline_config = @prev_no_jit_config || @no_jit_config + + # Order matters here - we push No-JIT, then MJIT(s), then YJIT and finally TruffleRuby when present + @configs_with_human_names = [ + ["CRuby ", @prev_no_jit_config], + ["CRuby ", @no_jit_config], + ["MJIT3.0", @with_mjit30_config], + ["MJIT", @with_mjit_latest_config], + ["YJIT ", @prev_yjit_config], + ["YJIT ", @with_yjit_config], + ["Truffle", @truffle_config], + ].map do |(name, config)| + [@result_set.insert_version_for_config(name, config), config] if config + end.compact + + # Grab relevant data from the ResultSet + @times_by_config = {} + @warmups_by_config = {} + @ruby_metadata_by_config = {} + @bench_metadata_by_config = {} + @peak_mem_by_config = {} + @yjit_stats = {} + @configs_with_human_names.map { |name, config| config }.each do |config| + @times_by_config[config] = @result_set.times_for_config_by_benchmark(config, in_runs: in_runs) + @warmups_by_config[config] = @result_set.warmups_for_config_by_benchmark(config, in_runs: in_runs) + @ruby_metadata_by_config[config] = @result_set.metadata_for_config(config) + @bench_metadata_by_config[config] = @result_set.benchmark_metadata_for_config_by_benchmark(config) + @peak_mem_by_config[config] = @result_set.peak_mem_bytes_for_config_by_benchmark(config) + end + + @yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config, in_runs: in_runs) + @benchmark_names = filter_benchmark_names(@times_by_config[@with_yjit_config].keys) + + @times_by_config.each do |config_name, config_results| + if config_results.nil? || config_results.empty? + raise("No results for configuration #{config_name.inspect} in #{self.class}!") + end + + no_result_benchmarks = @benchmark_names.select { |bench_name| config_results[bench_name].nil? || config_results[bench_name].empty? } + unless no_result_benchmarks.empty? + # We allow MJIT latest ONLY to have some benchmarks skipped... (empty is also fine) + if config_name == @with_mjit_latest_config + @mjit_is_incomplete = true + else + raise("No results in config #{config_name.inspect} for benchmark(s) #{no_result_benchmarks.inspect} in #{self.class}!") + end + end + end + + no_stats_benchmarks = @benchmark_names.select { |bench_name| !@yjit_stats[bench_name] || !@yjit_stats[bench_name][0] || @yjit_stats[bench_name][0].empty? } + unless no_stats_benchmarks.empty? 
+ raise "No YJIT stats found for benchmarks: #{no_stats_benchmarks.inspect}" + end + end + + def calc_speed_stats_by_config + @mean_by_config = {} + @rsd_pct_by_config = {} + @speedup_by_config = {} + @total_time_by_config = {} + + @configs_with_human_names.map { |name, config| config }.each do |config| + @mean_by_config[config] = [] + @rsd_pct_by_config[config] = [] + @total_time_by_config[config] = 0.0 + @speedup_by_config[config] = [] + end + + @yjit_ratio = [] + + @benchmark_names.each do |benchmark_name| + @configs_with_human_names.each do |name, config| + this_config_times = @times_by_config[config][benchmark_name] + this_config_mean = mean_or_nil(this_config_times) # When nil? When a benchmark didn't happen for this config. + @mean_by_config[config].push this_config_mean + @total_time_by_config[config] += this_config_times.nil? ? 0.0 : sum(this_config_times) + this_config_rel_stddev_pct = rel_stddev_pct_or_nil(this_config_times) + @rsd_pct_by_config[config].push this_config_rel_stddev_pct + end + + baseline_mean = @mean_by_config[@baseline_config][-1] # Last pushed -- the one for this benchmark + baseline_rel_stddev_pct = @rsd_pct_by_config[@baseline_config][-1] + baseline_rel_stddev = baseline_rel_stddev_pct / 100.0 # Get ratio, not percent + @configs_with_human_names.each do |name, config| + this_config_mean = @mean_by_config[config][-1] + + if this_config_mean.nil? + @speedup_by_config[config].push [nil, nil] + else + this_config_rel_stddev_pct = @rsd_pct_by_config[config][-1] + # Use (baseline / this) so that the bar goes up as the value (test duration) goes down. + speed_ratio = baseline_mean / this_config_mean + + # For non-baseline we add the rsd for the config to the rsd + # for the baseline to determine the full variance bounds. + # For just the baseline we don't need to add anything. + speed_rsd = if config == @baseline_config + this_config_rel_stddev_pct + else + this_config_rel_stddev = this_config_rel_stddev_pct / 100.0 # Get ratio, not percent + # Because we are dividing the baseline mean by this mean + # to get a ratio we need to add the variance of each (the + # baseline and this config) to determine the full error bounds. + speed_rel_stddev = Math.sqrt(baseline_rel_stddev * baseline_rel_stddev + this_config_rel_stddev * this_config_rel_stddev) + speed_rel_stddev * 100.0 + end + + @speedup_by_config[config].push [speed_ratio, speed_rsd] + end + + end + + # A benchmark run may well return multiple sets of YJIT stats per benchmark name/type. + # For these calculations we just add all relevant counters together. 
+ this_bench_stats = combined_stats_data_for_benchmarks([benchmark_name]) + + total_exits = total_exit_count(this_bench_stats) + retired_in_yjit = (this_bench_stats["exec_instruction"] || this_bench_stats["yjit_insns_count"]) - total_exits + total_insns_count = retired_in_yjit + this_bench_stats["vm_insns_count"] + yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + @yjit_ratio.push yjit_ratio_pct + end + end + + def calc_mem_stats_by_config + @peak_mb_by_config = {} + @peak_mb_relative_by_config = {} + @configs_with_human_names.map { |name, config| config }.each do |config| + @peak_mb_by_config[config] = [] + @peak_mb_relative_by_config[config] = [] + end + @mem_overhead_factor_by_benchmark = [] + + @inline_mem_used = [] + @outline_mem_used = [] + + one_mib = 1024 * 1024.0 # As a float + + @benchmark_names.each.with_index do |benchmark_name, idx| + @configs_with_human_names.each do |name, config| + if @peak_mem_by_config[config][benchmark_name].nil? + @peak_mb_by_config[config].push nil + @peak_mb_relative_by_config[config].push [nil, nil] + else + this_config_bytes = mean(@peak_mem_by_config[config][benchmark_name]) + @peak_mb_by_config[config].push(this_config_bytes / one_mib) + end + end + + baseline_mean = @peak_mb_by_config[@baseline_config][-1] + baseline_rsd = rel_stddev(@peak_mem_by_config[@baseline_config][benchmark_name]) + @configs_with_human_names.each do |name, config| + if @peak_mem_by_config[config][benchmark_name].nil? + @peak_mb_relative_by_config[config].push [nil] + else + values = @peak_mem_by_config[config][benchmark_name] + this_config_mean_mb = mean(values) / one_mib + # For baseline use rsd. For other configs we need to add the baseline rsd to this rsd. + # (See comments for speedup calculations). + rsd = if config == @baseline_config + baseline_rsd + else + Math.sqrt(baseline_rsd ** 2 + rel_stddev(values) ** 2) + end + # Use (this / baseline) so that bar goes up as value (mem usage) of *this* goes up. + @peak_mb_relative_by_config[config].push [this_config_mean_mb / baseline_mean, rsd] + end + end + + # Here we use @with_yjit_config and @no_jit_config directly (not @baseline_config) + # to compare the memory difference of yjit vs no_jit on the same version. + + yjit_mem_usage = @peak_mem_by_config[@with_yjit_config][benchmark_name].sum + no_jit_mem_usage = @peak_mem_by_config[@no_jit_config][benchmark_name].sum + @mem_overhead_factor_by_benchmark[idx] = (yjit_mem_usage.to_f / no_jit_mem_usage) - 1.0 + + # Round MiB upward, even with a single byte used, since we crash if the block isn't allocated. + inline_mib = ((@yjit_stats[benchmark_name][0]["inline_code_size"] + (one_mib - 1))/one_mib).to_i + outline_mib = ((@yjit_stats[benchmark_name][0]["outlined_code_size"] + (one_mib - 1))/one_mib).to_i + + @inline_mem_used.push inline_mib + @outline_mem_used.push outline_mib + end + end + end +end diff --git a/lib/yjit_metrics/reports/iteration_count_report.rb b/lib/yjit_metrics/reports/iteration_count_report.rb new file mode 100644 index 000000000..c451c28dc --- /dev/null +++ b/lib/yjit_metrics/reports/iteration_count_report.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true +# Count up number of iterations and warmups for each Ruby and benchmark configuration. +# As we vary these, we need to make sure people can see what settings we're using for each Ruby. 
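+# The resulting table has one row per benchmark and, for each Ruby config, a pair of columns:
+# "<config> warmups" and "<config> iters" (the number of warmup and timed iterations recorded).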
+module YJITMetrics + class IterationCountReport < BloggableSingleReport + def self.report_name + "iteration_count" + end + + def self.report_extensions + ["html"] + end + + def initialize(config_names, results, benchmarks: []) + # This report will only work with one platform at + # a time, so if we have yjit_stats for x86 prefer that one. + platform = "x86_64" + if results.configs_containing_full_yjit_stats.any? { |c| c.start_with?(platform) } + config_names = config_names.select { |c| c.start_with?(platform) } + else + platform = results.platforms.first + end + + # Set up the parent class, look up relevant data + super + + return if @inactive + + # This report can just run with one platform's data and everything's fine. + # The iteration counts should be identical on other platforms. + look_up_data_by_ruby only_platforms: [platform] + + # Sort benchmarks by headline/micro category, then alphabetically + @benchmark_names.sort_by! { |bench_name| + [ benchmark_category_index(bench_name), + bench_name ] } + + @headings = [ "bench" ] + + @configs_with_human_names.flat_map { |name, config| [ "#{name} warmups", "#{name} iters" ] } + # Col formats are only used when formatting entries for a text table, not for CSV + @col_formats = [ "%s" ] + # Benchmark name + [ "%d", "%d" ] * @configs_with_human_names.size # Iterations per-Ruby-config + end + + # Listed on the details page + def iterations_report_table_data + @benchmark_names.map do |bench_name| + [ bench_name ] + + @configs_with_human_names.flat_map do |_, config| + if @times_by_config[config][bench_name] + [ + @warmups_by_config[config][bench_name].size, + @times_by_config[config][bench_name].size, + ] + else + # If we didn't run this benchmark for this config, we'd like the columns to be blank. + [ nil, nil ] + end + end + end + end + + def write_file(filename) + if @inactive + # Can't get stats? Write an empty file. + self.class.report_extensions.each do |ext| + File.open(filename + ".#{ext}", "w") { |f| f.write("") } + end + return + end + + # Memory details report, with tables and text descriptions + script_template = ERB.new File.read(__dir__ + "/../report_templates/iteration_count.html.erb") + html_output = script_template.result(binding) + File.open(filename + ".html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/reports/memory_details_multi_platform_report.rb b/lib/yjit_metrics/reports/memory_details_multi_platform_report.rb new file mode 100644 index 000000000..267e1f67b --- /dev/null +++ b/lib/yjit_metrics/reports/memory_details_multi_platform_report.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true +require_relative "./memory_details_report" +require_relative "./speed_details_multi_platform_report" + +module YJITMetrics + class MemoryDetailsMultiPlatformReport < SpeedDetailsMultiPlatformReport + def self.report_name + "blog_memory_details" + end + + def self.single_report_class + ::YJITMetrics::MemoryDetailsReport + end + end +end diff --git a/lib/yjit_metrics/reports/memory_details_report.rb b/lib/yjit_metrics/reports/memory_details_report.rb new file mode 100644 index 000000000..921920080 --- /dev/null +++ b/lib/yjit_metrics/reports/memory_details_report.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true +require_relative "./speed_details_report" + +# This report is to compare YJIT's memory usage versus other Rubies for a single run or block of runs, +# with a single YJIT head-of-master. 
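+# Columns cover peak memory (MiB) for each Ruby config, plus YJIT's inline/outlined code sizes
+# and its memory overhead relative to the no-JIT CRuby of the same version.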
+module YJITMetrics + class MemoryDetailsReport < SpeedDetailsReport + # This report requires a platform name and can't be auto-instantiated by basic_report.rb. + # Instead, its child report(s) can instantiate it for a specific platform. + #def self.report_name + # "blog_memory_details" + #end + + def self.report_extensions + [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ] + end + + def initialize(config_names, results, platform:, benchmarks: []) + unless YJITMetrics::PLATFORMS.include?(platform) + raise "Invalid platform for #{self.class.name}: #{platform.inspect}!" + end + @platform = platform + + # Set up the parent class, look up relevant data + # Permit non-same-platform stats config + config_names = config_names.select { |name| name.start_with?(platform) || name.include?("yjit_stats") } + # FIXME: Drop the platform: platform when we stop inheriting from SpeedDetailsReport. + super(config_names, results, platform: platform, benchmarks: benchmarks) + return if @inactive + + look_up_data_by_ruby + + # Sort benchmarks by headline/micro category, then alphabetically + @benchmark_names.sort_by! { |bench_name| + [ benchmark_category_index(bench_name), + #-@yjit_stats[bench_name][0]["compiled_iseq_count"], + bench_name ] } + + @headings = [ "bench" ] + + @configs_with_human_names.map { |name, config| "#{name} mem (MiB)"} + + [ "Inline Code", "Outlined Code", "YJIT Mem overhead" ] + #@configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : [ "#{name} mem ratio" ] } + # Col formats are only used when formatting entries for a text table, not for CSV + @col_formats = [ "%s" ] + # Benchmark name + [ "%d" ] * @configs_with_human_names.size + # Mem usage per-Ruby + [ "%d", "%d", "%.1f%%" ] # YJIT mem breakdown + #[ "%.2fx" ] * (@configs_with_human_names.size - 1) # Mem ratio per-Ruby + + calc_mem_stats_by_config + end + + # Printed to console + def report_table_data + @benchmark_names.map.with_index do |bench_name, idx| + [ bench_name ] + + @configs_with_human_names.map { |name, config| @peak_mb_by_config[config][idx] } + + [ @inline_mem_used[idx], @outline_mem_used[idx] ] + #[ "#{"%d" % (@peak_mb_by_config[@with_yjit_config][idx] - 256)} + #{@inline_mem_used[idx]}/128 + #{@outline_mem_used[idx]}/128" ] + end + end + + # Listed on the details page + def details_report_table_data + @benchmark_names.map.with_index do |bench_name, idx| + bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)" + if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file] + bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb" + else + bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb" + end + [ "#{bench_name}" ] + + @configs_with_human_names.map { |name, config| @peak_mb_by_config[config][idx] } + + [ @inline_mem_used[idx], @outline_mem_used[idx], @mem_overhead_factor_by_benchmark[idx] * 100.0 ] + #[ "#{"%d" % (@peak_mb_by_config[@with_yjit_config][idx] - 256)} + #{@inline_mem_used[idx]}/128 + #{@outline_mem_used[idx]}/128" ] + end + end + + def to_s + # This is just used to print the table to the console + format_as_table(@headings, @col_formats, report_table_data) + + "\nMemory usage is in MiB (mebibytes,) rounded. 
Ratio is versus interpreted baseline CRuby.\n"
+    end
+
+    def html_template_path
+      File.expand_path("../report_templates/blog_memory_details.html.erb", __dir__)
+    end
+
+    def relative_values_by_config_and_benchmark
+      @peak_mb_relative_by_config
+    end
+
+    # FIXME: We aren't reporting on the tripwires currently, but it makes sense to implement it and report on it.
+    def tripwires
+      {}
+    end
+  end
+end
diff --git a/lib/yjit_metrics/reports/per_bench_ruby_comparison_report.rb b/lib/yjit_metrics/reports/per_bench_ruby_comparison_report.rb
new file mode 100644
index 000000000..f082e7d58
--- /dev/null
+++ b/lib/yjit_metrics/reports/per_bench_ruby_comparison_report.rb
@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+require_relative "../report"
+
+# We'd like to be able to create a quick columnar report, often for one
+# Ruby config versus another, and load/dump it as JSON or CSV. This isn't a
+# report class that is all things to all people -- it's specifically
+# a comparison of two or more configurations per-benchmark for yjit-bench.
+#
+# The first configuration given is assumed to be the baseline against
+# which the other configs are measured.
+module YJITMetrics
+  class PerBenchRubyComparisonReport < Report
+    def self.report_name
+      "per_bench_compare"
+    end
+
+    def initialize(config_names, results, benchmarks: [])
+      super
+
+      @headings = [ "bench" ] + config_names.flat_map { |config| [ "#{config}", "RSD" ] } + alt_configs.map { |config| "#{config}/#{base_config}" }
+      @col_formats = [ "%s" ] + config_names.flat_map { [ "%.1fms", "%.1f%%" ] } + alt_configs.map { "%.2f" }
+
+      @report_data = []
+      times_by_config = {}
+      config_names.each { |config| times_by_config[config] = results.times_for_config_by_benchmark(config) }
+
+      benchmark_names = times_by_config[config_names[0]].keys
+
+      times_by_config.each do |config_name, results|
+        raise("No results for configuration #{config_name.inspect} in PerBenchRubyComparison!") if results.nil?
+      end
+
+      benchmark_names.each do |benchmark_name|
+        # Only include a benchmark if there is no list of "only these" benchmarks, or if the benchmark name starts with one of the list elements
+        unless @only_benchmarks.empty?
+          next unless @only_benchmarks.any? { |bench_spec| benchmark_name.start_with?(bench_spec) }
+        end
+        row = [ benchmark_name ]
+        config_names.each do |config|
+          unless times_by_config[config][benchmark_name]
+            raise("Configuration #{config.inspect} has no results for #{benchmark_name.inspect} even though #{config_names[0]} does in the same dataset!")
+          end
+          config_times = times_by_config[config][benchmark_name]
+          config_mean = mean(config_times)
+          row.push config_mean
+          row.push 100.0 * stddev(config_times) / config_mean
+        end
+
+        base_config_mean = mean(times_by_config[base_config][benchmark_name])
+        alt_configs.each do |config|
+          config_mean = mean(times_by_config[config][benchmark_name])
+          row.push config_mean / base_config_mean
+        end
+
+        @report_data.push row
+      end
+    end
+
+    def base_config
+      @config_names[0]
+    end
+
+    def alt_configs
+      @config_names[1..-1]
+    end
+
+    def to_s
+      format_as_table(@headings, @col_formats, @report_data) + config_legend_text
+    end
+
+    def config_legend_text
+      [
+        "",
+        "Legend:",
+        alt_configs.map do |config|
+          "- #{config}/#{base_config}: ratio of mean(#{config} times)/mean(#{base_config} times). >1 means #{base_config} is faster."
+ end, + "RSD is relative standard deviation (percent).", + "" + ].join("\n") + end + end +end diff --git a/lib/yjit_metrics/reports/speed_details_multi_platform_report.rb b/lib/yjit_metrics/reports/speed_details_multi_platform_report.rb new file mode 100644 index 000000000..c77e43ca7 --- /dev/null +++ b/lib/yjit_metrics/reports/speed_details_multi_platform_report.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true +require_relative "../report" +require_relative "./speed_details_report" + +module YJITMetrics + class SpeedDetailsMultiPlatformReport < Report + def self.report_name + "blog_speed_details" + end + + def self.single_report_class + ::YJITMetrics::SpeedDetailsReport + end + + # Report-extensions tries to be data-agnostic. That doesn't work very well here. + # It turns out that the platforms in the result set determine a lot of the + # files we generate. So we approximate by generating (sometimes-empty) indicator + # files. That way we still rebuild all the platform-specific files if they have + # been removed or a new type is added. + def self.report_extensions + single_report_class.report_extensions + end + + def initialize(config_names, results, benchmarks: []) + # We need to instantiate N sub-reports for N platforms + @platforms = results.platforms + @sub_reports = {} + @platforms.each do |platform| + platform_config_names = config_names.select { |name| name.start_with?(platform) } + + # If we can't find a config with stats for this platform, is there one in x86_64? + unless platform_config_names.detect { |config| config.include?("yjit_stats") } + x86_stats_config = config_names.detect { |config| config.start_with?("x86_64") && config.include?("yjit_stats") } + puts "Can't find #{platform} stats config, falling back to using x86_64 stats" + platform_config_names << x86_stats_config if x86_stats_config + end + + raise("Can't find a stats config for this platform in #{config_names.inspect}!") if platform_config_names.empty? + @sub_reports[platform] = self.class.single_report_class.new(platform_config_names, results, platform: platform, benchmarks: benchmarks) + if @sub_reports[platform].inactive + puts "Platform config names: #{platform_config_names.inspect}" + puts "All config names: #{config_names.inspect}" + raise "Unable to produce stats-capable report for platform #{platform.inspect} in SpeedDetailsMultiplatformReport!" + end + end + end + + def write_file(filename) + # First, write out per-platform reports + @sub_reports.values.each do |report| + # Each sub-report will add the platform name for itself + report.write_file(filename) + end + + # extensions: + + # For each of these types, we'll just include for each platform and we can switch display + # in the Jekyll site. They exist, but there's no combined multiplatform version. + # We'll create an empty 'tracker' file for the combined version. + self.class.report_extensions.each do |ext| + outfile = "#{filename}.#{ext}" + File.open(outfile, "w") { |f| f.write("") } + end + end + end +end diff --git a/lib/yjit_metrics/reports/speed_details_report.rb b/lib/yjit_metrics/reports/speed_details_report.rb new file mode 100644 index 000000000..4076997d6 --- /dev/null +++ b/lib/yjit_metrics/reports/speed_details_report.rb @@ -0,0 +1,502 @@ +# frozen_string_literal: true +require "victor" + +require_relative "./bloggable_single_report" + +module YJITMetrics + # This report is to compare YJIT's speedup versus other Rubies for a single run or block of runs, + # with a single YJIT head-of-master. 
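+  # Alongside the HTML details page it emits SVG bar charts (all benchmarks, headline, micro
+  # and "back"/other subsets), a tripwires JSON file used for regression detection, and a CSV
+  # of the table data -- see report_extensions and write_file below.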
+ class SpeedDetailsReport < BloggableSingleReport + # This report requires a platform name and can't be auto-instantiated by basic_report.rb. + # Instead, its child report(s) can instantiate it for a specific platform. + #def self.report_name + # "blog_speed_details" + #end + + def self.report_extensions + [ "html", "svg", "head.svg", "back.svg", "micro.svg", "tripwires.json", "csv" ] + end + + def initialize(orig_config_names, results, platform:, benchmarks: []) + # Dumb hack for subclasses until we refactor everything. + return super(orig_config_names, results, benchmarks: benchmarks) unless self.class == YJITMetrics::SpeedDetailsReport + + unless YJITMetrics::PLATFORMS.include?(platform) + raise "Invalid platform for #{self.class.name}: #{platform.inspect}!" + end + @platform = platform + + # Permit non-same-platform stats config + config_names = orig_config_names.select { |name| name.start_with?(platform) || name.include?("yjit_stats") } + raise("Can't find any stats configuration in #{orig_config_names.inspect}!") if config_names.empty? + + # Set up the parent class, look up relevant data + super(config_names, results, benchmarks: benchmarks) + return if @inactive # Can't get stats? Bail out. + + look_up_data_by_ruby + + # Sort benchmarks by headline/micro category, then alphabetically + @benchmark_names.sort_by! { |bench_name| [ benchmark_category_index(bench_name), bench_name ] } + + @headings = [ "bench" ] + + @configs_with_human_names.flat_map { |name, config| [ "#{name} (ms)", "#{name} RSD" ] } + + @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : [ "#{name} spd", "#{name} spd RSD" ] } + + [ "% in YJIT" ] + + # Col formats are only used when formatting entries for a text table, not for CSV + @col_formats = [ "%s" ] + # Benchmark name + [ "%.1f", "%.2f%%" ] * @configs_with_human_names.size + # Mean and RSD per-Ruby + [ "%.2fx", "%.2f%%" ] * (@configs_with_human_names.size - 1) + # Speedups per-Ruby + [ "%.2f%%" ] # YJIT ratio + + @col_formats[13] = "%.2fx" # Boldface the YJIT speedup column. + + calc_speed_stats_by_config + end + + # Printed to console + def report_table_data + @benchmark_names.map.with_index do |bench_name, idx| + [ bench_name ] + + @configs_with_human_names.flat_map { |name, config| [ @mean_by_config[config][idx], @rsd_pct_by_config[config][idx] ] } + + @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? [] : @speedup_by_config[config][idx] } + + [ @yjit_ratio[idx] ] + end + end + + # Listed on the details page + def details_report_table_data + @benchmark_names.map.with_index do |bench_name, idx| + bench_desc = ( BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:desc] ) || "(no description available)" + + if BENCHMARK_METADATA[bench_name] && BENCHMARK_METADATA[bench_name][:single_file] + bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}.rb" + else + bench_url = "https://github.com/Shopify/yjit-bench/blob/main/benchmarks/#{bench_name}/benchmark.rb" + end + + [ "#{bench_name}" ] + + @configs_with_human_names.flat_map { |name, config| [ @mean_by_config[config][idx], @rsd_pct_by_config[config][idx] ] } + + @configs_with_human_names.flat_map { |name, config| config == @baseline_config ? 
[] : @speedup_by_config[config][idx] } + + [ @yjit_ratio[idx] ] + end + end + + def to_s + # This is just used to print the table to the console + format_as_table(@headings, @col_formats, report_table_data) + + "\nRSD is relative standard deviation (stddev / mean), expressed as a percent.\n" + + "Spd is the speed (iters/second) of the optimised implementation -- 2.0x would be twice as many iters per second.\n" + end + + # For the SVG, we calculate ratios from 0 to 1 for how far across the graph area a coordinate is. + # Then we convert them here to the actual size of the graph. + def ratio_to_x(ratio) + (ratio * 1000).to_s + end + + def ratio_to_y(ratio) + (ratio * 600.0).to_s + end + + def svg_object(relative_values_by_config_and_benchmark, benchmarks: @benchmark_names) + svg = Victor::SVG.new :template => :minimal, + :viewBox => "0 0 1000 600", + :xmlns => "http://www.w3.org/2000/svg", + "xmlns:xlink" => "http://www.w3.org/1999/xlink" # background: '#ddd' + + # Reserve some width on the left for the axis. Include a bit of right-side whitespace. + left_axis_width = 0.05 + right_whitespace = 0.01 + + # Reserve some height for the legend and bottom height for x-axis labels + bottom_key_height = 0.17 + top_whitespace = 0.05 + + plot_left_edge = left_axis_width + plot_top_edge = top_whitespace + plot_bottom_edge = 1.0 - bottom_key_height + plot_width = 1.0 - left_axis_width - right_whitespace + plot_height = 1.0 - bottom_key_height - top_whitespace + plot_right_edge = 1.0 - right_whitespace + + svg.rect x: ratio_to_x(plot_left_edge), y: ratio_to_y(plot_top_edge), + width: ratio_to_x(plot_width), height: ratio_to_y(plot_height), + stroke: Theme.axis_color, + fill: Theme.background_color + + + # Basic info on Ruby configs and benchmarks + ruby_configs = @configs_with_human_names.map { |name, config| config } + ruby_human_names = @configs_with_human_names.map(&:first) + ruby_config_bar_colour = Hash[ruby_configs.zip(Theme.bar_chart_colors)] + baseline_colour = ruby_config_bar_colour[@baseline_config] + baseline_strokewidth = 2 + n_configs = ruby_configs.size + n_benchmarks = benchmarks.size + + + # How high do ratios go? + max_value = benchmarks.map do |bench_name| + bench_idx = @benchmark_names.index(bench_name) + relative_values_by_config_and_benchmark.values.map { |by_bench| by_bench[bench_idx][0] }.compact.max + end.max + + if max_value.nil? + $stderr.puts "Error finding Y axis. Benchmarks: #{benchmarks.inspect}." + $stderr.puts "data: #{relative_values_by_config_and_benchmark.inspect}" + raise "Error finding axis Y scale for benchmarks: #{benchmarks.inspect}" + end + + # Now let's calculate some widths... + + # Within each benchmark's horizontal span we'll want 3 or 4 bars plus a bit of whitespace. + # And we'll reserve 5% of the plot's width for whitespace on the far left and again on the far right. + plot_padding_ratio = 0.05 + plot_effective_width = plot_width * (1.0 - 2 * plot_padding_ratio) + plot_effective_left = plot_left_edge + plot_width * plot_padding_ratio + + # And some heights... + plot_top_whitespace = 0.15 * plot_height + plot_effective_top = plot_top_edge + plot_top_whitespace + plot_effective_height = plot_height - plot_top_whitespace + + # Add axis markers down the left side + tick_length = 0.008 + font_size = "small" + # This is the largest power-of-10 multiple of the no-JIT mean that we'd see on the axis. Often it's 1 (ten to the zero.) 
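+      # Illustrative example (not real data): for max_value = 3.7 this gives largest_power_of_10 = 1.0;
+      # of the candidate spacings (5, 2, 1, 0.5, ...) the first yielding 4-10 divisions is 0.5
+      # ((3.7 / 0.5).to_i == 7), producing ticks at 0.0, 0.5, ..., 3.5.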
+ largest_power_of_10 = 10.0 ** Math.log10(max_value).to_i + # Let's get some nice even numbers for possible distances between ticks + candidate_division_values = + [ largest_power_of_10 * 5, largest_power_of_10 * 2, largest_power_of_10, largest_power_of_10 / 2, largest_power_of_10 / 5, + largest_power_of_10 / 10, largest_power_of_10 / 20 ] + # We'll try to show between about 4 and 10 ticks along the axis, at nice even-numbered spots. + division_value = candidate_division_values.detect do |div_value| + divs_shown = (max_value / div_value).to_i + divs_shown >= 4 && divs_shown <= 10 + end + raise "Error figuring out axis scale with max ratio: #{max_value.inspect} (pow10: #{largest_power_of_10.inspect})!" if division_value.nil? + division_ratio_per_value = plot_effective_height / max_value + + # Now find all the y-axis tick locations + divisions = [] + cur_div = 0.0 + loop do + divisions.push cur_div + cur_div += division_value + break if cur_div > max_value + end + + divisions.each do |div_value| + tick_distance_from_zero = div_value / max_value + tick_y = plot_effective_top + (1.0 - tick_distance_from_zero) * plot_effective_height + svg.line x1: ratio_to_x(plot_left_edge - tick_length), y1: ratio_to_y(tick_y), + x2: ratio_to_x(plot_left_edge), y2: ratio_to_y(tick_y), + stroke: Theme.axis_color + svg.text ("%.1f" % div_value), + x: ratio_to_x(plot_left_edge - 3 * tick_length), y: ratio_to_y(tick_y), + text_anchor: "end", + font_weight: "bold", + font_size: font_size, + fill: Theme.text_color + end + + # Set up the top legend with coloured boxes and Ruby config names + top_legend_box_height = 0.032 + top_legend_box_width = 0.12 + text_height = 0.015 + + top_legend_item_width = plot_effective_width / n_configs + n_configs.times do |config_idx| + item_center_x = plot_effective_left + top_legend_item_width * (config_idx + 0.5) + item_center_y = plot_top_edge + 0.025 + legend_text_color = Theme.text_on_bar_color + if @configs_with_human_names[config_idx][1] == @baseline_config + legend_text_color = Theme.axis_color + left = item_center_x - 0.5 * top_legend_box_width + y = item_center_y - 0.5 * top_legend_box_height + top_legend_box_height + svg.line \ + x1: ratio_to_x(left), + y1: ratio_to_y(y), + x2: ratio_to_x(left + top_legend_box_width), + y2: ratio_to_y(y), + stroke: baseline_colour, + "stroke-width": 2 + else + svg.rect \ + x: ratio_to_x(item_center_x - 0.5 * top_legend_box_width), + y: ratio_to_y(item_center_y - 0.5 * top_legend_box_height), + width: ratio_to_x(top_legend_box_width), + height: ratio_to_y(top_legend_box_height), + fill: ruby_config_bar_colour[ruby_configs[config_idx]], + **Theme.legend_box_attrs + end + svg.text @configs_with_human_names[config_idx][0], + x: ratio_to_x(item_center_x), + y: ratio_to_y(item_center_y + 0.5 * text_height), + font_size: font_size, + text_anchor: "middle", + font_weight: "bold", + fill: legend_text_color, + **(legend_text_color == Theme.text_on_bar_color ? Theme.legend_text_attrs : {}) + end + + baseline_y = plot_effective_top + (1.0 - (1.0 / max_value)) * plot_effective_height + + bar_data = [] + + # Okay. Now let's plot a lot of boxes and whiskers. 
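+      # Each benchmark becomes a group of bars, one per non-baseline Ruby config; bar height is
+      # relative_value / max_value, so the largest value spans the full plot height. The baseline
+      # config is skipped here and drawn later as a horizontal line at 1.0.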
+ benchmarks.each.with_index do |bench_name, bench_short_idx| + bar_data << {label: bench_name.delete_suffix('.rb'), bars: []} + bench_idx = @benchmark_names.index(bench_name) + + ruby_configs.each.with_index do |config, config_idx| + human_name = ruby_human_names[config_idx] + + relative_value, rsd_pct = relative_values_by_config_and_benchmark[config][bench_idx] + + if config == @baseline_config + # Sanity check. + raise "Unexpected relative value for baseline config" if relative_value != 1.0 + end + + # If relative_value is nil, there's no such benchmark in this specific case. + if relative_value != nil + rsd_ratio = rsd_pct / 100.0 + bar_height_ratio = relative_value / max_value + + # The calculated number is rel stddev and is scaled by bar height. + stddev_ratio = bar_height_ratio * rsd_ratio + + tooltip_text = "#{"%.2f" % relative_value}x baseline (#{human_name})" + + if config == @baseline_config + next + end + + bar_data.last[:bars] << { + value: bar_height_ratio, + fill: ruby_config_bar_colour[config], + label: sprintf("%.2f", relative_value), + tooltip: tooltip_text, + stddev_ratio: stddev_ratio, + } + end + end + end + + geomeans = ruby_configs.each_with_object({}) do |config, h| + next unless relative_values_by_config_and_benchmark[config] + values = benchmarks.map { |bench| relative_values_by_config_and_benchmark[config][ @benchmark_names.index(bench) ]&.first }.compact + h[config] = geomean(values) + end + + bar_data << { + label: "geomean*", + label_attrs: {font_style: "italic"}, + bars: ruby_configs.map.with_index do |config, index| + next if config == @baseline_config + value = geomeans[config] + { + value: value / max_value, + fill: ruby_config_bar_colour[config], + label: sprintf("%.2f", value), + tooltip: sprintf("%.2fx baseline (%s)", value, ruby_human_names[index]), + } + end.compact, + } + + # Determine bar width by counting the bars and adding the number of groups + # for bar-sized space before each group, plus one for the right side of the graph. + num_groups = bar_data.size + bar_width = plot_width / (num_groups + bar_data.map { |x| x[:bars].size }.sum + 1) + + bar_labels = [] + + # Start at the y-axis. + left = plot_left_edge + bar_data.each.with_index do |data, group_index| + data[:bars].each.with_index do |bar, bar_index| + # Move position one width over to place this bar. + left += bar_width + + bar_left = left + bar_center = bar_left + 0.5 * bar_width + bar_right = bar_left + bar_width + bar_top = plot_effective_top + (1.0 - bar[:value]) * plot_effective_height + bar_height = bar[:value] * plot_effective_height + + svg.rect \ + x: ratio_to_x(bar_left), + y: ratio_to_y(bar_top), + width: ratio_to_x(bar_width), + height: ratio_to_y(bar_height), + fill: bar[:fill], + data_tooltip: bar[:tooltip] + + if bar[:label] + bar_labels << { + x: bar_left + 0.002, + y: bar_top - 0.0125, + text: bar[:label], + } + end + + if bar[:stddev_ratio]&.nonzero? + # Whiskers should be centered around the top of the bar, at a distance of one stddev. 
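+            # stddev_ratio was scaled by bar height above, so each whisker extends one standard
+            # deviation of the plotted value above and below the top of the bar.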
+ stddev_top = bar_top - bar[:stddev_ratio] * plot_effective_height + stddev_bottom = bar_top + bar[:stddev_ratio] * plot_effective_height + + svg.line \ + x1: ratio_to_x(bar_left), + y1: ratio_to_y(stddev_top), + x2: ratio_to_x(bar_right), + y2: ratio_to_y(stddev_top), + **Theme.stddev_marker_attrs + svg.line \ + x1: ratio_to_x(bar_left), + y1: ratio_to_y(stddev_bottom), + x2: ratio_to_x(bar_right), + y2: ratio_to_y(stddev_bottom), + **Theme.stddev_marker_attrs + svg.line \ + x1: ratio_to_x(bar_center), + y1: ratio_to_y(stddev_top), + x2: ratio_to_x(bar_center), + y2: ratio_to_y(stddev_bottom), + **Theme.stddev_marker_attrs + end + end + + # Place a tick on the x-axis in the middle of the group and print label. + group_right = left + bar_width + group_left = (group_right - (bar_width * data[:bars].size)) + middle = group_left + (group_right - group_left) / 2 + svg.line \ + x1: ratio_to_x(middle), + y1: ratio_to_y(plot_bottom_edge), + x2: ratio_to_x(middle), + y2: ratio_to_y(plot_bottom_edge + tick_length), + stroke: Theme.axis_color + + text_end_x = middle + text_end_y = plot_bottom_edge + tick_length * 3 + svg.text data[:label], + x: ratio_to_x(text_end_x), + y: ratio_to_y(text_end_y), + fill: Theme.text_color, + font_size: font_size, + text_anchor: "end", + transform: "rotate(-60, #{ratio_to_x(text_end_x)}, #{ratio_to_y(text_end_y)})", + **data.fetch(:label_attrs, {}) + + # After a group of bars leave the space of one bar width before the next group. + left += bar_width + end + + # Horizontal line for baseline of CRuby at 1.0. + svg.line x1: ratio_to_x(plot_left_edge), y1: ratio_to_y(baseline_y), x2: ratio_to_x(plot_right_edge), y2: ratio_to_y(baseline_y), stroke: baseline_colour, "stroke-width": baseline_strokewidth + + # Do value labels last so that they are above bars, variance whiskers, etc. + bar_labels.each do |label| + font_size = "0.5em" # xx-small is equivalent to 9px or 0.5625em at the default browser font size. + label_text_height = text_height * 0.8 + text_length = 0.0175 + transform = "rotate(-60, #{ratio_to_x(label[:x] + (bar_width * 0.5))}, #{ratio_to_y(label[:y])})" + + svg.rect \ + x: ratio_to_x(label[:x] - text_length * 0.01), + y: ratio_to_y(label[:y] - 0.925 * label_text_height), + width: ratio_to_x(text_length * 1.02), + height: ratio_to_y(label_text_height), + transform: transform, + **Theme.bar_text_background_attrs + + svg.text label[:text], + x: ratio_to_x(label[:x]), + y: ratio_to_y(label[:y]), + fill: Theme.text_color, + font_size: font_size, + text_anchor: "start", + textLength: ratio_to_x(text_length), + transform: transform, + **Theme.bar_text_attrs + end + + svg + end + + def tripwires + tripwires = {} + micro = micro_benchmarks + @benchmark_names.each_with_index do |bench_name, idx| + tripwires[bench_name] = { + mean: @mean_by_config[@with_yjit_config][idx], + rsd_pct: @rsd_pct_by_config[@with_yjit_config][idx], + micro: micro.include?(bench_name), + } + end + tripwires + end + + def html_template_path + File.expand_path("../report_templates/blog_speed_details.html.erb", __dir__) + end + + def relative_values_by_config_and_benchmark + @speedup_by_config + end + + def write_file(filename) + if @inactive + # Can't get stats? Write an empty file. + self.class.report_extensions.each do |ext| + File.open(filename + ".#{@platform}.#{ext}", "w") { |f| f.write("") } + end + + return + end + + head_bench = headline_benchmarks + micro_bench = micro_benchmarks + back_bench = @benchmark_names - head_bench - micro_bench + + if head_bench.empty? 
+ puts "Warning: when writing file #{filename.inspect}, headlining benchmark list is empty!" + end + if micro_bench.empty? + puts "Warning: when writing file #{filename.inspect}, micro benchmark list is empty!" + end + if back_bench.empty? + puts "Warning: when writing file #{filename.inspect}, miscellaneous benchmark list is empty!" + end + + [ + [ @benchmark_names, ".svg" ], + [ head_bench, ".head.svg" ], + [ micro_bench, ".micro.svg" ], + [ back_bench, ".back.svg" ], + ].each do |bench_names, extension| + if bench_names.empty? + contents = "" + else + contents = svg_object(relative_values_by_config_and_benchmark, benchmarks: bench_names).render + end + + File.open(filename + "." + @platform + extension, "w") { |f| f.write(contents) } + end + + # First the 'regular' details report, with tables and text descriptions + script_template = ERB.new File.read(html_template_path) + html_output = script_template.result(binding) + File.open(filename + ".#{@platform}.html", "w") { |f| f.write(html_output) } + + # The Tripwire report is used to tell when benchmark performance drops suddenly + json_data = tripwires + File.open(filename + ".#{@platform}.tripwires.json", "w") { |f| f.write JSON.pretty_generate json_data } + + write_to_csv(filename + ".#{@platform}.csv", [@headings] + report_table_data) + end + end +end diff --git a/lib/yjit_metrics/reports/speed_headline_report.rb b/lib/yjit_metrics/reports/speed_headline_report.rb new file mode 100644 index 000000000..bcaa185a5 --- /dev/null +++ b/lib/yjit_metrics/reports/speed_headline_report.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true +require_relative "./bloggable_single_report" + +# This very small report is to give the quick headlines and summary for a YJIT comparison. +module YJITMetrics + class SpeedHeadlineReport < BloggableSingleReport + def self.report_name + "blog_speed_headline" + end + + def self.report_extensions + ["html"] + end + + def format_speedup(ratio) + if ratio >= 1.01 + "%.1f%% faster than" % ((ratio - 1.0) * 100) + elsif ratio < 0.99 + "%.1f%% slower than" % ((1.0 - ratio) * 100) + else + "the same speed as" + end + end + + def platforms + @result_set.platforms + end + + def yjit_bench_file_url(path) + "https://github.com/Shopify/yjit-bench/blob/#{@result_set.full_run_info&.dig("git_versions", "yjit_bench") || "main"}/#{path}" + end + + def ruby_version(config) + @result_set.ruby_version_for_config(config) + end + + X86_ONLY = ENV['ALLOW_ARM_ONLY_REPORTS'] != '1' + + def initialize(config_names, results, benchmarks: []) + # Give the headline data for x86 processors, not ARM64. + # No x86 data? Then no headline. + x86_configs = config_names.select { |name| name.include?("x86_64") } + if x86_configs.empty? + if X86_ONLY + @no_data = true + puts "WARNING: no x86_64 data for data: #{config_names.inspect}" + return + end + else + config_names = x86_configs + end + + # Set up the parent class, look up relevant data + super + return if @inactive # Can't get stats? Bail out. 
+ + platform = "x86_64" + if !X86_ONLY && !results.platforms.include?(platform) + platform = results.platforms[0] + end + look_up_data_by_ruby(only_platforms: [platform]) + + # Report the headlining speed comparisons versus current prerelease MJIT if available, or fall back to MJIT + if @mjit_is_incomplete + @with_mjit_config = @with_mjit30_config + else + @with_mjit_config = @with_mjit_latest_config || @with_mjit30_config + end + @mjit_name = "MJIT" + @mjit_name = "MJIT (3.0)" if @with_mjit_config == @with_mjit30_config + + # Sort benchmarks by headline/micro category, then alphabetically + @benchmark_names.sort_by! { |bench_name| + [ benchmark_category_index(bench_name), + #-@yjit_stats[bench_name][0]["compiled_iseq_count"], + bench_name ] } + + calc_speed_stats_by_config + + # For these ratios we compare current yjit and no_jit directly (not @baseline_config). + + # "Ratio of total times" method + #@yjit_vs_cruby_ratio = @total_time_by_config[@no_jit_config] / @total_time_by_config[@with_yjit_config] + + headline_runtimes = headline_benchmarks.map do |bench_name| + bench_idx = @benchmark_names.index(bench_name) + + bench_no_jit_mean = @mean_by_config[@no_jit_config][bench_idx] + bench_yjit_mean = @mean_by_config[@with_yjit_config][bench_idx] + prev_yjit_mean = @mean_by_config.dig(@prev_yjit_config, bench_idx) + + [ bench_yjit_mean, bench_no_jit_mean, prev_yjit_mean ] + end + # Geometric mean of headlining benchmarks only + @yjit_vs_cruby_ratio = geomean headline_runtimes.map { |yjit_mean, no_jit_mean, _| no_jit_mean / yjit_mean } + + if @prev_yjit_config + @yjit_vs_prev_yjit_ratio = geomean headline_runtimes.map { |yjit_mean, _, prev_yjit| prev_yjit / yjit_mean } + end + + @railsbench_idx = @benchmark_names.index("railsbench") + if @railsbench_idx + @yjit_vs_cruby_railsbench_ratio = @mean_by_config[@no_jit_config][@railsbench_idx] / @mean_by_config[@with_yjit_config][@railsbench_idx] + @yjit_vs_prev_yjit_railsbench_ratio = @mean_by_config[@prev_yjit_config][@railsbench_idx] / @mean_by_config[@with_yjit_config][@railsbench_idx] + end + end + + def to_s + return "(This run had no x86 results)" if @no_data + script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_speed_headline.html.erb") + script_template.result(binding) # Evaluate an Erb template with template_settings + end + + def write_file(filename) + if @inactive || @no_data + # Can't get stats? Write an empty file. + self.class.report_extensions.each do |ext| + File.open(filename + ".#{ext}", "w") { |f| f.write("") } + end + return + end + + html_output = self.to_s + File.open(filename + ".html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/reports/total_to_iter_report.rb b/lib/yjit_metrics/reports/total_to_iter_report.rb new file mode 100644 index 000000000..f18f9ae2b --- /dev/null +++ b/lib/yjit_metrics/reports/total_to_iter_report.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true +require_relative "../report" + +module YJITMetrics + # This is intended to show the total time taken to get to + # a particular iteration, to help understand warmup + class TotalToIterReport < Report + def self.report_name + "total_to_iter" + end + + def initialize(config_names, results, benchmarks: []) + raise "Not yet updated for multi-platform!" 
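+      # NOTE: this unconditional raise disables the report until it is updated for multi-platform data.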
+ + super + + @headings_by_config = {} + @col_formats_by_config = {} + @report_data_by_config = {} + + @config_names.each do |config| + times = @result_set.times_for_config_by_benchmark(config, in_runs: true) + warmups = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) + + # Combine times and warmups for each run, for each benchmark + all_iters = {} + times.keys.each do |benchmark_name| + all_iters[benchmark_name] = warmups[benchmark_name].zip(times[benchmark_name]).map { |warmups, real_iters| warmups + real_iters } + end + + benchmark_names = filter_benchmark_names(times.keys) + raise "No benchmarks found for config #{config.inspect}!" if benchmark_names.empty? + max_num_runs = benchmark_names.map { |bn| times[bn].size }.max + + # For every benchmark, check the fewest iterations/run. + min_iters_per_benchmark = benchmark_names.map { |bn| all_iters[bn].map { |run| run.size }.min } + + most_cols_of_benchmarks = min_iters_per_benchmark.max + + showcased_iters = [1, 5, 10, 50, 100, 200, 500, 1000, 5000, 10_000, 50_000, 100_000].select { |i| i <= most_cols_of_benchmarks } + + @headings_by_config[config] = + [ "bench", "samples" ] + + showcased_iters.map { |iter| "iter ##{iter}" } + + showcased_iters.map { |iter| "RSD ##{iter}" } + @col_formats_by_config[config] = + [ "%s", "%d" ] + + showcased_iters.map { "%.1fms" } + + showcased_iters.map { "%.2f%%" } + @report_data_by_config[config] = [] + + benchmark_names.each do |benchmark_name| + # We assume that for each config/benchmark combo we have the same number of warmup runs as timed runs + all_runs = all_iters[benchmark_name] + num_runs = all_runs.size + min_iters = all_runs.map { |run| run.size }.min + + iters_present = showcased_iters.select { |i| i <= min_iters } + end_nils = [nil] * (showcased_iters.size - iters_present.size) + + iter_N_mean = [] + iter_N_rsd = [] + + iters_present.each do |iter_num| + # For this report, we want the *total* non-harness time to get to an iteration number + iter_series = all_runs.map { |run| (0..(iter_num - 1)).map { |idx| run[idx] }.sum } + iter_N_mean.push mean(iter_series) + iter_N_rsd.push rel_stddev_pct(iter_series) + end + + @report_data_by_config[config].push([benchmark_name, num_runs] + iter_N_mean + end_nils + iter_N_rsd + end_nils) + end + end + end + + def to_s + output = "" + + @config_names.each do |config_name| + output.concat("Total Time to Iteration N for #{config_name.capitalize}:\n\n") + + output.concat(format_as_table(@headings_by_config[config_name], + @col_formats_by_config[config_name], + @report_data_by_config[config_name])) + + output.concat("Each iteration is a set of samples of that iteration in a series.\n") + output.concat("RSD is relative standard deviation - the standard deviation divided by the mean of the series.\n") + output.concat("Samples is the number of runs (samples taken) for each specific iteration number.\n") + output.concat("\n\n") + end + + output + end + + def write_file(filename) + @config_names.each do |config_name| + headings = @headings_by_config[config_name] + report_data = @report_data_by_config[config_name] + write_to_csv("#{filename}_#{config_name}.csv", [headings] + report_data) + end + end + end +end diff --git a/lib/yjit_metrics/reports/variable_warmup_report.rb b/lib/yjit_metrics/reports/variable_warmup_report.rb new file mode 100644 index 000000000..b8dd59a6d --- /dev/null +++ b/lib/yjit_metrics/reports/variable_warmup_report.rb @@ -0,0 +1,186 @@ +# frozen_string_literal: true +require_relative "../report" + +# And here is where we get 
into... cleverness :-/ + +# This report intends to look over the most recent results for a specific benchmark and Ruby configuration +# and determine how much warmup is really required or useful. Where possible we should be a bit conservative +# and run additional warmups, and we should check to see if we might be inadequately warming up a particular +# combination. + +# We don't want to let warmup or number of iterations get so high that we run over the GitHub Actions +# maximum job duration. + +module YJITMetrics + class VariableWarmupReport < Report + def self.report_name + "variable_warmup" + end + + def self.report_extensions + "warmup_settings.json" + end + + # The internal state of these is huge - reduce the size of debug output when calling a bad + # method... + def inspect + "VariableWarmupReport<#{self.object_id}>" + end + + CORRELATION_THRESHOLD = 0.1 + + def look_up_data_by_ruby + # Order matters here - we push No-JIT, then MJIT(s), then YJIT. For each one we sort by platform name. + # It matters because we want the output reports to be stable with no churn in Git. + bench_configs = YJITMetrics::DEFAULT_YJIT_BENCH_CI_SETTINGS["configs"] + configs = @result_set.config_names + config_order = [] + config_order += configs.select { |c| c["prev_ruby_no_jit"] }.sort # optional + config_order += configs.select { |c| c["prod_ruby_no_jit"] }.sort + config_order += configs.select { |c| c["prod_ruby_with_mjit"] }.sort # MJIT is optional, may be empty + config_order += configs.select { |c| c["prev_ruby_yjit"] }.sort # optional + config_order += configs.select { |c| c["prod_ruby_with_yjit"] }.sort + config_order += configs.select { |c| c["yjit_stats"] }.sort # Stats configs *also* take time to run + @configs_with_human_names = @result_set.configs_with_human_names(config_order) + + # Grab relevant data from the ResultSet + @warmups_by_config = {} + @times_by_config = {} + @iters_by_config = {} + @ruby_metadata_by_config = {} + @bench_metadata_by_config = {} + + @configs_with_human_names.map { |name, config| config }.each do |config| + @warmups_by_config[config] = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) + @times_by_config[config] = @result_set.times_for_config_by_benchmark(config, in_runs: true) + + @warmups_by_config[config].keys.each do |bench_name| + @iters_by_config[config] ||= {} + # For each run, add its warmups to its timed iterations in a single array. + runs = @warmups_by_config[config][bench_name].zip(@times_by_config[config][bench_name]).map { |a, b| a + b } + @iters_by_config[config][bench_name] = runs + end + + @ruby_metadata_by_config[config] = @result_set.metadata_for_config(config) + @bench_metadata_by_config[config] = @result_set.benchmark_metadata_for_config_by_benchmark(config) + end + + all_bench_names = @times_by_config[config_order[-1]].keys + @benchmark_names = filter_benchmark_names(all_bench_names) + + @times_by_config.each do |config_name, config_results| + if config_results.nil? || config_results.empty? 
+ raise("No results for configuration #{config_name.inspect} in #{self.class}!") + end + end + end + + def initialize(config_names, results, + default_yjit_bench_settings: ::YJITMetrics::DEFAULT_YJIT_BENCH_CI_SETTINGS, benchmarks: []) + + # Set up the parent class, look up relevant data + super(config_names, results, benchmarks: benchmarks) + + @default_yjit_bench_settings = default_yjit_bench_settings + + look_up_data_by_ruby + end + + # Figure out how many iterations, warmup and non-, for each Ruby config and benchmark + def iterations_for_configs_and_benchmarks(default_settings) + # Note: default_configs are config *roots*, not full configurations + default_configs = default_settings["configs"].keys.sort + + warmup_settings = default_configs.to_h do |config| + [ config, @benchmark_names.to_h do |bench_name| + [ bench_name, + { + # Conservative defaults - sometimes these are for Ruby configs we know nothing about, + # because they're not present in recent-at-the-time benchmark data. + warmup_itrs: default_settings["min_warmup_itrs"], + min_bench_itrs: default_settings["min_bench_itrs"], + min_bench_time: 0, + } + ] + end + ] + end + + @benchmark_names.each do |bench_name| + idx = @benchmark_names.index(bench_name) + + # Number of iterations is chosen per-benchmark, but stays the same across all configs. + # Find the fastest mean iteration across all configs. + summary = @result_set.summary_by_config_and_benchmark + fastest_itr_time_ms = default_configs.map do |config| + summary.dig(config, bench_name, "mean") + end.compact.min || 10_000_000.0 + + min_itrs_needed = (default_settings["min_bench_time"] * 1000.0 / fastest_itr_time_ms).to_i + min_itrs_needed = [ min_itrs_needed, default_settings["min_bench_itrs"] ].max + + default_configs.each do |config| + config_settings = default_settings["configs"][config] + + itr_time_ms = summary.dig(config, bench_name, "mean") + ws = warmup_settings[config][bench_name] + raise "No warmup settings found for #{config.inspect}/#{bench_name.inspect}!" if ws.nil? + + ws[:min_bench_itrs] = min_itrs_needed + + # Do we have an estimate of how long this takes per iteration? If so, include it. + ws[:itr_time_ms] = ("%.2f" % [ws[:itr_time_ms], itr_time_ms].compact.max) unless itr_time_ms.nil? + + # Warmup is chosen per-config to reduce unneeded warmup for low-warmup configs + ws[:warmup_itrs] = config_settings[:max_warmup_itrs] + if config_settings[:max_warmup_time] && itr_time_ms + # itr_time_ms is in milliseconds, while max_warmup_time is in seconds + max_allowed_warmup = config_settings[:max_warmup_time] * 1000.0 / itr_time_ms + # Choose the tighter of the two warmup limits + ws[:warmup_itrs] = max_allowed_warmup if ws[:warmup_itrs] > max_allowed_warmup + end + + if itr_time_ms + itrs = ws[:warmup_itrs] + ws[:min_bench_itrs] + est_time_ms = itrs * (itr_time_ms || 0.0) + ws[:estimated_time] = ((est_time_ms + 999.0) / 1000).to_i # Round up for elapsed time + else + ws[:estimated_time] = 0 unless ws[:estimated_time] + end + #puts "Est time #{config.inspect} #{bench_name.inspect}: #{itrs} * #{"%.1f" % (itr_time_ms || 0.0)}ms = #{ws[:estimated_time].inspect}sec" + end + end + + platform_configs = {} + @configs_with_human_names.values.each do |config| + config_platform = YJITMetrics::PLATFORMS.detect { |platform| config.start_with?(platform) } + platform_configs[config_platform] ||= [] + platform_configs[config_platform] << config + end + + # How much total time have we allocated to running benchmarks per platform? 
+ platform_configs.each do |platform, configs| + est_time = configs.map do |config| + warmup_settings[config].values.map { |s| s[:estimated_time] || 0.0 }.sum + end.sum + warmup_settings["#{platform}_estimated_time"] = est_time + + # Do we need to reduce the time taken? + if est_time > default_settings["max_itr_time"] + warn "Maximum allowed time: #{default_settings["max_itr_time"].inspect}sec" + warn "Estimated run time on #{platform}: #{est_time.inspect}sec" + warn "This is where logic to do something statistical and clever would go!" + end + end + + warmup_settings + end + + def write_file(filename) + settings = iterations_for_configs_and_benchmarks(@default_yjit_bench_settings) + + puts "Writing file: #{filename}.warmup_settings.json" + File.open(filename + ".warmup_settings.json", "w") { |f| f.puts JSON.pretty_generate settings } + end + end +end diff --git a/lib/yjit_metrics/reports/warmup_report.rb b/lib/yjit_metrics/reports/warmup_report.rb new file mode 100644 index 000000000..a4bd26acf --- /dev/null +++ b/lib/yjit_metrics/reports/warmup_report.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true +require_relative "../report" + +# This is intended to be a simple warmup report, showing how long it takes +# one or more Ruby implementations to reach full performance, per-benchmark. +module YJITMetrics + class WarmupReport < Report + def self.report_name + "warmup" + end + + def initialize(config_names, results, benchmarks: []) + raise "Not yet updated for multi-platform!" + + super + + @headings_by_config = {} + @col_formats_by_config = {} + @report_data_by_config = {} + + @config_names.each do |config| + times = @result_set.times_for_config_by_benchmark(config, in_runs: true) + warmups = @result_set.warmups_for_config_by_benchmark(config, in_runs: true) + + # Combine times and warmups for each run, for each benchmark + all_iters = {} + times.keys.each do |benchmark_name| + all_iters[benchmark_name] = warmups[benchmark_name].zip(times[benchmark_name]).map { |warmups, real_iters| warmups + real_iters } + end + + benchmark_names = filter_benchmark_names(times.keys) + raise "No benchmarks found for config #{config.inspect}!" if benchmark_names.empty? + + # For every benchmark, check the fewest iterations/run. 
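+        # (Iteration columns go up to the largest of these minimums; benchmarks with
+        # fewer iterations get nil entries for the columns they can't fill.)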
+ min_iters_per_benchmark = benchmark_names.map { |bn| all_iters[bn].map { |run| run.size }.min } + + most_cols_of_benchmarks = min_iters_per_benchmark.max + + showcased_iters = [1, 5, 10, 50, 100, 200, 500, 1000, 5000, 10_000, 50_000, 100_000].select { |i| i <= most_cols_of_benchmarks } + + @headings_by_config[config] = + [ "bench", "samples" ] + + showcased_iters.map { |iter| "iter ##{iter}" } + + showcased_iters.map { |iter| "RSD ##{iter}" } + @col_formats_by_config[config] = + [ "%s", "%d" ] + + showcased_iters.map { "%.1fms" } + + showcased_iters.map { "%.2f%%" } + @report_data_by_config[config] = [] + + benchmark_names.each do |benchmark_name| + # We assume that for each config/benchmark combo we have the same number of warmup runs as timed runs + all_runs = all_iters[benchmark_name] + num_runs = all_runs.size + min_iters = all_runs.map { |run| run.size }.min + + iters_present = showcased_iters.select { |i| i <= min_iters } + end_nils = [nil] * (showcased_iters.size - iters_present.size) + + iter_N_mean = [] + iter_N_rsd = [] + + iters_present.each do |iter_num| + iter_series = all_runs.map { |run| run[iter_num - 1] } # Human-readable "iteration #10" is array index 9 + iter_N_mean.push mean(iter_series) + iter_N_rsd.push rel_stddev_pct(iter_series) + end + + @report_data_by_config[config].push([benchmark_name, num_runs] + iter_N_mean + end_nils + iter_N_rsd + end_nils) + end + end + end + + def to_s + output = "" + + @config_names.each do |config_name| + output.concat("Warmup for #{config_name.capitalize}:\n\n") + + output.concat(format_as_table(@headings_by_config[config_name], + @col_formats_by_config[config_name], + @report_data_by_config[config_name])) + + output.concat("Each iteration is a set of samples of that iteration in a series.\n") + output.concat("RSD is relative standard deviation - the standard deviation divided by the mean of the series.\n") + output.concat("Samples is the number of runs (samples taken) for each specific iteration number.\n") + output.concat("\n\n") + end + + output + end + + def write_file(filename) + @config_names.each do |config_name| + headings = @headings_by_config[config_name] + report_data = @report_data_by_config[config_name] + write_to_csv("#{filename}_#{config_name}.csv", [headings] + report_data) + end + end + end +end diff --git a/lib/yjit_metrics/reports/yjit_stats_exit_report.rb b/lib/yjit_metrics/reports/yjit_stats_exit_report.rb new file mode 100644 index 000000000..daf86751f --- /dev/null +++ b/lib/yjit_metrics/reports/yjit_stats_exit_report.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +require_relative "./yjit_stats_report" + +module YJITMetrics + # This is intended to match the exit report printed by debug YJIT when stats are turned on. + # Note that this is somewhat complex to keep up to date. We don't store literal YJIT exit + # reports. In fact, exit reports are often meant to mimic a situation that never existed, + # where multiple runs are combined and then a hypothetical exit report is printed for them. + # So we don't store a real, literal exit report, which sometimes never happened. + # + # Instead we periodically update the logic and templates for the exit reports to match + # the current YJIT stats data. Keep in mind that older YJIT stats data often has different + # stats -- including renamed stats, or stats not collected for years, etc. So that means + # the code for exit reports may need to be more robust than the code from YJIT, which + # only has to deal with stats from its own exact YJIT version. 
+ # + # Despite that, the logic here intentionally follows the structure of YJIT's own exit + # reports so that it's not too difficult to update. Make sure to rebuild all the old + # exit reports when you update this to ensure that you don't have any that crash because + # of missing or renamed stats. + class YJITStatsExitReport < YJITStatsReport + def self.report_name + "yjit_stats_default" + end + + def to_s + exit_report_for_benchmarks(@benchmark_names) + end + + def write_file(filename) + text_output = self.to_s + File.open(filename + ".txt", "w") { |f| f.write(text_output) } + end + end +end diff --git a/lib/yjit_metrics/reports/yjit_stats_multi_ruby_report.rb b/lib/yjit_metrics/reports/yjit_stats_multi_ruby_report.rb new file mode 100644 index 000000000..1baf564d8 --- /dev/null +++ b/lib/yjit_metrics/reports/yjit_stats_multi_ruby_report.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true +require_relative "./yjit_stats_report" + +module YJITMetrics + # Note: this is now unused in normal operation, but is still in unit tests for reporting. + # This report is to compare YJIT's time-in-JIT versus its speedup for various benchmarks. + class YJITStatsMultiRubyReport < YJITStatsReport + def self.report_name + "yjit_stats_multi" + end + + def initialize(config_names, results, benchmarks: []) + # Set up the YJIT stats parent class + super + + # We've figured out which config is the YJIT stats. Now which one is production stats with YJIT turned on? + alt_configs = config_names - [ @stats_config ] + with_yjit_configs = alt_configs.select { |name| name.end_with?("prod_ruby_with_yjit") } + raise "We found more than one candidate with-YJIT config (#{with_yjit_configs.inspect}) in this result set!" if with_yjit_configs.size > 1 + raise "We didn't find any config that looked like a with-YJIT config among #{config_names.inspect}!" if with_yjit_configs.empty? + @with_yjit_config = with_yjit_configs[0] + + alt_configs -= with_yjit_configs + no_yjit_configs = alt_configs.select { |name| name.end_with?("prod_ruby_no_jit") } + raise "We found more than one candidate no-YJIT config (#{no_yjit_configs.inspect}) in this result set!" if no_yjit_configs.size > 1 + raise "We didn't find any config that looked like a no-YJIT config among #{config_names.inspect}!" if no_yjit_configs.empty? + @no_yjit_config = no_yjit_configs[0] + + # Let's calculate some report data + times_by_config = {} + [ @with_yjit_config, @no_yjit_config ].each { |config| times_by_config[config] = results.times_for_config_by_benchmark(config) } + @headings = [ "bench", @with_yjit_config + " (ms)", "speedup (%)", "% in YJIT" ] + @col_formats = [ "%s", "%.1f", "%.2f", "%.2f" ] + + @benchmark_names = filter_benchmark_names(times_by_config[@no_yjit_config].keys) + + times_by_config.each do |config_name, results| + raise("No results for configuration #{config_name.inspect} in PerBenchRubyComparison!") if results.nil? || results.empty? + end + + stats = results.yjit_stats_for_config_by_benchmark(@stats_config) + + @report_data = @benchmark_names.map do |benchmark_name| + no_yjit_config_times = times_by_config[@no_yjit_config][benchmark_name] + no_yjit_mean = mean(no_yjit_config_times) + with_yjit_config_times = times_by_config[@with_yjit_config][benchmark_name] + with_yjit_mean = mean(with_yjit_config_times) + yjit_ratio = no_yjit_mean / with_yjit_mean + yjit_speedup_pct = (yjit_ratio - 1.0) * 100.0 + + # A benchmark run may well return multiple sets of YJIT stats per benchmark name/type. 
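+        # (Internally the stats for each benchmark are stored as an array of hashes,
+        # so there may be more than one entry to combine.)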
+ # For these calculations we just add all relevant counters together. + this_bench_stats = combined_stats_data_for_benchmarks([benchmark_name]) + + side_exits = total_exit_count(this_bench_stats) + retired_in_yjit = (this_bench_stats["exec_instruction"] || this_bench_stats["yjit_insns_count"]) - side_exits + total_insns_count = retired_in_yjit + this_bench_stats["vm_insns_count"] + yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + + [ benchmark_name, with_yjit_mean, yjit_speedup_pct, yjit_ratio_pct ] + end + end + + def to_s + format_as_table(@headings, @col_formats, @report_data) + end + + def write_file(filename) + text_output = self.to_s + File.open(filename + ".txt", "w") { |f| f.write(text_output) } + end + end +end diff --git a/lib/yjit_metrics/reports/yjit_stats_report.rb b/lib/yjit_metrics/reports/yjit_stats_report.rb new file mode 100644 index 000000000..3c7f61344 --- /dev/null +++ b/lib/yjit_metrics/reports/yjit_stats_report.rb @@ -0,0 +1,182 @@ +# frozen_string_literal: true +require_relative "../report" + +module YJITMetrics + class YJITStatsReport < Report + attr_reader :stats_config + + # The report only runs on benchmarks that match the ones specified *and* that are present in + # the data files. This is that final list of benchmarks. + attr_reader :benchmark_names + + # If we can't get stats data, we can't usefully run this report. + attr_reader :inactive + + def initialize(stats_configs, results, benchmarks: []) + raise("No stats configs given for YJIT stats report!") if stats_configs.empty? + + super + + bad_configs = stats_configs - results.available_configs + raise "Unknown configurations in report: #{bad_configs.inspect}!" unless bad_configs.empty? + + # Take the specified reporting configurations and filter by which ones contain YJIT stats. The result should + # be a single configuration to report on. + filtered_stats_configs = results.configs_containing_full_yjit_stats & stats_configs + + # The surrounding code seems to be from before we started running multiple platforms, + # so if that's what we have (multiple platforms) just limit to one so we can get the report. + if filtered_stats_configs.size > 1 + # If the configs are the same but for different platforms, pick one. + # This regexp should be a constant but when this file is loaded + # the PLATFORMS constant hasn't been defined yet. + if filtered_stats_configs.map { |c| c.sub(/^#{Regexp.union(YJITMetrics::PLATFORMS)}_/, '') }.uniq.size == 1 + x86 = filtered_stats_configs.select { |c| c.start_with?("x86_64") } + filtered_stats_configs = x86 unless x86.empty? + end + end + + @inactive = false + if filtered_stats_configs.empty? + puts "We didn't find any config with YJIT stats among #{stats_configs.inspect}!" if filtered_stats_configs.empty? + @inactive = true + return + elsif filtered_stats_configs.size > 1 + puts "We found more than one config with YJIT stats (#{filtered_stats_configs.inspect}) in this result set!" + @inactive = true + return + end + @stats_config = filtered_stats_configs.first + + @result_set = results + @only_benchmarks = benchmarks + + bench_yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config) + raise("Config #{@stats_config.inspect} collected no YJIT stats!") if bench_yjit_stats.nil? || bench_yjit_stats.values.all?(&:empty?) 
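+      # bench_yjit_stats maps each benchmark name to an array of YJIT stats hashes.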
+ + # Only run benchmarks if there is no list of "only run these" benchmarks, or if the benchmark name starts with one of the list elements + @benchmark_names = filter_benchmark_names(bench_yjit_stats.keys) + end + + # Pretend that all these listed benchmarks ran inside a single Ruby process. Combine their statistics, as though you were + # about to print an exit report. + def combined_stats_data_for_benchmarks(benchmark_names) + raise("Can't query stats for an inactive stats-based report!") if @inactive + + unless benchmark_names.all? { |benchmark_name| @benchmark_names.include?(benchmark_name) } + raise "No data found for benchmark #{benchmark_name.inspect}!" + end + + all_yjit_stats = @result_set.yjit_stats_for_config_by_benchmark(@stats_config) + relevant_stats = benchmark_names.flat_map { |benchmark_name| all_yjit_stats[benchmark_name] }.select { |data| !data.empty? } + + if relevant_stats.empty? + raise "No YJIT stats data found for benchmarks: #{benchmark_names.inspect}!" + end + + # For each key in the YJIT statistics, add up the value for that key in all datasets. Note: all_stats is a non-numeric key. + yjit_stats_keys = relevant_stats[0].keys - ["all_stats"] + yjit_data = {} + yjit_stats_keys.each do |stats_key| + # Unknown keys default to 0 + entries = relevant_stats.map { |dataset| dataset[stats_key] }.compact + if entries[0].is_a?(Integer) + yjit_data[stats_key] = entries.sum(0) + elsif entries[0].is_a?(Float) + yjit_data[stats_key] = entries.sum(0.0) + elsif entries[0].is_a?(Hash) && entries[0].empty? || entries[0].values[0].is_a?(Integer) + yjit_data[stats_key] = {} + sub_keys = entries.flat_map(&:keys).uniq + sub_keys.each do |sub_key| + yjit_data[stats_key][sub_key] = entries.sum(0) { |entry| entry[sub_key] } + end + else + raise "Failing for #{stats_key.inspect} with unknown entry type #{entries[0].class}!" 
+ end + end + yjit_data + end + + def total_exit_count(stats, prefix: "exit_") + total = 0 + stats.each do |k,v| + total += v if k.start_with?(prefix) + end + total + end + + def exit_report_for_benchmarks(benchmarks) + # Bindings for use inside ERB report template + stats = combined_stats_data_for_benchmarks(benchmarks) + side_exits = total_exit_count(stats) + total_exits = side_exits + stats["leave_interp_return"] + + # Number of instructions that finish executing in YJIT + retired_in_yjit = (stats["exec_instruction"] || stats["yjit_insns_count"]) - side_exits + + # Average length of instruction sequences executed by YJIT + avg_len_in_yjit = retired_in_yjit.to_f / total_exits + + # Proportion of instructions that retire in YJIT + total_insns_count = retired_in_yjit + stats["vm_insns_count"] + yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + + report_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_exit.erb") + report_template.result(binding) # Evaluate with the local variables right here + end + + def sorted_exit_counts(stats, prefix:, how_many: 20, left_pad: 4) + prefix_text = "" + + exits = [] + stats.each do |k, v| + if k.start_with?(prefix) + exits.push [k.to_s.delete_prefix(prefix), v] + end + end + + exits = exits.sort_by { |name, count| [-count, name] }[0...how_many] + side_exits = total_exit_count(stats) + + top_n_total = exits.map { |name, count| count }.sum + top_n_exit_pct = 100.0 * top_n_total / side_exits + + prefix_text = "Top-#{how_many} most frequent exit ops (#{"%.1f" % top_n_exit_pct}% of exits):\n" + + longest_insn_name_len = exits.map { |name, count| name.length }.max + prefix_text + exits.map do |name, count| + padding = longest_insn_name_len + left_pad + padded_name = "%#{padding}s" % name + padded_count = "%10d" % count + percent = 100.0 * count / side_exits + formatted_percent = "%.1f" % percent + "#{padded_name}: #{padded_count} (#{formatted_percent})" + end.join("\n") + end + + def counters_section(counters, prefix:, prompt:) + text = prompt + "\n" + + counters = counters.filter { |key, _| key.start_with?(prefix) } + counters.filter! { |_, value| value != 0 } + counters.transform_keys! { |key| key.to_s.delete_prefix(prefix) } + + if counters.empty? + text.concat(" (all relevant counters are zero)") + return text + end + + counters = counters.to_a + counters.sort_by! { |_, counter_value| counter_value } + longest_name_length = counters.max_by { |name, _| name.length }.first.length + total = counters.sum { |_, counter_value| counter_value } + + counters.reverse_each do |name, value| + percentage = value.to_f * 100 / total + text.concat(" %*s %10d (%4.1f%%)\n" % [longest_name_length, name, value, percentage]) + end + + text + end + end +end diff --git a/lib/yjit_metrics/result_set.rb b/lib/yjit_metrics/result_set.rb new file mode 100644 index 000000000..1ad410999 --- /dev/null +++ b/lib/yjit_metrics/result_set.rb @@ -0,0 +1,518 @@ +# frozen_string_literal: true + +require_relative "./stats" +require_relative "./theme" + +# Encapsulate multiple benchmark runs across multiple Ruby configurations. +# Do simple calculations, reporting and file I/O. +# +# Note that a JSON file with many results can be quite large. +# Normally it's appropriate to store raw data as multiple JSON files +# that contain one set of runs each. Large multi-Ruby datasets +# may not be practical to save as full raw data. 
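+#
+# A minimal usage sketch (illustrative only; the file and config names here are
+# made up, see add_for_config below for the expected JSON structure):
+#
+#   results = YJITMetrics::ResultSet.new
+#   data = JSON.parse(File.read("x86_64_prod_ruby_with_yjit_results.json"))
+#   results.add_for_config("x86_64_prod_ruby_with_yjit", data)
+#   results.times_for_config_by_benchmark("x86_64_prod_ruby_with_yjit")
+#   # => { "railsbench" => [2.31, 2.28, ...], ... }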
+module YJITMetrics + class ResultSet + include YJITMetrics::Stats + + def initialize + @times = {} + @warmups = {} + @benchmark_metadata = {} + @ruby_metadata = {} + @yjit_stats = {} + @peak_mem = {} + @empty = true + end + + def empty? + @empty + end + + def config_names + @times.keys + end + + def platforms + @ruby_metadata.map { |config, hash| hash["platform"] }.uniq + end + + # "Fragments" are, in effect, a quick human-readable way to summarise a particular + # compile-time-plus-run-time Ruby configuration. Doing this in general would + # require serious AI, but we don't need it in general. We have a few specific + # cases we care about. + # + # Right now we're just checking the config name. It would be better, but harder, + # to actually verify the configuration from the config's Ruby metadata (and other + # metadata?) and make sure the config does what it's labelled as. + CONFIG_NAME_SPECIAL_CASE_FRAGMENTS = { + "prod_ruby_with_yjit" => "YJIT ", + "prev_ruby_yjit" => "YJIT ", + "prod_ruby_with_mjit" => "MJIT", + "ruby_30_with_mjit" => "MJIT-3.0", + "prod_ruby_no_jit" => "CRuby ", + "prev_ruby_no_jit" => "CRuby ", + "truffleruby" => "TruffleRuby", + "yjit_stats" => "YJIT Stats", + } + def table_of_configs_by_fragment(configs) + configs_by_fragment = {} + frag_by_length = CONFIG_NAME_SPECIAL_CASE_FRAGMENTS.keys.sort_by { |k| -k.length } # Sort longest-first + configs.each do |config| + longest_frag = frag_by_length.detect { |k| config.include?(k) } + unless longest_frag + raise "Trying to sort config #{config.inspect} by fragment, but no fragment matches!" + end + configs_by_fragment[longest_frag] ||= [] + configs_by_fragment[longest_frag] << config + end + configs_by_fragment + end + + # Add a table of configurations, distinguished by platform, compile-time config, runtime config and whatever + # else we can determine from config names and/or result data. Only include configurations for which we have + # results. Order by the req_configs order, if supplied, otherwise by order results were added in (internal + # hash table order.) + # NOTE: This is currently only used by variable_warmup_report which discards the actual human names + # (it gets used to select and order the configs). + def configs_with_human_names(req_configs = nil) + # Only use requested configs for which we have data + if req_configs + # Preserve req_configs order + c_n = config_names + only_configs = req_configs.select {|config| c_n.include?(config) } + else + only_configs = config_names() + end + + if only_configs.size == 0 + puts "No requested configurations have any data..." + puts "Requested configurations: #{req_configs.inspect} #{req_configs == nil ? "(nil means use all)" : ""}" + puts "Configs we have data for: #{@times.keys.inspect}" + raise("Can't generate human names table without any configurations!") + end + + configs_by_platform = {} + only_configs.each do |config| + config_platform = @ruby_metadata[config]["platform"] + configs_by_platform[config_platform] ||= [] + configs_by_platform[config_platform] << config + end + + # TODO: Get rid of this branch and the next and just use "human_name platform" consistently. + + # If each configuration only exists for a single platform, we'll use the platform names as human-readable names. 
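+      # (i.e. no platform has more than one of the requested configs)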
+ if configs_by_platform.values.map(&:size).max == 1 + out = {} + # Order output by req_config + req_configs.each do |config| + platform = configs_by_platform.detect { |platform, plat_configs| plat_configs.include?(config) } + out[platform] = config + end + return out + end + + # If all configurations are on the *same* platform, we'll use names like YJIT and MJIT and MJIT(3.0) + if configs_by_platform.size == 1 + # Sort list of configs by what fragments (Ruby version plus runtime config) they contain + by_fragment = table_of_configs_by_fragment(only_configs) + + # If no two configs have the same Ruby version plus runtime config, then that's how we'll name them. + frags_with_multiple_configs = by_fragment.keys.select { |frag| (by_fragment[frag] || []).length > 1 } + if frags_with_multiple_configs.empty? + out = {} + # Order by req_configs + req_configs.each do |config| + fragment = by_fragment.detect { |frag, configs| configs[0] == config }.first + human_name = insert_version_for_config(CONFIG_NAME_SPECIAL_CASE_FRAGMENTS[fragment], config) + out[human_name] = config + end + return out + end + + unsortable_configs = frags_with_multiple_configs.flat_map { |frag| by_fragment[frag] } + puts "Fragments with multiple configs: #{frags_with_multiple_configs.inspect}" + puts "Configs we can't sort by fragment: #{unsortable_configs.inspect}" + raise "We only have one platform, but we can't sort by fragment... Need finer distinctions!" + end + + # Okay. We have at least two platforms. Now things get stickier. + by_platform_and_fragment = {} + configs_by_platform.each do |platform, configs| + by_platform_and_fragment[platform] = table_of_configs_by_fragment(configs) + end + hard_to_name_configs = by_platform_and_fragment.values.flat_map(&:values).select { |configs| configs.size > 1 }.inject([], &:+).uniq + + # If no configuration shares *both* platform *and* fragment, we can name by platform and fragment. + if hard_to_name_configs.empty? + plat_frag_table = {} + by_platform_and_fragment.each do |platform, frag_table| + CONFIG_NAME_SPECIAL_CASE_FRAGMENTS.each do |fragment, human_name| + next unless frag_table[fragment] + + single_config = frag_table[fragment][0] + human_name = insert_version_for_config(human_name, single_config) + plat_frag_table[single_config] = "#{human_name} #{platform}" + end + end + + # Now reorder the table by req_configs + out = {} + req_configs.each do |config| + out[plat_frag_table[config]] = config + end + return out + end + + raise "Complicated case in configs_with_human_names! Hard to distinguish between: #{hard_to_name_configs.inspect}!" + end + + # These objects have absolutely enormous internal data, and we don't want it printed out with + # every exception. + def inspect + "YJITMetrics::ResultSet<#{object_id}>" + end + + # A ResultSet normally expects to see results with this structure: + # + # { + # "times" => { "benchname1" => [ 11.7, 14.5, 16.7, ... ], "benchname2" => [...], ... }, + # "benchmark_metadata" => { "benchname1" => {...}, "benchname2" => {...}, ... }, + # "ruby_metadata" => {...}, + # "yjit_stats" => { "benchname1" => [{...}, {...}...], "benchname2" => [{...}, {...}, ...] } + # } + # + # Note that this input structure doesn't represent runs (subgroups of iterations), + # such as when restarting the benchmark and doing, say, 10 groups of 300 + # iterations. To represent that, you would call this method 10 times, once per + # run. Runs will be kept separate internally, but by default are returned as a + # combined single array. 
+ # + # Every benchmark run is assumed to come with a corresponding metadata hash + # and (optional) hash of YJIT stats. However, there should normally only + # be one set of Ruby metadata, not one per benchmark run. Ruby metadata is + # assumed to be constant for a specific compiled copy of Ruby over all runs. + def add_for_config(config_name, benchmark_results, normalize_bench_names: true) + if !benchmark_results.has_key?("version") + puts "No version entry in benchmark results - falling back to version 1 file format." + + benchmark_results["times"].keys.each do |benchmark_name| + # v1 JSON files are always single-run, so wrap them in a one-element array. + benchmark_results["times"][benchmark_name] = [ benchmark_results["times"][benchmark_name] ] + benchmark_results["warmups"][benchmark_name] = [ benchmark_results["warmups"][benchmark_name] ] + benchmark_results["yjit_stats"][benchmark_name] = [ benchmark_results["yjit_stats"][benchmark_name] ] + + # Various metadata is still in the same format for v2. + end + elsif benchmark_results["version"] != 2 + raise "Getting data from JSON in bad format!" + else + # JSON file is marked as version 2, so all's well. + end + + @empty = false + + @times[config_name] ||= {} + benchmark_results["times"].each do |benchmark_name, times| + benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names + @times[config_name][benchmark_name] ||= [] + @times[config_name][benchmark_name].concat(times) + end + + @warmups[config_name] ||= {} + (benchmark_results["warmups"] || {}).each do |benchmark_name, warmups| + benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names + @warmups[config_name][benchmark_name] ||= [] + @warmups[config_name][benchmark_name].concat(warmups) + end + + @yjit_stats[config_name] ||= {} + benchmark_results["yjit_stats"].each do |benchmark_name, stats_array| + next if stats_array.nil? + + stats_array.compact! + + next if stats_array.empty? + + benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names + @yjit_stats[config_name][benchmark_name] ||= [] + @yjit_stats[config_name][benchmark_name].concat(stats_array) + end + + @benchmark_metadata[config_name] ||= {} + benchmark_results["benchmark_metadata"].each do |benchmark_name, metadata_for_benchmark| + benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names + @benchmark_metadata[config_name][benchmark_name] ||= metadata_for_benchmark + if @benchmark_metadata[config_name][benchmark_name] != metadata_for_benchmark + # We don't print this warning only once because it's really bad, and because we'd like to show it for all + # relevant problem benchmarks. But mostly because it's really bad: don't combine benchmark runs with + # different settings into one result set. + $stderr.puts "WARNING: multiple benchmark runs of #{benchmark_name} in #{config_name} have different benchmark metadata!" + end + end + + @ruby_metadata[config_name] ||= benchmark_results["ruby_metadata"] + ruby_meta = @ruby_metadata[config_name] + if ruby_meta != benchmark_results["ruby_metadata"] && !@printed_ruby_metadata_warning + print "Ruby metadata is meant to *only* include information that should always be\n" + + " the same for the same Ruby executable. Please verify that you have not added\n" + + " inappropriate Ruby metadata or accidentally used the same name for two\n" + + " different Ruby executables. 
(Additional mismatches in this result set won't show warnings.)\n" + puts "Metadata 1: #{ruby_meta.inspect}" + puts "Metadata 2: #{benchmark_results["ruby_metadata"].inspect}" + @printed_ruby_metadata_warning = true + end + unless ruby_meta["arch"] + # Our harness didn't record arch until adding ARM64 support. If a collected data file doesn't set it, + # autodetect from RUBY_DESCRIPTION. We only check x86_64 since all older data should only be on x86_64, + # which was all we supported. + if ruby_meta["RUBY_DESCRIPTION"].include?("x86_64") + ruby_meta["arch"] = "x86_64-unknown" + else + raise "No arch provided in data file, and no x86_64 detected in RUBY_DESCRIPTION!" + end + end + recognized_platforms = YJITMetrics::PLATFORMS + ["arm64"] + ruby_meta["platform"] ||= recognized_platforms.detect { |platform| (ruby_meta["uname -a"] || "").downcase.include?(platform) } + ruby_meta["platform"] ||= recognized_platforms.detect { |platform| (ruby_meta["arch"] || "").downcase.include?(platform) } + + raise "Uknown platform" if !ruby_meta["platform"] + + ruby_meta["platform"] = ruby_meta["platform"].sub(/^arm(\d+)$/, 'aarch\1') + #@platform ||= ruby_meta["platform"] + + #if @platform != ruby_meta["platform"] + # raise "A single ResultSet may only contain data from one platform, not #{@platform.inspect} AND #{ruby_meta["platform"].inspect}!" + #end + + @full_run ||= benchmark_results["full_run"] + if @full_run != benchmark_results["full_run"] + warn "The 'full_run' data should not change within the same run!" + end + + @peak_mem[config_name] ||= {} + benchmark_results["peak_mem_bytes"].each do |benchmark_name, mem_bytes| + benchmark_name = benchmark_name.sub(/.rb$/, "") if normalize_bench_names + @peak_mem[config_name][benchmark_name] ||= [] + @peak_mem[config_name][benchmark_name].concat(mem_bytes) + end + end + + # This returns a hash-of-arrays by configuration name + # containing benchmark results (times) per + # benchmark for the specified config. + # + # If in_runs is specified, the array will contain + # arrays (runs) of samples. Otherwise all samples + # from all runs will be combined. + def times_for_config_by_benchmark(config, in_runs: false) + raise("No results for configuration: #{config.inspect}!") if !@times.has_key?(config) || @times[config].empty? + + return @times[config] if in_runs + + data = {} + @times[config].each do |benchmark_name, runs| + data[benchmark_name] = runs.inject([]) { |arr, piece| arr.concat(piece) } + end + data + end + + # This returns a hash-of-arrays by configuration name + # containing warmup results (times) per + # benchmark for the specified config. + # + # If in_runs is specified, the array will contain + # arrays (runs) of samples. Otherwise all samples + # from all runs will be combined. + def warmups_for_config_by_benchmark(config, in_runs: false) + return @warmups[config] if in_runs + data = {} + @warmups[config].each do |benchmark_name, runs| + data[benchmark_name] = runs.inject([]) { |arr, piece| arr.concat(piece) } + end + data + end + + # This returns a hash-of-arrays by config name + # containing YJIT statistics, if gathered, per + # benchmark run for the specified config. For configs + # that don't collect YJIT statistics, the array + # will be empty. + # + # If in_runs is specified, the array will contain + # arrays (runs) of samples. Otherwise all samples + # from all runs will be combined. 
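+    # Combined shape (illustrative): { "railsbench" => [ { "compiled_iseq_count" => 1234, ... }, ... ] }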
+ def yjit_stats_for_config_by_benchmark(config, in_runs: false) + return @yjit_stats[config] if in_runs + data = {} + @yjit_stats[config].each do |benchmark_name, runs| + data[benchmark_name] ||= [] + runs.each { |run| data[benchmark_name].concat(run) } + end + data + end + + def peak_mem_bytes_for_config_by_benchmark(config) + @peak_mem[config] + end + + # This returns a hash-of-hashes by config name + # containing per-benchmark metadata (parameters) per + # benchmark for the specified config. + def benchmark_metadata_for_config_by_benchmark(config) + @benchmark_metadata[config] + end + + # This returns a hash of metadata for the given config name + def metadata_for_config(config) + @ruby_metadata[config] + end + + def ruby_version_for_config(config) + return unless metadata = @ruby_metadata[config] + + if (match = metadata["RUBY_DESCRIPTION"]&.match(/^(?:ruby\s+)?([0-9.]+\S*)/)) + match[1] + else + metadata["RUBY_VERSION"] + end + end + + def full_run_info + @full_run + end + + def insert_version_for_config(str, config) + str.sub(//, ruby_version_for_config(config)) + end + + # What Ruby configurations does this ResultSet contain data for? + def available_configs + @ruby_metadata.keys + end + + def benchmarks + @benchmark_metadata.values.flat_map(&:keys).uniq + end + + # Sometimes you just want all the yjit_stats fields added up. + # + # This should return a hash-of-hashes where the top level key + # key is the benchmark name and each hash value is the combined stats + # for a single benchmark across whatever number of runs is present. + # + # This may not work as expected if you have full YJIT stats only + # sometimes for a given config - which normally should never be + # the case. + def combined_yjit_stats_for_config_by_benchmark(config) + data = {} + @yjit_stats[config].each do |benchmark_name, runs| + stats = {} + runs.map(&:flatten).map(&:first).each do |run| + raise "Internal error! #{run.class.name} is not a hash!" unless run.is_a?(Hash) + + stats["all_stats"] = run["all_stats"] if run["all_stats"] + (run.keys - ["all_stats"]).each do |key| + if run[key].is_a?(Integer) + stats[key] ||= 0 + stats[key] += run[key] + elsif run[key].is_a?(Float) + stats[key] ||= 0.0 + stats[key] += run[key] + elsif run[key].is_a?(Hash) + stats[key] ||= {} + run[key].each do |subkey, subval| + stats[key][subkey] ||= 0 + stats[key][subkey] += subval + end + else + raise "Unexpected stat type #{run[key].class}!" + end + end + end + data[benchmark_name] = stats + end + data + end + + # Summarize the data by config. If it's a YJIT config with full stats, get the highlights of the exit report too. 
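+    # The result is keyed config => benchmark => { "mean", "stddev", "rel_stddev",
+    # "peak_mem_bytes", "yjit_stats" }; SUMMARY_STATS below is the subset of YJIT
+    # counters copied into that "yjit_stats" entry.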
+ SUMMARY_STATS = [ + "inline_code_size", + "outlined_code_size", + #"exec_instruction", # exec_instruction changed name to yjit_insns_count -- only one of the two will be present in a dataset + "yjit_insns_count", + "vm_insns_count", + "compiled_iseq_count", + "leave_interp_return", + "compiled_block_count", + "invalidation_count", + "constant_state_bumps", + ] + def summary_by_config_and_benchmark + summary = {} + available_configs.each do |config| + summary[config] = {} + + times_by_bench = times_for_config_by_benchmark(config) + times_by_bench.each do |bench, results| + summary[config][bench] = { + "mean" => mean(results), + "stddev" => stddev(results), + "rel_stddev" => rel_stddev(results), + } + end + + mem_by_bench = peak_mem_bytes_for_config_by_benchmark(config) + times_by_bench.keys.each do |bench| + summary[config][bench]["peak_mem_bytes"] = mem_by_bench[bench] + end + + all_stats = combined_yjit_stats_for_config_by_benchmark(config) + all_stats.each do |bench, stats| + summary[config][bench]["yjit_stats"] = stats.slice(*SUMMARY_STATS) + summary[config][bench]["yjit_stats"]["yjit_insns_count"] ||= stats["exec_instruction"] + + # Do we have full YJIT stats? If so, let's add the relevant summary bits + if stats["all_stats"] + out_stats = summary[config][bench]["yjit_stats"] + out_stats["side_exits"] = stats.inject(0) { |total, (k, v)| total + (k.start_with?("exit_") ? v : 0) } + out_stats["total_exits"] = out_stats["side_exits"] + out_stats["leave_interp_return"] + out_stats["retired_in_yjit"] = (out_stats["exec_instruction"] || out_stats["yjit_insns_count"]) - out_stats["side_exits"] + out_stats["avg_len_in_yjit"] = out_stats["retired_in_yjit"].to_f / out_stats["total_exits"] + out_stats["total_insns_count"] = out_stats["retired_in_yjit"] + out_stats["vm_insns_count"] + out_stats["yjit_ratio_pct"] = 100.0 * out_stats["retired_in_yjit"] / out_stats["total_insns_count"] + end + end + end + summary + end + + # What Ruby configurations, if any, have full YJIT statistics available? + def configs_containing_full_yjit_stats + @yjit_stats.keys.select do |config_name| + stats = @yjit_stats[config_name] + + # Every benchmark gets a key/value pair in stats, and every + # value is an array of arrays -- each run gets an array, and + # each measurement in the run gets an array. + + # Even "non-stats" YJITs now have statistics, but not "full" statistics + + # If stats is nil or empty, this isn't a full-yjit-stats config + if stats.nil? || stats.empty? + false + else + # For each benchmark, grab its array of runs + vals = stats.values + + vals.all? { |run_values| } + end + + # Stats is a hash of the form { "30_ifelse" => [ { "all_stats" => true, "inline_code_size" => 5572282, ...}, {...} ], "30k_methods" => [ {}, {} ]} + # We want to make sure every run has an all_stats hash key. + !stats.nil? && + !stats.empty? && + !stats.values.all? { |val| val.nil? || val[0].nil? || val[0][0].nil? || val[0][0]["all_stats"].nil? } + end + end + end +end diff --git a/lib/yjit_metrics/stats.rb b/lib/yjit_metrics/stats.rb new file mode 100644 index 000000000..06fdfd154 --- /dev/null +++ b/lib/yjit_metrics/stats.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true +# Statistical methods +module YJITMetrics + module Stats + def sum(values) + return values.sum(0.0) + end + + def sum_or_nil(values) + return nil if values.nil? + sum(values) + end + + def mean(values) + return values.sum(0.0) / values.size + end + + def mean_or_nil(values) + return nil if values.nil? 
+ mean(values) + end + + def geomean(values) + exponent = 1.0 / values.size + values.inject(1.0, &:*) ** exponent + end + + def geomean_or_nil(values) + return nil if values.nil? + geomean(values) + end + + def stddev(values) + return 0 if values.size <= 1 + + xbar = mean(values) + diff_sqrs = values.map { |v| (v-xbar)*(v-xbar) } + # Bessel's correction requires dividing by length - 1, not just length: + # https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation + variance = diff_sqrs.sum(0.0) / (values.length - 1) + return Math.sqrt(variance) + end + + def stddev_or_nil(values) + return nil if values.nil? + stddev(values) + end + + def rel_stddev(values) + stddev(values) / mean(values) + end + + def rel_stddev_or_nil(values) + return nil if values.nil? + rel_stddev(values) + end + + def rel_stddev_pct(values) + 100.0 * stddev(values) / mean(values) + end + + def rel_stddev_pct_or_nil(values) + return nil if values.nil? + rel_stddev_pct(values) + end + + # See https://en.wikipedia.org/wiki/Covariance#Definition and/or + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance (two-pass algorithm) + def covariance(x, y) + raise "Trying to take the covariance of two different-sized arrays!" if x.size != y.size + + x_mean = mean(x) + y_mean = mean(y) + + cov = 0.0 + (0...(x.size)).each do |i| + cov += (x[i] - x_mean) * (y[i] - y_mean) / x.size + end + + cov + end + + # See https://en.wikipedia.org/wiki/Pearson_correlation_coefficient + # I'm not convinced this is correct. It definitely doesn't match the least-squares correlation coefficient below. + def pearson_correlation(x, y) + raise "Trying to take the Pearson correlation of two different-sized arrays!" if x.size != y.size + + ## Some random Ruby guy method + #xx_prod = x.map { |xi| xi * xi } + #yy_prod = y.map { |yi| yi * yi } + #xy_prod = (0...(x.size)).map { |i| x[i] * y[i] } + # + #x_sum = x.sum + #y_sum = y.sum + # + #num = xy_prod.sum - (x_sum * y_sum) / x.size + #den = Math.sqrt(xx_prod.sum - x_sum ** 2.0 / x.size) * (yy_prod.sum - y_sum ** 2.0 / x.size) + # + #num/den + + # Wikipedia translation of the definition + x_mean = mean(x) + y_mean = mean(y) + num = (0...(x.size)).map { |i| (x[i] - x_mean) * (y[i] - y_mean) }.sum + den = Math.sqrt((0...(x.size)).map { |i| (x[i] - x_mean) ** 2.0 }.sum) * + Math.sqrt((0...(x.size)).map { |i| (y[i] - y_mean) ** 2.0 }.sum) + num / den + end + + # See https://mathworld.wolfram.com/LeastSquaresFitting.html + def least_squares_slope_intercept_and_correlation(x, y) + raise "Trying to take the least-squares slope of two different-sized arrays!" if x.size != y.size + + x_mean = mean(x) + y_mean = mean(y) + + xx_sum_of_squares = x.map { |xi| (xi - x_mean)**2.0 }.sum + yy_sum_of_squares = y.map { |yi| (yi - y_mean)**2.0 }.sum + xy_sum_of_squares = (0...(x.size)).map { |i| (x[i] - x_mean) * (y[i] - y_mean) }.sum + + slope = xy_sum_of_squares / xx_sum_of_squares + intercept = y_mean - slope * x_mean + + r_squared = xy_sum_of_squares ** 2.0 / (xx_sum_of_squares * yy_sum_of_squares) + + [slope, intercept, r_squared] + end + + # code taken from https://github.com/clbustos/statsample/blob/master/lib/statsample/regression/simple.rb#L74 + # (StatSample Ruby gem, simple linear regression.) + def simple_regression_slope(x, y) + raise "Trying to take the least-squares slope of two different-sized arrays!" 
if x.size != y.size + + x_mean = mean(x) + y_mean = mean(y) + + num = den = 0.0 + (0...x.size).each do |i| + num += (x[i] - x_mean) * (y[i] - y_mean) + den += (x[i] - x_mean)**2.0 + end + + slope = num / den + #intercept = y_mean - slope * x_mean + + slope + end + end +end diff --git a/lib/yjit-metrics/theme.rb b/lib/yjit_metrics/theme.rb similarity index 100% rename from lib/yjit-metrics/theme.rb rename to lib/yjit_metrics/theme.rb diff --git a/lib/yjit_metrics/timeline_report.rb b/lib/yjit_metrics/timeline_report.rb new file mode 100644 index 000000000..c86eaf579 --- /dev/null +++ b/lib/yjit_metrics/timeline_report.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true +# Class for reports that use a longer series of times, each with its own report/data. +module YJITMetrics + class TimelineReport + # This is the Munin palette from Shutterstock Rickshaw + MUNIN_PALETTE = [ + '#00cc00', + '#0066b3', + '#ff8000', + '#ffcc00', + '#330099', + '#990099', + '#ccff00', + '#ff0000', + '#808080', + '#008f00', + '#00487d', + '#b35a00', + '#b38f00', + '#6b006b', + '#8fb300', + '#b30000', + '#bebebe', + '#80ff80', + '#80c9ff', + '#ffc080', + '#ffe680', + '#aa80ff', + '#ee00cc', + '#ff8080', + '#666600', + '#ffbfff', + '#00ffcc', + '#cc6699', + '#999900', + # If we add one colour we get 29 entries, it's not divisible by the number of platforms and won't get weird repeats + '#003399', + ] + + include YJITMetrics::Stats + + def self.subclasses + @subclasses ||= [] + @subclasses + end + + def self.inherited(subclass) + YJITMetrics::TimelineReport.subclasses.push(subclass) + end + + def self.report_name_hash + out = {} + + @subclasses.select { |s| s.respond_to?(:report_name) }.each do |subclass| + name = subclass.report_name + + raise "Duplicated report name: #{name.inspect}!" if out[name] + + out[name] = subclass + end + + out + end + + def initialize(context) + @context = context + end + + # Look for "PLATFORM_#{name}"; prefer specified platform if present. + def find_config(name, platform: "x86_64") + matches = @context[:configs].select { |c| c.end_with?(name) } + matches.detect { |c| c.start_with?(platform) } || matches.first + end + + # Strip PLATFORM from beginning of name + def platform_of_config(config) + YJITMetrics::PLATFORMS.each do |p| + return p if config.start_with?("#{p}_") + end + + raise "Unknown platform in config '#{config}'" + end + end +end diff --git a/lib/yjit_metrics/timeline_reports/blog_timeline_report.rb b/lib/yjit_metrics/timeline_reports/blog_timeline_report.rb new file mode 100644 index 000000000..fd1862add --- /dev/null +++ b/lib/yjit_metrics/timeline_reports/blog_timeline_report.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true +require_relative "../timeline_report" + +module YJITMetrics + class BlogTimelineReport < TimelineReport + def self.report_name + "blog_timeline" + end + + def self.report_extensions + [ "html", "recent.html" ] + end + + # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. 
+ def inspect + "BlogTimelineReport<#{object_id}>" + end + + NUM_RECENT=100 + def initialize(context) + super + + yjit_config_root = "prod_ruby_with_yjit" + + # This should match the JS parser in the template file + time_format = "%Y %m %d %H %M %S" + + @series = {} + YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } + + @context[:benchmark_order].each.with_index do |benchmark, idx| + color = MUNIN_PALETTE[idx % MUNIN_PALETTE.size] + YJITMetrics::PLATFORMS.each do |platform| + config = "#{platform}_#{yjit_config_root}" + points = @context[:timestamps].map do |ts| + this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) + if this_point + this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" + # These fields are from the ResultSet summary + [ ts.strftime(time_format), this_point["mean"], this_point["stddev"], this_ruby_desc ] + else + nil + end + end + points.compact! + next if points.empty? + + visible = @context[:selected_benchmarks].include?(benchmark) + + s_all_time = { config: config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, color: color, data: points } + s_recent = s_all_time.dup + s_recent[:data] = s_recent[:data].last(NUM_RECENT) + + @series[platform][:recent].push s_recent + @series[platform][:all_time].push s_all_time + end + end + end + + def write_files(out_dir) + [:recent, :all_time].each do |duration| + YJITMetrics::PLATFORMS.each do |platform| + begin + @data_series = @series[platform][duration] + + script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_timeline_data_template.js.erb") + text = script_template.result(binding) + File.open("#{out_dir}/reports/timeline/blog_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } + rescue + puts "Error writing data file for #{platform} #{duration} data!" + raise + end + end + end + + script_template = ERB.new File.read(__dir__ + "/../report_templates/blog_timeline_d3_template.html.erb") + html_output = script_template.result(binding) # Evaluate an Erb template with template_settings + File.open("#{out_dir}/_includes/reports/blog_timeline.html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/timeline_reports/memory_timeline_report.rb b/lib/yjit_metrics/timeline_reports/memory_timeline_report.rb new file mode 100644 index 000000000..2b8baecbb --- /dev/null +++ b/lib/yjit_metrics/timeline_reports/memory_timeline_report.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true +require_relative "../timeline_report" + +module YJITMetrics + class MemoryTimelineReport < TimelineReport + def self.report_name + "memory_timeline" + end + + def self.report_extensions + [ "html", "recent.html" ] + end + + # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. 
+ def inspect + "MemoryTimelineReport<#{object_id}>" + end + + CONFIG_NAMES = { + "no-jit" => "prod_ruby_no_jit", + "yjit" => "prod_ruby_with_yjit", + } + CONFIG_ROOTS = CONFIG_NAMES.values + NUM_RECENT = 100 + + def initialize(context) + super + + ruby_config_roots = CONFIG_NAMES.values + + # This should match the JS parser in the template file + time_format = "%Y %m %d %H %M %S" + + @series = {} + YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } + + color_idx = 0 + @context[:benchmark_order].each.with_index do |benchmark, idx| + CONFIG_NAMES.each do |config_human_name, config_root| + color = MUNIN_PALETTE[color_idx % MUNIN_PALETTE.size] + color_idx += 1 + + YJITMetrics::PLATFORMS.each do |platform| + config = "#{platform}_#{config_root}" + points = @context[:timestamps].map do |ts| + this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) + if this_point + this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" + # These fields are from the ResultSet summary - peak_mem_bytes is an array because multiple runs are possible + [ ts.strftime(time_format), this_point["peak_mem_bytes"].max, this_ruby_desc ] + else + nil + end + end + points.compact! + next if points.empty? + + visible = @context[:selected_benchmarks].include?(benchmark) + + s_all_time = { config: config, config_human_name: config_human_name, benchmark: benchmark, name: "#{config_root}-#{benchmark}", platform: platform, visible: visible, color: color, data: points } + s_recent = s_all_time.dup + s_recent[:data] = s_recent[:data].last(NUM_RECENT) + + @series[platform][:recent].push s_recent + @series[platform][:all_time].push s_all_time + end + end + end + end + + def write_files(out_dir) + [:recent, :all_time].each do |duration| + YJITMetrics::PLATFORMS.each do |platform| + begin + @data_series = @series[platform][duration].select { |s| CONFIG_ROOTS.any? { |config_root| s[:config].include?(config_root) } } + + script_template = ERB.new File.read(__dir__ + "/../report_templates/memory_timeline_data_template.js.erb") + text = script_template.result(binding) + File.open("#{out_dir}/reports/timeline/memory_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } + rescue + puts "Error writing data file for #{platform} #{duration} data!" + raise + end + end + end + + script_template = ERB.new File.read(__dir__ + "/../report_templates/memory_timeline_d3_template.html.erb") + html_output = script_template.result(binding) # Evaluate an Erb template with template_settings + File.open("#{out_dir}/_includes/reports/memory_timeline.html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/timeline_reports/mini_timelines_report.rb b/lib/yjit_metrics/timeline_reports/mini_timelines_report.rb new file mode 100644 index 000000000..e69582850 --- /dev/null +++ b/lib/yjit_metrics/timeline_reports/mini_timelines_report.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true +require_relative "../timeline_report" + +module YJITMetrics + class MiniTimelinesReport < TimelineReport + def self.report_name + "mini_timelines" + end + + # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. 
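+    # (Editor's note: unlike the blog and memory timelines, this report charts a
+    # single config chosen via find_config("prod_ruby_with_yjit"), preferring
+    # x86_64 when both platforms have data, and only the benchmarks listed in
+    # @context[:selected_benchmarks], which keeps the resulting chart small.)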
+ def inspect + "MiniTimelinesReport<#{object_id}>" + end + + def initialize(context) + super + + config = find_config("prod_ruby_with_yjit") + platform = platform_of_config(config) + + # This should match the JS parser in the template file + time_format = "%Y %m %d %H %M %S" + + @series = [] + + @context[:selected_benchmarks].each do |benchmark| + points = @context[:timestamps].map do |ts| + this_point = @context[:summary_by_timestamp].dig(ts, config, benchmark) + if this_point + this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][config][ts] || "unknown" + # These fields are from the ResultSet summary + [ ts.strftime(time_format), this_point["mean"], this_ruby_desc ] + else + nil + end + end + points.compact! + next if points.empty? + + @series.push({ config: config, benchmark: benchmark, name: "#{config}-#{benchmark}", platform: platform, data: points }) + end + + #@series.sort_by! { |s| s[:name] } + end + + def write_files(out_dir) + script_template = ERB.new File.read(__dir__ + "/../report_templates/mini_timeline_d3_template.html.erb") + html_output = script_template.result(binding) # Evaluate an Erb template with template_settings + File.open("#{out_dir}/_includes/reports/mini_timelines.html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/lib/yjit_metrics/timeline_reports/yjit_speedup_timeline_report.rb b/lib/yjit_metrics/timeline_reports/yjit_speedup_timeline_report.rb new file mode 100644 index 000000000..3e88913e5 --- /dev/null +++ b/lib/yjit_metrics/timeline_reports/yjit_speedup_timeline_report.rb @@ -0,0 +1,173 @@ +# frozen_string_literal: true +require_relative "../timeline_report" + +module YJITMetrics + class YJITSpeedupTimelineReport < TimelineReport + def self.report_name + "yjit_stats_timeline" + end + + # These objects have *gigantic* internal state. For debuggability, don't print the whole thing. 
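+    # (Editor's worked example with made-up numbers: if the no-JIT mean for a
+    # benchmark at some timestamp is 100 ms and the YJIT mean is 40 ms, the code
+    # below records yjit_speedup = 100.0 / 40.0 = 2.5, i.e. YJIT runs that
+    # benchmark 2.5x faster than the interpreter at that point in time.)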
+ def inspect + "YJITSpeedupTimelineReport<#{object_id}>" + end + + NUM_RECENT=100 + def initialize(context) + super + + yjit_config_root = "prod_ruby_with_yjit" + stats_config_root = "yjit_stats" + no_jit_config_root = "prod_ruby_no_jit" + x86_stats_config = "x86_64_#{stats_config_root}" + + # This should match the JS parser in the template file + time_format = "%Y %m %d %H %M %S" + + @series = {} + YJITMetrics::PLATFORMS.each { |platform| @series[platform] = { :recent => [], :all_time => [] } } + + @context[:benchmark_order].each do |benchmark| + YJITMetrics::PLATFORMS.each do |platform| + yjit_config = "#{platform}_#{yjit_config_root}" + stats_config = "#{platform}_#{stats_config_root}" + no_jit_config = "#{platform}_#{no_jit_config_root}" + points = @context[:timestamps_with_stats].map do |ts| + this_point_yjit = @context[:summary_by_timestamp].dig(ts, yjit_config, benchmark) + this_point_cruby = @context[:summary_by_timestamp].dig(ts, no_jit_config, benchmark) + # If no same-platform stats, fall back to x86_64 stats if available + this_point_stats = @context[:summary_by_timestamp].dig(ts, stats_config, benchmark) || + @context[:summary_by_timestamp].dig(ts, x86_stats_config, benchmark) + if this_point_yjit && this_point_stats + this_ruby_desc = @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] || "unknown" + # These fields are from the ResultSet summary + out = { + time: ts.strftime(time_format), + yjit_speedup: this_point_cruby["mean"] / this_point_yjit["mean"], + ratio_in_yjit: this_point_stats["yjit_stats"]["yjit_ratio_pct"], + side_exits: this_point_stats["yjit_stats"]["side_exits"], + invalidation_count: this_point_stats["yjit_stats"]["invalidation_count"] || 0, + ruby_desc: this_ruby_desc, + } + if out[:ratio_in_yjit].nil? || out[:side_exits].nil? || out[:invalidation_count].nil? + puts "Problem location: Benchmark #{benchmark.inspect} platform #{platform.inspect} timestamp #{ts.inspect}" + puts "Stats config(s): #{stats_config.inspect} / #{x86_stats_config.inspect}" + puts "Bad output sample: #{out.inspect}" + puts "Stats array: #{this_point_stats["yjit_stats"]}" + raise("Found point with nil as summary!") + end + out + else + nil + end + end + points.compact! + next if points.empty? + + visible = @context[:selected_benchmarks].include?(benchmark) + + s = { config: yjit_config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, data: points } + s_recent = { config: yjit_config, benchmark: benchmark, name: "#{yjit_config_root}-#{benchmark}", platform: platform, visible: visible, data: points.last(NUM_RECENT) } + @series[platform][:all_time].push s + @series[platform][:recent].push s_recent + end + end + + # Grab the stats fields from the first stats point (for the first platform that has data). + @stats_fields = @series.values.reject { |v| v[:all_time].empty? }[0][:all_time][0][:data][0].keys - [:time, :ruby_desc] + + # Calculate overall yjit speedup, yjit ratio, etc. over all benchmarks per-platform + YJITMetrics::PLATFORMS.each do |platform| + yjit_config = "#{platform}_#{yjit_config_root}" + # No Ruby desc for this? If so, that means no results for this platform + next unless @context[:ruby_desc_by_config_and_timestamp][yjit_config] + + data_mean = [] + data_geomean = [] + @context[:timestamps_with_stats].map.with_index do |ts, t_idx| + # No Ruby desc for this platform/timestamp combo? If so, that means no results for this platform and timestamp. 
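+          # (Editor's note on the aggregation below: for each timestamp the loop
+          # averages every stats field across all benchmarks, once as an arithmetic
+          # mean and once as a geometric mean,
+          # geomean(x1..xn) = (x1 * ... * xn) ** (1.0 / n). The geomean is the
+          # conventional average for ratio-style values such as yjit_speedup
+          # because it treats 2x and 0.5x symmetrically.)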
+ next unless @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] + + ruby_desc = @context[:ruby_desc_by_config_and_timestamp][yjit_config][ts] || "unknown" + point_mean = { + time: ts.strftime(time_format), + ruby_desc: ruby_desc, + } + point_geomean = point_mean.dup + @stats_fields.each do |field| + begin + points = @context[:benchmark_order].map.with_index do |bench, b_idx| + t_str = ts.strftime(time_format) + this_bench_data = @series[platform][:all_time][b_idx] + if this_bench_data + t_in_series = this_bench_data[:data].detect { |point_info| point_info[:time] == t_str } + t_in_series ? t_in_series[field] : nil + else + nil + end + end + rescue + STDERR.puts "Error in yjit_stats_timeline calculating field #{field} for TS #{ts.inspect} for all #{platform} benchmarks" + raise + end + points.compact! + raise("No data points for stat #{field.inspect} for TS #{ts.inspect}") if points.empty? + point_mean[field] = mean(points) + point_geomean[field] = geomean(points) + end + + data_mean.push(point_mean) + data_geomean.push(point_geomean) + end + overall_mean = { config: yjit_config, benchmark: "overall-mean", name: "#{yjit_config_root}-overall-mean", platform: platform, visible: true, data: data_mean } + overall_geomean = { config: yjit_config, benchmark: "overall-geomean", name: "#{yjit_config_root}-overall-geomean", platform: platform, visible: true, data: data_geomean } + overall_mean_recent = { config: yjit_config, benchmark: "overall-mean", name: "#{yjit_config_root}-overall-mean", platform: platform, visible: true, data: data_mean.last(NUM_RECENT) } + overall_geomean_recent = { config: yjit_config, benchmark: "overall-geomean", name: "#{yjit_config_root}-overall-geomean", platform: platform, visible: true, data: data_geomean.last(NUM_RECENT) } + + @series[platform][:all_time].prepend overall_geomean + @series[platform][:all_time].prepend overall_mean + @series[platform][:recent].prepend overall_geomean_recent + @series[platform][:recent].prepend overall_mean_recent + end + + # Recent and all-time series have different numbers of benchmarks. To keep everybody in sync, we set + # the colours here in Ruby and pass them through. + color_by_benchmark = {} + (["overall-mean", "overall-geomean"] + @context[:benchmark_order]).each.with_index do |bench, idx| + color_by_benchmark[bench] = MUNIN_PALETTE[idx % MUNIN_PALETTE.size] + end + @series.each do |platform, hash| + hash.each do |duration, all_series| + all_series.each.with_index do |series, idx| + series[:color] = color_by_benchmark[series[:benchmark]] + if series[:color].nil? + raise "Error for #{platform} #{duration} w/ bench #{series[:benchmark].inspect}!" + end + end + end + end + end + + def write_files(out_dir) + [:recent, :all_time].each do |duration| + YJITMetrics::PLATFORMS.each do |platform| + begin + @data_series = @series[platform][duration] + + script_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_timeline_data_template.js.erb") + text = script_template.result(binding) + File.open("#{out_dir}/reports/timeline/yjit_stats_timeline.data.#{platform}.#{duration}.js", "w") { |f| f.write(text) } + rescue + puts "Error writing data file for #{platform} #{duration} data!" 
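+            # (Editor's note: the bare raise below re-raises the original exception
+            # after the puts above has added platform/duration context, so a failed
+            # template render still propagates instead of being silently swallowed.)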
+ raise + end + end + end + + script_template = ERB.new File.read(__dir__ + "/../report_templates/yjit_stats_timeline_d3_template.html.erb") + #File.write("/tmp/erb_template.txt", script_template.src) + html_output = script_template.result(binding) # Evaluate an Erb template with template_settings + File.open("#{out_dir}/_includes/reports/yjit_stats_timeline.html", "w") { |f| f.write(html_output) } + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index e3c906ef2..c33a44dc2 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,4 +1,3 @@ require "minitest/autorun" -require_relative "../lib/yjit-metrics" -require_relative "../lib/yjit-metrics/bench-results" +require_relative "../lib/yjit_metrics" diff --git a/timeline_report.rb b/timeline_report.rb index 87d26e817..e2f34e634 100755 --- a/timeline_report.rb +++ b/timeline_report.rb @@ -2,9 +2,7 @@ require "json" require "optparse" -require_relative "lib/yjit-metrics" - -require_relative "lib/yjit-metrics/timeline_report_types/all_timeline_reports_lib" +require_relative "lib/yjit_metrics" report_class_by_name = YJITMetrics::TimelineReport.report_name_hash # By sorting, we make sure that the first report name that returns true from .start_with? is the "real" match.
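# (Editor's note, derived from the report classes added in this diff: report_name_hash
# is populated automatically by TimelineReport.inherited as each subclass is loaded,
# so here it should map roughly to
#   { "blog_timeline"       => YJITMetrics::BlogTimelineReport,
#     "memory_timeline"     => YJITMetrics::MemoryTimelineReport,
#     "mini_timelines"      => YJITMetrics::MiniTimelinesReport,
#     "yjit_stats_timeline" => YJITMetrics::YJITSpeedupTimelineReport }
# plus any other TimelineReport subclasses required elsewhere.)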