Use correct formula for standard deviation of a sample

Finally learned why a sample's standard deviation divides by n - 1 rather than n. It's called Bessel's correction: https://en.wikipedia.org/wiki/Bessel's_correction
bencheeorg · Mar 10, 2018 · 4097141 · 4097141
1 parent 2e4141c
commit 4097141
Show file tree

Hide file tree

Showing 4 changed files with 81 additions and 37 deletions.
diff --git a/.tool-versions b/.tool-versions
@@ -1,2 +1,2 @@
-elixir 1.6.0
+elixir 1.6.3
 erlang 20.2
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,9 @@ everything will run without error before running the full set of benchmarks.
 benchmarks with each input before running the actual suite. This should save
 time while actually writing the code for your benchmarks.
 
+### Bugfixes (User Facing)
+* Standard deviation is now calculated correctly for a sample of the population (dividing by `n - 1` instead of `n`)
+
 ## 0.12.1 (2018-02-26)
 
 ### Bugfixes (User Facing)

diff --git a/lib/benchee/statistics.ex b/lib/benchee/statistics.ex
@@ -101,8 +101,8 @@ defmodule Benchee.Statistics do
       iex> scenarios = [
       ...>   %Benchee.Benchmark.Scenario{
       ...>     job_name: "My Job",
-      ...>     run_times: [200, 400, 400, 400, 500, 500, 700, 900],
-      ...>     memory_usages: [200, 400, 400, 400, 500, 500, 700, 900],
+      ...>     run_times: [200, 400, 400, 400, 500, 500, 500, 700, 900],
+      ...>     memory_usages: [200, 400, 400, 400, 500, 500, 500, 700, 900],
       ...>     input_name: "Input",
       ...>     input: "Input"
       ...>   }
@@ -114,8 +114,8 @@ defmodule Benchee.Statistics do
         scenarios: [
           %Benchee.Benchmark.Scenario{
             job_name: "My Job",
-            run_times: [200, 400, 400, 400, 500, 500, 700, 900],
-            memory_usages: [200, 400, 400, 400, 500, 500, 700, 900],
+            run_times: [200, 400, 400, 400, 500, 500, 500, 700, 900],
+            memory_usages: [200, 400, 400, 400, 500, 500, 500, 700, 900],
             input_name: "Input",
             input: "Input",
             run_time_statistics: %Benchee.Statistics{
@@ -124,25 +124,25 @@ defmodule Benchee.Statistics do
               std_dev:       200.0,
               std_dev_ratio: 0.4,
               std_dev_ips:   800.0,
-              median:        450.0,
-              percentiles:   %{50 => 450.0, 99 => 900.0},
-              mode:          400,
+              median:        500.0,
+              percentiles:   %{50 => 500.0, 99 => 900.0},
+              mode:          [500, 400],
               minimum:       200,
               maximum:       900,
-              sample_size:   8
+              sample_size:   9
             },
             memory_usage_statistics: %Benchee.Statistics{
               average:       500.0,
               ips:           2000.0,
               std_dev:       200.0,
               std_dev_ratio: 0.4,
               std_dev_ips:   800.0,
-              median:        450.0,
-              percentiles:   %{50 => 450.0, 99 => 900.0},
-              mode:          400,
+              median:        500.0,
+              percentiles:   %{50 => 500.0, 99 => 900.0},
+              mode:          [500, 400],
               minimum:       200,
               maximum:       900,
-              sample_size:   8
+              sample_size:   9
             }
           }
         ],
@@ -173,26 +173,56 @@ defmodule Benchee.Statistics do
 
   ## Examples
 
-      iex> run_times = [200, 400, 400, 400, 500, 500, 700, 900]
+      iex> run_times = [200, 400, 400, 400, 500, 500, 500, 700, 900]
       iex> Benchee.Statistics.job_statistics(run_times)
       %Benchee.Statistics{
         average:       500.0,
         ips:           2000.0,
         std_dev:       200.0,
         std_dev_ratio: 0.4,
         std_dev_ips:   800.0,
-        median:        450.0,
-        percentiles:   %{50 => 450.0, 99 => 900.0},
-        mode:          400,
+        median:        500.0,
+        percentiles:   %{50 => 500.0, 99 => 900.0},
+        mode:          [500, 400],
         minimum:       200,
         maximum:       900,
-        sample_size:   8
+        sample_size:   9
+      }
+
+      iex> Benchee.Statistics.job_statistics([100])
+      %Benchee.Statistics{
+        average:       100.0,
+        ips:           10_000.0,
+        std_dev:       0,
+        std_dev_ratio: 0.0,
+        std_dev_ips:   0.0,
+        median:        100.0,
+        percentiles:   %{50 => 100.0, 99 => 100.0},
+        mode:          nil,
+        minimum:       100,
+        maximum:       100,
+        sample_size:   1
+      }
+
+      iex> Benchee.Statistics.job_statistics([])
+      %Benchee.Statistics{
+        average:       nil,
+        ips:           nil,
+        std_dev:       nil,
+        std_dev_ratio: nil,
+        std_dev_ips:   nil,
+        median:        nil,
+        percentiles:   nil,
+        mode:          nil,
+        minimum:       nil,
+        maximum:       nil,
+        sample_size:   0
       }
 
   """
   @spec job_statistics(samples) :: __MODULE__.t()
   def job_statistics([]) do
-    %__MODULE__{}
+    %__MODULE__{sample_size: 0}
   end
 
   def job_statistics(run_times) do
@@ -234,8 +264,8 @@ defmodule Benchee.Statistics do
   iex> scenarios = [
   ...>   %Benchee.Benchmark.Scenario{
   ...>     job_name: "My Job",
-  ...>     run_times: [200, 400, 400, 400, 500, 500, 700, 900],
-  ...>     memory_usages: [200, 400, 400, 400, 500, 500, 700, 900],
+  ...>     run_times: [200, 400, 400, 400, 500, 500, 500, 700, 900],
+  ...>     memory_usages: [200, 400, 400, 400, 500, 500, 500, 700, 900],
   ...>     input_name: "Input",
   ...>     input: "Input"
   ...>   }
@@ -248,8 +278,8 @@ defmodule Benchee.Statistics do
     scenarios: [
       %Benchee.Benchmark.Scenario{
         job_name: "My Job",
-        run_times: [200, 400, 400, 400, 500, 500, 700, 900],
-        memory_usages: [200, 400, 400, 400, 500, 500, 700, 900],
+        run_times: [200, 400, 400, 400, 500, 500, 500, 700, 900],
+        memory_usages: [200, 400, 400, 400, 500, 500, 500, 700, 900],
         input_name: "Input",
         input: "Input",
         run_time_statistics: %Benchee.Statistics{
@@ -258,25 +288,25 @@ defmodule Benchee.Statistics do
           std_dev:       200.0,
           std_dev_ratio: 0.4,
           std_dev_ips:   800.0,
-          median:        450.0,
-          percentiles:   %{25 => 400.0, 50 => 450.0, 75 => 650.0, 99 => 900.0},
-          mode:          400,
+          median:        500.0,
+          percentiles:   %{25 => 400.0, 50 => 500.0, 75 => 600.0, 99 => 900.0},
+          mode:          [500, 400],
           minimum:       200,
           maximum:       900,
-          sample_size:   8
+          sample_size:   9
         },
         memory_usage_statistics: %Benchee.Statistics{
           average:       500.0,
           ips:           2000.0,
           std_dev:       200.0,
           std_dev_ratio: 0.4,
           std_dev_ips:   800.0,
-          median:        450.0,
-          percentiles:   %{50 => 450.0, 99 => 900.0},
-          mode:          400,
+          median:        500.0,
+          percentiles:   %{50 => 500.0, 99 => 900.0},
+          mode:          [500, 400],
           minimum:       200,
           maximum:       900,
-          sample_size:   8
+          sample_size:   9
         }
       }
     ]
@@ -298,13 +328,14 @@ defmodule Benchee.Statistics do
     Duration.microseconds({1, :second}) / average_microseconds
   end
 
-  defp standard_deviation(samples, average, iterations) do
+  defp standard_deviation(_samples, _average, 1), do: 0
+  defp standard_deviation(samples, average, sample_size) do
     total_variance =
       Enum.reduce(samples, 0, fn sample, total ->
         total + :math.pow(sample - average, 2)
       end)
 
-    variance = total_variance / iterations
+    variance = total_variance / (sample_size - 1)
     :math.sqrt(variance)
   end
 end
diff --git a/test/benchee/statistics_test.exs b/test/benchee/statistics_test.exs
@@ -47,6 +47,16 @@ defmodule Benchee.StatistcsTest do
       assert stats.mode == 55
     end
 
+    @standard_deviation_sample [600, 470, 170, 430, 300]
+    test "statistical standard deviation is calculated correctly" do
+      scenarios = [%Scenario{run_times: @standard_deviation_sample, memory_usages: @standard_deviation_sample}]
+      suite = Statistics.statistics(%Suite{scenarios: scenarios, configuration: %{measure_memory: false}})
+
+      [%Scenario{run_time_statistics: stats}] = suite.scenarios
+      assert_in_delta stats.std_dev, 164.7, 0.1
+      assert_in_delta stats.std_dev_ratio, 0.41, 0.01
+    end
+
     test "preserves all other keys in the map handed to it" do
       suite = %Suite{
         scenarios: [],
@@ -67,8 +77,8 @@ defmodule Benchee.StatistcsTest do
 
     defp sample_1_asserts(stats) do
       assert stats.average == 394.0
-      assert_in_delta stats.std_dev, 147.32, 0.01
-      assert_in_delta stats.std_dev_ratio, 0.37, 0.01
+      assert_in_delta stats.std_dev, 164.71, 0.01
+      assert_in_delta stats.std_dev_ratio, 0.41, 0.01
       assert_in_delta stats.ips, 2538, 1
       assert stats.median == 430.0
       assert stats.minimum == 170
@@ -79,8 +89,8 @@ defmodule Benchee.StatistcsTest do
 
     defp sample_2_asserts(stats) do
       assert stats.average == 14.0
-      assert_in_delta stats.std_dev, 5.25, 0.01
-      assert_in_delta stats.std_dev_ratio, 0.37, 0.01
+      assert_in_delta stats.std_dev, 5.76, 0.01
+      assert_in_delta stats.std_dev_ratio, 0.41, 0.01
       assert_in_delta stats.ips, 71428, 1
       assert stats.median == 14.0
       assert stats.minimum == 7