From d5fb106adad7d07d249c3ff7fda31d89c2fd5c13 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Wed, 1 Apr 2020 18:47:59 -0600 Subject: [PATCH 01/11] add a sliding window where entries slide off after a period of time --- lib/semian/time_sliding_window.rb | 88 +++++++++++++++++++++++++++++++ test/time_sliding_window_test.rb | 64 ++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 lib/semian/time_sliding_window.rb create mode 100644 test/time_sliding_window_test.rb diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb new file mode 100644 index 00000000..3f915480 --- /dev/null +++ b/lib/semian/time_sliding_window.rb @@ -0,0 +1,88 @@ +require 'thread' + +module Semian + module Simple + + class TimeSlidingWindow #:nodoc: + extend Forwardable + + def_delegators :@window, :size, :empty?, :length + attr_reader :time_window_millis + + Pair = Struct.new(:head, :tail) + + # A sliding window is a structure that stores the most recent entries that were pushed within the last slice of time + def initialize(time_window) + @time_window_millis = time_window * 1000 + @window = [] + end + + def count(arg) + vals = @window.map { |pair| pair.tail} + vals.count(arg) + end + + def push(value) + remove_old # make room + @window << Pair.new(current_time, value) + self + end + + alias_method :<<, :push + + def clear + @window.clear + self + end + + def last + @window.last&.tail + end + + def remove_old + midtime = current_time - time_window_millis + # special case, everything is too old + @window.clear if !@window.empty? and @window.last.head < midtime + # otherwise we find the index position where the cutoff is + idx = (0...@window.size).bsearch { |n| @window[n].head >= midtime } + @window.slice!(0, idx) if idx + end + + alias_method :destroy, :clear + + private + + def current_time + Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) + end + end + end + + module ThreadSafe + class TimeSlidingWindow < Simple::TimeSlidingWindow + def initialize(*) + super + @lock = Mutex.new + end + + # #size, #last, and #clear are not wrapped in a mutex. For the first two, + # the worst-case is a thread-switch at a timing where they'd receive an + # out-of-date value--which could happen with a mutex as well. + # + # As for clear, it's an all or nothing operation. Doesn't matter if we + # have the lock or not. + + def count(*) + @lock.synchronize { super } + end + + def remove_old + @lock.synchronize { super } + end + + def push(*) + @lock.synchronize { super } + end + end + end +end diff --git a/test/time_sliding_window_test.rb b/test/time_sliding_window_test.rb new file mode 100644 index 00000000..dcfec14b --- /dev/null +++ b/test/time_sliding_window_test.rb @@ -0,0 +1,64 @@ +require 'test_helper' + +class TestTimeSlidingWindow < Minitest::Test + def setup + @sliding_window = ::Semian::ThreadSafe::TimeSlidingWindow.new(0.5) # Timecop doesn't work with a monotonic clock + @sliding_window.clear + end + + def teardown + @sliding_window.destroy + end + + def test_sliding_window_push + assert_equal(0, @sliding_window.size) + @sliding_window << 1 + assert_sliding_window(@sliding_window, [1], 500) + @sliding_window << 5 + assert_sliding_window(@sliding_window, [1, 5], 500) + end + + def test_special_everything_too_old + @sliding_window << 0 << 1 + sleep(0.501) + assert_sliding_window(@sliding_window, [], 500) + end + + + def test_sliding_window_edge_falloff + assert_equal(0, @sliding_window.size) + @sliding_window << 0 << 1 << 2 << 3 << 4 << 5 << 6 << 7 + assert_sliding_window(@sliding_window, [0, 1, 2, 3, 4, 5, 6, 7], 500) + sleep(0.251) + @sliding_window << 8 << 9 << 10 + sleep(0.251) + assert_sliding_window(@sliding_window, [8, 9, 10], 500) + @sliding_window << 11 + sleep(0.251) + assert_sliding_window(@sliding_window, [11], 500) + end + + def test_sliding_window_count + @sliding_window << true << false << true << false << true << true << true + assert_equal(5, @sliding_window.count(true)) + assert_equal(2, @sliding_window.count(false)) + end + + def test_issue + @window = @sliding_window.instance_variable_get("@window") + @window << ::Semian::ThreadSafe::TimeSlidingWindow::Pair.new(338019700.707, true) + @window << ::Semian::ThreadSafe::TimeSlidingWindow::Pair.new(338019701.707, true) + @sliding_window << false + puts('break') + end + + private + + def assert_sliding_window(sliding_window, array, time_window_millis) + # Get private member, the sliding_window doesn't expose the entire array + sliding_window.remove_old + data = sliding_window.instance_variable_get("@window").map { |pair| pair.tail } + assert_equal(array, data) + assert_equal(time_window_millis, sliding_window.time_window_millis) + end +end From bad8a75aab050198f7659e21a03f7b7954d3fe0f Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Wed, 1 Apr 2020 18:52:05 -0600 Subject: [PATCH 02/11] circuit breaker that opens based on error rate (% errors in time window) --- lib/semian.rb | 20 ++ lib/semian/error_rate_circuit_breaker.rb | 167 +++++++++++ test/error_rate_circuit_breaker_test.rb | 342 +++++++++++++++++++++++ test/semian_test.rb | 18 ++ 4 files changed, 547 insertions(+) create mode 100644 lib/semian/error_rate_circuit_breaker.rb create mode 100644 test/error_rate_circuit_breaker_test.rb diff --git a/lib/semian.rb b/lib/semian.rb index 7cdd504f..084a624c 100644 --- a/lib/semian.rb +++ b/lib/semian.rb @@ -8,9 +8,11 @@ require 'semian/platform' require 'semian/resource' require 'semian/circuit_breaker' +require 'semian/error_rate_circuit_breaker' require 'semian/protected_resource' require 'semian/unprotected_resource' require 'semian/simple_sliding_window' +require 'semian/time_sliding_window' require 'semian/simple_integer' require 'semian/simple_state' require 'semian/lru_hash' @@ -245,9 +247,27 @@ def thread_safe=(thread_safe) private + def create_error_rate_circuit_breaker(name, **options) + require_keys!([:success_threshold, :error_percent_threshold, :error_timeout, + :request_volume_threshold, :window_size], options) + + exceptions = options[:exceptions] || [] + ErrorRateCircuitBreaker.new(name, + success_threshold: options[:success_threshold], + error_percent_threshold: options[:error_percent_threshold], + error_timeout: options[:error_timeout], + exceptions: Array(exceptions) + [::Semian::BaseError], + half_open_resource_timeout: options[:half_open_resource_timeout], + request_volume_threshold: options[:request_volume_threshold], + window_size: options[:window_size], + implementation: implementation(**options)) + end + def create_circuit_breaker(name, **options) circuit_breaker = options.fetch(:circuit_breaker, true) return unless circuit_breaker + return create_error_rate_circuit_breaker(name, **options) if options.key?(:error_percent_threshold) + require_keys!([:success_threshold, :error_threshold, :error_timeout], options) exceptions = options[:exceptions] || [] diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb new file mode 100644 index 00000000..562357dd --- /dev/null +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -0,0 +1,167 @@ +module Semian + class ErrorRateCircuitBreaker #:nodoc: + extend Forwardable + + def_delegators :@state, :closed?, :open?, :half_open? + + attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, + :request_volume_threshold, :success_count_threshold + + def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, window_size:, + request_volume_threshold:, success_threshold:, implementation:, half_open_resource_timeout: nil) + + raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless (0.0001...1.0).cover?(error_percent_threshold) + + @name = name.to_sym + @error_timeout = error_timeout + @exceptions = exceptions + @half_open_resource_timeout = half_open_resource_timeout + @error_percent_threshold = error_percent_threshold + @last_error_time = nil + @request_volume_threshold = request_volume_threshold + @success_count_threshold = success_threshold + + @results = implementation::TimeSlidingWindow.new(window_size) + @state = implementation::State.new + + reset + end + + def acquire(resource = nil, &block) + return yield if disabled? + transition_to_half_open if transition_to_half_open? + + raise OpenCircuitError unless request_allowed? + + result = nil + begin + result = maybe_with_half_open_resource_timeout(resource, &block) + rescue *@exceptions => error + if !error.respond_to?(:marks_semian_circuits?) || error.marks_semian_circuits? + mark_failed(error) + end + raise error + else + mark_success + end + result + end + + def transition_to_half_open? + open? && error_timeout_expired? && !half_open? + end + + def request_allowed? + closed? || half_open? || transition_to_half_open? + end + + def mark_failed(error) + push_error(error) + if closed? + transition_to_open if error_threshold_reached? + elsif half_open? + transition_to_open + end + end + + def mark_success + @results << true + return unless half_open? + transition_to_close if success_threshold_reached? + end + + def reset + @last_error_time = nil + @results.clear + transition_to_close + end + + def destroy + @state.destroy + end + + # TODO understand what this is used for inside Semian lib + def in_use? + return false if error_timeout_expired? + @results.count(false) > 0 + end + + private + + def current_time + Process.clock_gettime(Process::CLOCK_MONOTONIC) + end + + def transition_to_close + notify_state_transition(:closed) + log_state_transition(:closed) + @state.close! + @results.clear + end + + def transition_to_open + notify_state_transition(:open) + log_state_transition(:open) + @state.open! + end + + def transition_to_half_open + notify_state_transition(:half_open) + log_state_transition(:half_open) + @state.half_open! + @results.clear + end + + def success_threshold_reached? + @results.count(true) >= @success_count_threshold + end + + def error_threshold_reached? + return false if @results.empty? or @results.length < @request_volume_threshold + @results.count(false).to_f / @results.length.to_f >= @error_percent_threshold + end + + def error_timeout_expired? + return false unless @last_error_time + current_time - @last_error_time >= @error_timeout + end + + def push_error(error) + @last_error = error + @last_error_time = current_time + @results << false + end + + def log_state_transition(new_state) + return if @state.nil? || new_state == @state.value + + str = "[#{self.class.name}] State transition from #{@state.value} to #{new_state}." + str << " success_count=#{@results.count(true)} error_count=#{@results.count(false)}" + str << " success_count_threshold=#{@success_count_threshold} error_count_percent=#{@error_percent_threshold}" + str << " error_timeout=#{@error_timeout} error_last_at=\"#{@last_error_time}\"" + str << " name=\"#{@name}\"" + Semian.logger.info(str) + end + + def notify_state_transition(new_state) + Semian.notify(:state_change, self, nil, nil, state: new_state) + end + + def disabled? + ENV['SEMIAN_CIRCUIT_BREAKER_DISABLED'] || ENV['SEMIAN_DISABLED'] + end + + def maybe_with_half_open_resource_timeout(resource, &block) + result = + if half_open? && @half_open_resource_timeout && resource.respond_to?(:with_resource_timeout) + resource.with_resource_timeout(@half_open_resource_timeout) do + block.call + end + else + block.call + end + + result + end + end +end diff --git a/test/error_rate_circuit_breaker_test.rb b/test/error_rate_circuit_breaker_test.rb new file mode 100644 index 00000000..ced39e6e --- /dev/null +++ b/test/error_rate_circuit_breaker_test.rb @@ -0,0 +1,342 @@ +require 'test_helper' + +class TestErrorRateCircuitBreaker < Minitest::Test + include CircuitBreakerHelper + + def setup + @strio = StringIO.new + Semian.logger = Logger.new @strio + begin + Semian.destroy(:testing) + rescue + nil + end + @resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, + exceptions: [Exception], + error_percent_threshold: 0.5, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 1, + half_open_resource_timeout: nil) + end + + # Timecop doesn't work with MONOTONIC_CLOCK + def sleep_for_sec(seconds) + sleep(seconds) + end + + def half_open_circuit(resource = @resource) + open_circuit!(resource) + assert_circuit_opened(resource) + sleep_for_sec(1.1) + assert resource.transition_to_half_open?, 'Expect breaker to be half-open' + end + + def test_error_threshold_must_be_between_0_and_1 + assert_raises RuntimeError do + ::Semian::ErrorRateCircuitBreaker.new(:testing, + exceptions: [Exception], + error_percent_threshold: 1.0, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 1, + half_open_resource_timeout: nil) + end + + assert_raises RuntimeError do + ::Semian::ErrorRateCircuitBreaker.new(:testing, + exceptions: [Exception], + error_percent_threshold: 0.0, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 1, + half_open_resource_timeout: nil) + end + end + + def test_acquire_yield_when_the_circuit_is_closed + block_called = false + @resource.acquire { block_called = true } + assert_equal true, block_called + end + + def test_acquire_raises_circuit_open_error_when_the_circuit_is_open + open_circuit! + assert_raises Semian::OpenCircuitError do + @resource.acquire { 1 + 1 } + end + assert_match(/State transition from closed to open/, @strio.string) + end + + def test_after_error_threshold_the_circuit_is_open + open_circuit! + assert_circuit_opened + end + + def test_after_error_timeout_is_elapsed_requests_are_attempted_again + half_open_circuit + assert_circuit_closed + end + + def test_until_success_threshold_is_reached_a_single_error_will_reopen_the_circuit + half_open_circuit + trigger_error! + assert_circuit_opened + end + + def test_once_success_threshold_is_reached_only_error_threshold_will_open_the_circuit_again + half_open_circuit + assert_circuit_closed + trigger_error! + assert_circuit_closed + trigger_error! + assert_circuit_opened + end + + def test_reset_allow_to_close_the_circuit_and_forget_errors + open_circuit! + @resource.reset + assert_match(/State transition from open to closed/, @strio.string) + assert_circuit_closed + end + + def test_errors_more_than_duration_apart_doesnt_open_circuit + trigger_error! + sleep_for_sec(2) # allow time for error to slide off time window + trigger_error! + assert_circuit_closed + end + + def test_errors_under_threshold_doesnt_open_circuit + # 60% success rate + @resource.acquire { 1 + 1} + @resource.acquire { 1 + 1} + trigger_error! + @resource.acquire { 1 + 1} + trigger_error! + assert_circuit_closed + end + + def test_request_allowed_query_doesnt_trigger_transitions + open_circuit! + refute_predicate @resource, :request_allowed? + assert_predicate @resource, :open? + sleep_for_sec(1.1) + assert_predicate @resource, :request_allowed? + assert_predicate @resource, :open? + end + + def test_open_close_open_cycle + resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, + exceptions: [Exception], + error_percent_threshold: 0.5, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 2, + half_open_resource_timeout: nil) + open_circuit!(resource) + assert_circuit_opened(resource) + + sleep_for_sec(1.1) + + assert_circuit_closed(resource) + + assert resource.half_open? + assert_circuit_closed(resource) + + assert resource.closed? + + open_circuit!(resource) + assert_circuit_opened(resource) + + sleep_for_sec(1.1) + + + assert_circuit_closed(resource) + + assert resource.half_open? + assert_circuit_closed(resource) + + assert resource.closed? + + + end + + def test_env_var_disables_circuit_breaker + ENV['SEMIAN_CIRCUIT_BREAKER_DISABLED'] = '1' + open_circuit! + assert_circuit_closed + ensure + ENV.delete('SEMIAN_CIRCUIT_BREAKER_DISABLED') + end + + def test_semian_wide_env_var_disables_circuit_breaker + ENV['SEMIAN_DISABLED'] = '1' + open_circuit! + assert_circuit_closed + ensure + ENV.delete('SEMIAN_DISABLED') + end + + class RawResource + def timeout + @timeout || 2 + end + + def with_resource_timeout(timeout) + prev_timeout = @timeout + @timeout = timeout + yield + ensure + @timeout = prev_timeout + end + end + + def test_changes_resource_timeout_when_configured + resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, + exceptions: [Exception], + error_percent_threshold: 0.5, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 2, + half_open_resource_timeout: 0.123) + + half_open_circuit(resource) + assert_circuit_closed(resource) + assert resource.half_open? + + raw_resource = RawResource.new + + triggered = false + resource.acquire(raw_resource) do + triggered = true + assert_equal 0.123, raw_resource.timeout + end + + assert triggered + assert_equal 2, raw_resource.timeout + end + + def test_doesnt_change_resource_timeout_when_closed + resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, + exceptions: [Exception], + error_percent_threshold: 0.5, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 2, + half_open_resource_timeout: 0.123) + + raw_resource = RawResource.new + + triggered = false + resource.acquire(raw_resource) do + triggered = true + assert_equal 2, raw_resource.timeout + end + + assert triggered + assert_equal 2, raw_resource.timeout + end + + def test_doesnt_blow_up_when_configured_half_open_timeout_but_adapter_doesnt_support + resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, + exceptions: [Exception], + error_percent_threshold: 0.5, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 2, + half_open_resource_timeout: 0.123) + + raw_resource = Object.new + + triggered = false + resource.acquire(raw_resource) do + triggered = true + end + + assert triggered + end + + class SomeErrorThatMarksCircuits < SomeError + def marks_semian_circuits? + true + end + end + + class SomeSubErrorThatDoesNotMarkCircuits < SomeErrorThatMarksCircuits + def marks_semian_circuits? + false + end + end + + def test_opens_circuit_when_error_has_marks_semian_circuits_equal_to_true + 2.times { trigger_error!(@resource, SomeErrorThatMarksCircuits) } + assert_circuit_opened + end + + def test_does_not_open_circuit_when_error_has_marks_semian_circuits_equal_to_false + 2.times { trigger_error!(@resource, SomeSubErrorThatDoesNotMarkCircuits) } + assert_circuit_closed + end + + def test_notify_state_transition + name = :test_notify_state_transition + + events = [] + Semian.subscribe(:test_notify_state_transition) do |event, resource, _scope, _adapter, payload| + if event == :state_change + events << {name: resource.name, state: payload[:state]} + end + end + + # Creating a resource should generate a :closed notification. + resource = ::Semian::ErrorRateCircuitBreaker.new(name, + exceptions: [Exception], + error_percent_threshold: 0.6666, + error_timeout: 1, + window_size: 2, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 1, + half_open_resource_timeout: 0.123) + assert_equal(1, events.length) + assert_equal(name, events[0][:name]) + assert_equal(:closed, events[0][:state]) + + # Acquiring a resource doesn't generate a transition. + resource.acquire { nil } + assert_equal(1, events.length) + + # error_threshold_percent failures causes a transition to open. + 2.times { trigger_error!(resource) } + assert_equal(2, events.length) + assert_equal(name, events[1][:name]) + assert_equal(:open, events[1][:state]) + + sleep_for_sec(1.1) + + # Acquiring the resource successfully generates a transition to half_open, then closed. + resource.acquire { nil } + assert_equal(4, events.length) + assert_equal(name, events[2][:name]) + assert_equal(:half_open, events[2][:state]) + assert_equal(name, events[3][:name]) + assert_equal(:closed, events[3][:state]) + ensure + Semian.unsubscribe(:test_notify_state_transition) + end +end diff --git a/test/semian_test.rb b/test/semian_test.rb index 65d6958f..416dd45e 100644 --- a/test/semian_test.rb +++ b/test/semian_test.rb @@ -68,6 +68,24 @@ def test_register_with_bulkhead_missing_options assert_equal exception.message, "Must pass exactly one of ticket or quota" end + def test_register_with_error_rate_circuitbreaker + resource = Semian.register( + :testing_error_rate, + success_threshold: 1, + error_percent_threshold: 0.2, + request_volume_threshold: 1, + window_size: 10, + error_timeout: 5, + circuit_breaker: true, + bulkhead: false, + thread_safety_disabled: false, + ) + + assert resource, Semian[:testing_error_rate] + assert resource.circuit_breaker.state.instance_of?(Semian::ThreadSafe::State) + assert resource.circuit_breaker.instance_of?(Semian::ErrorRateCircuitBreaker) + end + def test_unsuported_constants assert defined?(Semian::BaseError) assert defined?(Semian::SyscallError) From 73a839d873ecdfad964bcdf5e5741d9bd3ef0b68 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Fri, 3 Apr 2020 16:16:12 -0600 Subject: [PATCH 03/11] cleanup ErrorRateCircuitBreaker --- lib/semian/error_rate_circuit_breaker.rb | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index 562357dd..5be95e6b 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -21,7 +21,7 @@ def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, wind @request_volume_threshold = request_volume_threshold @success_count_threshold = success_threshold - @results = implementation::TimeSlidingWindow.new(window_size) + @window = implementation::TimeSlidingWindow.new(window_size) @state = implementation::State.new reset @@ -65,14 +65,14 @@ def mark_failed(error) end def mark_success - @results << true + @window << true return unless half_open? transition_to_close if success_threshold_reached? end def reset @last_error_time = nil - @results.clear + @window.clear transition_to_close end @@ -80,10 +80,9 @@ def destroy @state.destroy end - # TODO understand what this is used for inside Semian lib def in_use? return false if error_timeout_expired? - @results.count(false) > 0 + @window.count(false) > 0 end private @@ -96,7 +95,7 @@ def transition_to_close notify_state_transition(:closed) log_state_transition(:closed) @state.close! - @results.clear + @window.clear end def transition_to_open @@ -109,16 +108,16 @@ def transition_to_half_open notify_state_transition(:half_open) log_state_transition(:half_open) @state.half_open! - @results.clear + @window.clear end def success_threshold_reached? - @results.count(true) >= @success_count_threshold + @window.count(true) >= @success_count_threshold end def error_threshold_reached? - return false if @results.empty? or @results.length < @request_volume_threshold - @results.count(false).to_f / @results.length.to_f >= @error_percent_threshold + return false if @window.empty? or @window.length < @request_volume_threshold + @window.count(false).to_f / @window.length.to_f >= @error_percent_threshold end def error_timeout_expired? @@ -129,14 +128,14 @@ def error_timeout_expired? def push_error(error) @last_error = error @last_error_time = current_time - @results << false + @window << false end def log_state_transition(new_state) return if @state.nil? || new_state == @state.value str = "[#{self.class.name}] State transition from #{@state.value} to #{new_state}." - str << " success_count=#{@results.count(true)} error_count=#{@results.count(false)}" + str << " success_count=#{@window.count(true)} error_count=#{@window.count(false)}" str << " success_count_threshold=#{@success_count_threshold} error_count_percent=#{@error_percent_threshold}" str << " error_timeout=#{@error_timeout} error_last_at=\"#{@last_error_time}\"" str << " name=\"#{@name}\"" From 08e851fea88d327e3a7c0154bb53bc12a91a8215 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Fri, 24 Apr 2020 14:41:43 -0600 Subject: [PATCH 04/11] circuit breaker implementation that opens based on time spent in error --- lib/semian.rb | 27 +++- lib/semian/error_rate_circuit_breaker.rb | 72 ++++++--- lib/semian/time_sliding_window.rb | 34 ++-- test/error_rate_circuit_breaker_test.rb | 194 +++++++++++++++-------- test/helpers/circuit_breaker_helper.rb | 31 +++- test/semian_test.rb | 3 +- test/time_sliding_window_test.rb | 51 +++--- 7 files changed, 284 insertions(+), 128 deletions(-) diff --git a/lib/semian.rb b/lib/semian.rb index 084a624c..da5e5b45 100644 --- a/lib/semian.rb +++ b/lib/semian.rb @@ -128,6 +128,9 @@ def to_s # # +circuit_breaker+: The boolean if you want a circuit breaker acquired for your resource. Default true. # + # +circuit_breaker_type+: The string representing the type of circuit breaker, one of :normal or :error_rate + # Default normal (optional) + # # +bulkhead+: The boolean if you want a bulkhead to be acquired for your resource. Default true. # # +tickets+: Number of tickets. If this value is 0, the ticket count will not be set, @@ -155,6 +158,18 @@ def to_s # +exceptions+: An array of exception classes that should be accounted as resource errors. Default []. # (circuit breaker) # + # +error_percent_threshold+: The percentage of time spent making calls that ultimately ended in error + # that will trigger the circuit opening (error_rate circuit breaker required) + # + # +request_volume_threshold+: The number of calls that must happen within the time_window before the circuit + # will consider opening based on error_percent_threshold. For example, if the value is 20, then if only 19 requests + # are received in the rolling window the circuit will not trip open even if all 19 failed. + # Without this the circuit would open if the first request was an error (100% failure rate). + # (error_rate circuit breaker required) + # + # +time_window+: The time window in seconds over which the error rate will be calculated + # (error_rate circuit breaker required) + # # Returns the registered resource. def register(name, **options) circuit_breaker = create_circuit_breaker(name, **options) @@ -249,7 +264,7 @@ def thread_safe=(thread_safe) def create_error_rate_circuit_breaker(name, **options) require_keys!([:success_threshold, :error_percent_threshold, :error_timeout, - :request_volume_threshold, :window_size], options) + :request_volume_threshold, :time_window], options) exceptions = options[:exceptions] || [] ErrorRateCircuitBreaker.new(name, @@ -259,14 +274,20 @@ def create_error_rate_circuit_breaker(name, **options) exceptions: Array(exceptions) + [::Semian::BaseError], half_open_resource_timeout: options[:half_open_resource_timeout], request_volume_threshold: options[:request_volume_threshold], - window_size: options[:window_size], + time_window: options[:time_window], implementation: implementation(**options)) end def create_circuit_breaker(name, **options) circuit_breaker = options.fetch(:circuit_breaker, true) return unless circuit_breaker - return create_error_rate_circuit_breaker(name, **options) if options.key?(:error_percent_threshold) + + type = options.fetch(:circuit_breaker_type, :normal) + unless [:normal, :error_rate].include?(type) + raise ArgumentError, "Unknown 'circuit_breaker_type': #{type}, should be :normal or :error_rate" + end + + return create_error_rate_circuit_breaker(name, **options) if type == :error_rate require_keys!([:success_threshold, :error_threshold, :error_timeout], options) diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index 5be95e6b..2175362b 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -7,21 +7,22 @@ class ErrorRateCircuitBreaker #:nodoc: attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, :request_volume_threshold, :success_count_threshold - def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, window_size:, - request_volume_threshold:, success_threshold:, implementation:, half_open_resource_timeout: nil) + def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time_window:, + request_volume_threshold:, success_threshold:, implementation:, + half_open_resource_timeout: nil, time_source: nil) raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless (0.0001...1.0).cover?(error_percent_threshold) @name = name.to_sym - @error_timeout = error_timeout + @error_timeout = error_timeout * 1000 @exceptions = exceptions @half_open_resource_timeout = half_open_resource_timeout @error_percent_threshold = error_percent_threshold @last_error_time = nil @request_volume_threshold = request_volume_threshold @success_count_threshold = success_threshold - - @window = implementation::TimeSlidingWindow.new(window_size) + @time_source = time_source ? time_source : -> { Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) } + @window = implementation::TimeSlidingWindow.new(time_window, @time_source) @state = implementation::State.new reset @@ -33,16 +34,17 @@ def acquire(resource = nil, &block) raise OpenCircuitError unless request_allowed? + time_start = current_time result = nil begin result = maybe_with_half_open_resource_timeout(resource, &block) rescue *@exceptions => error if !error.respond_to?(:marks_semian_circuits?) || error.marks_semian_circuits? - mark_failed(error) + mark_failed(error, current_time - time_start) end raise error else - mark_success + mark_success(current_time - time_start) end result end @@ -55,8 +57,8 @@ def request_allowed? closed? || half_open? || transition_to_half_open? end - def mark_failed(error) - push_error(error) + def mark_failed(error, time_spent) + push_error(error, time_spent) if closed? transition_to_open if error_threshold_reached? elsif half_open? @@ -64,8 +66,8 @@ def mark_failed(error) end end - def mark_success - @window << true + def mark_success(time_spent) + @window << [true, time_spent] return unless half_open? transition_to_close if success_threshold_reached? end @@ -82,26 +84,26 @@ def destroy def in_use? return false if error_timeout_expired? - @window.count(false) > 0 + error_count > 0 end private def current_time - Process.clock_gettime(Process::CLOCK_MONOTONIC) + @time_source.call end def transition_to_close notify_state_transition(:closed) log_state_transition(:closed) @state.close! - @window.clear end def transition_to_open notify_state_transition(:open) log_state_transition(:open) @state.open! + @window.clear end def transition_to_half_open @@ -112,12 +114,32 @@ def transition_to_half_open end def success_threshold_reached? - @window.count(true) >= @success_count_threshold + success_count >= @success_count_threshold end def error_threshold_reached? - return false if @window.empty? or @window.length < @request_volume_threshold - @window.count(false).to_f / @window.length.to_f >= @error_percent_threshold + return false if @window.empty? || @window.length < @request_volume_threshold + success_time_spent, error_time_spent = calculate_time_spent + total_time = error_time_spent + success_time_spent + error_time_spent / total_time >= @error_percent_threshold + end + + def calculate_time_spent + @window.each_with_object([0.0, 0.0]) do |entry, sum| + if entry[0] == true + sum[0] = entry[1] + sum[0] + else + sum[1] = entry[1] + sum[1] + end + end + end + + def error_count + @window.count { |entry| entry[0] == false }.to_f + end + + def success_count + @window.count { |entry| entry[0] == true }.to_f end def error_timeout_expired? @@ -125,17 +147,17 @@ def error_timeout_expired? current_time - @last_error_time >= @error_timeout end - def push_error(error) + def push_error(error, time_spent) @last_error = error @last_error_time = current_time - @window << false + @window << [false, time_spent] end def log_state_transition(new_state) return if @state.nil? || new_state == @state.value str = "[#{self.class.name}] State transition from #{@state.value} to #{new_state}." - str << " success_count=#{@window.count(true)} error_count=#{@window.count(false)}" + str << " success_count=#{success_count} error_count=#{error_count}" str << " success_count_threshold=#{@success_count_threshold} error_count_percent=#{@error_percent_threshold}" str << " error_timeout=#{@error_timeout} error_last_at=\"#{@last_error_time}\"" str << " name=\"#{@name}\"" @@ -152,13 +174,13 @@ def disabled? def maybe_with_half_open_resource_timeout(resource, &block) result = - if half_open? && @half_open_resource_timeout && resource.respond_to?(:with_resource_timeout) - resource.with_resource_timeout(@half_open_resource_timeout) do - block.call - end - else + if half_open? && @half_open_resource_timeout && resource.respond_to?(:with_resource_timeout) + resource.with_resource_timeout(@half_open_resource_timeout) do block.call end + else + block.call + end result end diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb index 3f915480..6732b7b1 100644 --- a/lib/semian/time_sliding_window.rb +++ b/lib/semian/time_sliding_window.rb @@ -12,18 +12,26 @@ class TimeSlidingWindow #:nodoc: Pair = Struct.new(:head, :tail) # A sliding window is a structure that stores the most recent entries that were pushed within the last slice of time - def initialize(time_window) + def initialize(time_window, time_source = nil) @time_window_millis = time_window * 1000 + @time_source = time_source ? time_source : -> { Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) } @window = [] end - def count(arg) + def count(&block) + _remove_old vals = @window.map { |pair| pair.tail} - vals.count(arg) + vals.count(&block) + end + + def each_with_object(memo, &block) + _remove_old + vals = @window.map { |pair| pair.tail} + vals.each_with_object(memo, &block) end def push(value) - remove_old # make room + _remove_old # make room @window << Pair.new(current_time, value) self end @@ -40,6 +48,14 @@ def last end def remove_old + _remove_old + end + + alias_method :destroy, :clear + + private + + def _remove_old midtime = current_time - time_window_millis # special case, everything is too old @window.clear if !@window.empty? and @window.last.head < midtime @@ -48,12 +64,8 @@ def remove_old @window.slice!(0, idx) if idx end - alias_method :destroy, :clear - - private - def current_time - Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) + @time_source.call end end end @@ -76,6 +88,10 @@ def count(*) @lock.synchronize { super } end + def each_with_object(*) + @lock.synchronize { super } + end + def remove_old @lock.synchronize { super } end diff --git a/test/error_rate_circuit_breaker_test.rb b/test/error_rate_circuit_breaker_test.rb index ced39e6e..5624f1bb 100644 --- a/test/error_rate_circuit_breaker_test.rb +++ b/test/error_rate_circuit_breaker_test.rb @@ -12,35 +12,33 @@ def setup nil end @resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.5, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, - half_open_resource_timeout: nil) - end - - # Timecop doesn't work with MONOTONIC_CLOCK - def sleep_for_sec(seconds) - sleep(seconds) + half_open_resource_timeout: nil, + time_source: -> { Time.now.to_f * 1000 } + ) end def half_open_circuit(resource = @resource) - open_circuit!(resource) - assert_circuit_opened(resource) - sleep_for_sec(1.1) + Timecop.travel(-1.1) do + open_circuit!(resource) + assert_circuit_opened(resource) + end assert resource.transition_to_half_open?, 'Expect breaker to be half-open' end def test_error_threshold_must_be_between_0_and_1 assert_raises RuntimeError do ::Semian::ErrorRateCircuitBreaker.new(:testing, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 1.0, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, @@ -49,10 +47,10 @@ def test_error_threshold_must_be_between_0_and_1 assert_raises RuntimeError do ::Semian::ErrorRateCircuitBreaker.new(:testing, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.0, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, @@ -92,10 +90,10 @@ def test_until_success_threshold_is_reached_a_single_error_will_reopen_the_circu def test_once_success_threshold_is_reached_only_error_threshold_will_open_the_circuit_again half_open_circuit - assert_circuit_closed - trigger_error! - assert_circuit_closed - trigger_error! + assert_circuit_closed_elapse_time(@resource, 0.1) # one success + assert_circuit_closed_elapse_time(@resource, 0.1) # two success + trigger_error_elapse_time!(@resource, 0.11) # one failure + trigger_error_elapse_time!(@resource, 0.11) # two failures (>50%) assert_circuit_opened end @@ -107,67 +105,123 @@ def test_reset_allow_to_close_the_circuit_and_forget_errors end def test_errors_more_than_duration_apart_doesnt_open_circuit - trigger_error! - sleep_for_sec(2) # allow time for error to slide off time window + # allow time for error to slide off time window + Timecop.travel(-2) do + trigger_error! + end trigger_error! assert_circuit_closed end + def test_not_too_many_errors + resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, + exceptions: [SomeError], + error_percent_threshold: 0.90, + error_timeout: 1, + time_window: 100, + request_volume_threshold: 2, + implementation: ::Semian::ThreadSafe, + success_threshold: 1, + half_open_resource_timeout: nil, + time_source: -> { Time.now.to_f * 1000 } + ) + + success_cnt = 0 + error_cnt = 0 + # time window is 100 seconds, we spend the first half of that making successful requests: + Timecop.travel(-50) do + time_start = Time.now.to_f + + while Time.now.to_f - time_start < 50 + resource.acquire { Timecop.travel(0.05) } + success_cnt = success_cnt + 1 + end + end + + # resource goes completely down (not timing out, down) + Timecop.travel(-10) do + time_start = Time.now.to_f + + while Time.now.to_f - time_start < 10 + begin + resource.acquire { + # connection errors happen quickly, using up very little time + Timecop.travel(0.005) + raise SomeError + } + rescue SomeError + end + + error_cnt = error_cnt + 1 + end + end + + assert_operator error_cnt, :<, 10 # this would ideally be some percentage + end + def test_errors_under_threshold_doesnt_open_circuit # 60% success rate - @resource.acquire { 1 + 1} - @resource.acquire { 1 + 1} + Timecop.travel(-2) do + @resource.acquire do + Timecop.travel(-1) + 1 + 1 + end + + @resource.acquire do + Timecop.return + 1 + 1 + end + end trigger_error! - @resource.acquire { 1 + 1} trigger_error! assert_circuit_closed end def test_request_allowed_query_doesnt_trigger_transitions - open_circuit! - refute_predicate @resource, :request_allowed? - assert_predicate @resource, :open? - sleep_for_sec(1.1) + Timecop.travel(-1.1) do + open_circuit! + refute_predicate @resource, :request_allowed? + assert_predicate @resource, :open? + end assert_predicate @resource, :request_allowed? assert_predicate @resource, :open? end def test_open_close_open_cycle resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.5, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, - half_open_resource_timeout: nil) - open_circuit!(resource) - assert_circuit_opened(resource) - - sleep_for_sec(1.1) + half_open_resource_timeout: nil, + time_source: -> {Time.now.to_f * 1000}) + Timecop.travel(-1.1) do + open_circuit!(resource) + assert_circuit_opened(resource) + end assert_circuit_closed(resource) assert resource.half_open? - assert_circuit_closed(resource) + + assert_circuit_closed_elapse_time(resource, 0.1) assert resource.closed? - open_circuit!(resource) + open_circuit!(resource, 1, 1) assert_circuit_opened(resource) - sleep_for_sec(1.1) - - - assert_circuit_closed(resource) - - assert resource.half_open? - assert_circuit_closed(resource) - - assert resource.closed? + Timecop.travel(1.1) do + assert_circuit_closed(resource) + assert resource.half_open? + assert_circuit_closed(resource) + assert resource.closed? + end end def test_env_var_disables_circuit_breaker @@ -202,14 +256,15 @@ def with_resource_timeout(timeout) def test_changes_resource_timeout_when_configured resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.5, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, - half_open_resource_timeout: 0.123) + half_open_resource_timeout: 0.123, + time_source: -> { Time.now.to_f * 1000 }) half_open_circuit(resource) assert_circuit_closed(resource) @@ -229,14 +284,15 @@ def test_changes_resource_timeout_when_configured def test_doesnt_change_resource_timeout_when_closed resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.5, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, - half_open_resource_timeout: 0.123) + half_open_resource_timeout: 0.123, + time_source: -> { Time.now.to_f * 1000 }) raw_resource = RawResource.new @@ -252,14 +308,15 @@ def test_doesnt_change_resource_timeout_when_closed def test_doesnt_blow_up_when_configured_half_open_timeout_but_adapter_doesnt_support resource = ::Semian::ErrorRateCircuitBreaker.new(:resource_timeout, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.5, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, - half_open_resource_timeout: 0.123) + half_open_resource_timeout: 0.123, + time_source: -> { Time.now.to_f * 1000 }) raw_resource = Object.new @@ -305,37 +362,38 @@ def test_notify_state_transition # Creating a resource should generate a :closed notification. resource = ::Semian::ErrorRateCircuitBreaker.new(name, - exceptions: [Exception], + exceptions: [SomeError], error_percent_threshold: 0.6666, error_timeout: 1, - window_size: 2, + time_window: 2, request_volume_threshold: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, - half_open_resource_timeout: 0.123) + half_open_resource_timeout: 0.123, + time_source: -> { Time.now.to_f * 1000 }) assert_equal(1, events.length) assert_equal(name, events[0][:name]) assert_equal(:closed, events[0][:state]) # Acquiring a resource doesn't generate a transition. - resource.acquire { nil } + assert_circuit_closed_elapse_time(resource, 0.1) assert_equal(1, events.length) # error_threshold_percent failures causes a transition to open. - 2.times { trigger_error!(resource) } + 2.times { trigger_error_elapse_time!(resource, 0.12) } assert_equal(2, events.length) assert_equal(name, events[1][:name]) assert_equal(:open, events[1][:state]) - sleep_for_sec(1.1) - + Timecop.travel(1.1) do # Acquiring the resource successfully generates a transition to half_open, then closed. - resource.acquire { nil } - assert_equal(4, events.length) - assert_equal(name, events[2][:name]) - assert_equal(:half_open, events[2][:state]) - assert_equal(name, events[3][:name]) - assert_equal(:closed, events[3][:state]) + resource.acquire { nil } + assert_equal(4, events.length) + assert_equal(name, events[2][:name]) + assert_equal(:half_open, events[2][:state]) + assert_equal(name, events[3][:name]) + assert_equal(:closed, events[3][:state]) + end ensure Semian.unsubscribe(:test_notify_state_transition) end diff --git a/test/helpers/circuit_breaker_helper.rb b/test/helpers/circuit_breaker_helper.rb index 7d86ae0d..644ad468 100644 --- a/test/helpers/circuit_breaker_helper.rb +++ b/test/helpers/circuit_breaker_helper.rb @@ -3,8 +3,12 @@ module CircuitBreakerHelper private - def open_circuit!(resource = @resource, error_count = 2) - error_count.times { trigger_error!(resource) } + def open_circuit!(resource = @resource, error_count = 2, elapsed_time = nil) + if elapsed_time == nil + error_count.times { trigger_error!(resource) } + else + error_count.times { trigger_error_elapse_time!(resource, elapsed_time) } + end end def half_open_cicuit!(resource = @resource, backwards_time_travel = 10) @@ -18,6 +22,29 @@ def trigger_error!(resource = @resource, error = SomeError) rescue error end + def trigger_error_elapse_time!(resource = @resource, error = SomeError, elapsed_time) + Timecop.travel(-1 * elapsed_time) do + begin + resource.acquire { + Timecop.return_to_baseline + raise error + } + rescue error + end + end + end + + def assert_circuit_closed_elapse_time(resource = @resource, elapsed_time) + block_called = false + Timecop.travel(-1 * elapsed_time) do + resource.acquire do + Timecop.return_to_baseline + block_called = true + end + end + assert block_called, 'Expected the circuit to be closed, but it was open' + end + def assert_circuit_closed(resource = @resource) block_called = false resource.acquire { block_called = true } diff --git a/test/semian_test.rb b/test/semian_test.rb index 416dd45e..50080912 100644 --- a/test/semian_test.rb +++ b/test/semian_test.rb @@ -71,10 +71,11 @@ def test_register_with_bulkhead_missing_options def test_register_with_error_rate_circuitbreaker resource = Semian.register( :testing_error_rate, + circuit_breaker_type: :error_rate, success_threshold: 1, error_percent_threshold: 0.2, request_volume_threshold: 1, - window_size: 10, + time_window: 10, error_timeout: 5, circuit_breaker: true, bulkhead: false, diff --git a/test/time_sliding_window_test.rb b/test/time_sliding_window_test.rb index dcfec14b..923d5501 100644 --- a/test/time_sliding_window_test.rb +++ b/test/time_sliding_window_test.rb @@ -2,7 +2,7 @@ class TestTimeSlidingWindow < Minitest::Test def setup - @sliding_window = ::Semian::ThreadSafe::TimeSlidingWindow.new(0.5) # Timecop doesn't work with a monotonic clock + @sliding_window = ::Semian::ThreadSafe::TimeSlidingWindow.new(0.5, -> { Time.now.to_f * 1000 }) # Timecop doesn't work with a monotonic clock @sliding_window.clear end @@ -20,44 +20,55 @@ def test_sliding_window_push def test_special_everything_too_old @sliding_window << 0 << 1 - sleep(0.501) - assert_sliding_window(@sliding_window, [], 500) + Timecop.travel(0.501) do + assert_sliding_window(@sliding_window, [], 500) + end end - def test_sliding_window_edge_falloff assert_equal(0, @sliding_window.size) @sliding_window << 0 << 1 << 2 << 3 << 4 << 5 << 6 << 7 assert_sliding_window(@sliding_window, [0, 1, 2, 3, 4, 5, 6, 7], 500) - sleep(0.251) - @sliding_window << 8 << 9 << 10 - sleep(0.251) - assert_sliding_window(@sliding_window, [8, 9, 10], 500) - @sliding_window << 11 - sleep(0.251) - assert_sliding_window(@sliding_window, [11], 500) + Timecop.travel(0.251) do + @sliding_window << 8 << 9 << 10 + end + + Timecop.travel(0.251 * 2) do + assert_sliding_window(@sliding_window, [8, 9, 10], 500) + @sliding_window << 11 + end + Timecop.travel(0.251 * 3) do + assert_sliding_window(@sliding_window, [11], 500) + end end def test_sliding_window_count @sliding_window << true << false << true << false << true << true << true - assert_equal(5, @sliding_window.count(true)) - assert_equal(2, @sliding_window.count(false)) + assert_equal(5, @sliding_window.count {|e| e == true}) + assert_equal(2, @sliding_window.count {|e| e == false}) end - def test_issue - @window = @sliding_window.instance_variable_get("@window") - @window << ::Semian::ThreadSafe::TimeSlidingWindow::Pair.new(338019700.707, true) - @window << ::Semian::ThreadSafe::TimeSlidingWindow::Pair.new(338019701.707, true) - @sliding_window << false - puts('break') + def test_each_with_object + assert_equal(0, @sliding_window.size) + @sliding_window << [false, 1] << [false, 2] << [true, 1] << [true, 3] + result = @sliding_window.each_with_object([0.0, 0.0]) do |entry, sum| + if entry[0] == true + sum[0] = entry[1] + sum[0] + else + sum[1] = entry[1] + sum[1] + end + end + + assert_equal([4.0, 3.0], result) end + private def assert_sliding_window(sliding_window, array, time_window_millis) # Get private member, the sliding_window doesn't expose the entire array sliding_window.remove_old - data = sliding_window.instance_variable_get("@window").map { |pair| pair.tail } + data = sliding_window.instance_variable_get("@window").map(&:tail) assert_equal(array, data) assert_equal(time_window_millis, sliding_window.time_window_millis) end From 956211ca977843e82c953cefabee53c1d98fa1dc Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Fri, 24 Apr 2020 14:50:03 -0600 Subject: [PATCH 05/11] style fixes --- lib/semian/time_sliding_window.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb index 6732b7b1..f5b1c2d5 100644 --- a/lib/semian/time_sliding_window.rb +++ b/lib/semian/time_sliding_window.rb @@ -2,7 +2,6 @@ module Semian module Simple - class TimeSlidingWindow #:nodoc: extend Forwardable @@ -20,13 +19,13 @@ def initialize(time_window, time_source = nil) def count(&block) _remove_old - vals = @window.map { |pair| pair.tail} + vals = @window.map(&:tail) vals.count(&block) end def each_with_object(memo, &block) _remove_old - vals = @window.map { |pair| pair.tail} + vals = @window.map(&:tail) vals.each_with_object(memo, &block) end @@ -58,7 +57,7 @@ def remove_old def _remove_old midtime = current_time - time_window_millis # special case, everything is too old - @window.clear if !@window.empty? and @window.last.head < midtime + @window.clear if !@window.empty? && @window.last.head < midtime # otherwise we find the index position where the cutoff is idx = (0...@window.size).bsearch { |n| @window[n].head >= midtime } @window.slice!(0, idx) if idx From 8a6c96f327386c2c93b8d64e38def7f4a247a140 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Thu, 30 Apr 2020 16:27:00 -0600 Subject: [PATCH 06/11] code cleanup --- lib/semian/error_rate_circuit_breaker.rb | 12 ++++++------ test/error_rate_circuit_breaker_test.rb | 4 ++-- test/time_sliding_window_test.rb | 5 ++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index 2175362b..b820ba25 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -5,11 +5,11 @@ class ErrorRateCircuitBreaker #:nodoc: def_delegators :@state, :closed?, :open?, :half_open? attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, - :request_volume_threshold, :success_count_threshold + :request_volume_threshold, :success_threshold, :time_window def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time_window:, - request_volume_threshold:, success_threshold:, implementation:, - half_open_resource_timeout: nil, time_source: nil) + request_volume_threshold:, success_threshold:, implementation:, + half_open_resource_timeout: nil, time_source: nil) raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless (0.0001...1.0).cover?(error_percent_threshold) @@ -20,7 +20,7 @@ def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time @error_percent_threshold = error_percent_threshold @last_error_time = nil @request_volume_threshold = request_volume_threshold - @success_count_threshold = success_threshold + @success_threshold = success_threshold @time_source = time_source ? time_source : -> { Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) } @window = implementation::TimeSlidingWindow.new(time_window, @time_source) @state = implementation::State.new @@ -114,7 +114,7 @@ def transition_to_half_open end def success_threshold_reached? - success_count >= @success_count_threshold + success_count >= @success_threshold end def error_threshold_reached? @@ -158,7 +158,7 @@ def log_state_transition(new_state) str = "[#{self.class.name}] State transition from #{@state.value} to #{new_state}." str << " success_count=#{success_count} error_count=#{error_count}" - str << " success_count_threshold=#{@success_count_threshold} error_count_percent=#{@error_percent_threshold}" + str << " success_count_threshold=#{@success_threshold} error_count_percent=#{@error_percent_threshold}" str << " error_timeout=#{@error_timeout} error_last_at=\"#{@last_error_time}\"" str << " name=\"#{@name}\"" Semian.logger.info(str) diff --git a/test/error_rate_circuit_breaker_test.rb b/test/error_rate_circuit_breaker_test.rb index 5624f1bb..44204529 100644 --- a/test/error_rate_circuit_breaker_test.rb +++ b/test/error_rate_circuit_breaker_test.rb @@ -20,8 +20,7 @@ def setup implementation: ::Semian::ThreadSafe, success_threshold: 1, half_open_resource_timeout: nil, - time_source: -> { Time.now.to_f * 1000 } - ) + time_source: -> { Time.now.to_f * 1000 }) end def half_open_circuit(resource = @resource) @@ -114,6 +113,7 @@ def test_errors_more_than_duration_apart_doesnt_open_circuit end def test_not_too_many_errors + skip 'Pending decision on if this warrants another threshold' resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, exceptions: [SomeError], error_percent_threshold: 0.90, diff --git a/test/time_sliding_window_test.rb b/test/time_sliding_window_test.rb index 923d5501..d22f22cc 100644 --- a/test/time_sliding_window_test.rb +++ b/test/time_sliding_window_test.rb @@ -44,8 +44,8 @@ def test_sliding_window_edge_falloff def test_sliding_window_count @sliding_window << true << false << true << false << true << true << true - assert_equal(5, @sliding_window.count {|e| e == true}) - assert_equal(2, @sliding_window.count {|e| e == false}) + assert_equal(5, @sliding_window.count { |e| e == true }) + assert_equal(2, @sliding_window.count { |e| e == false }) end def test_each_with_object @@ -62,7 +62,6 @@ def test_each_with_object assert_equal([4.0, 3.0], result) end - private def assert_sliding_window(sliding_window, array, time_window_millis) From 4adee5ed6db5220e1f3a320fdfe7fd0fa38dca75 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Fri, 1 May 2020 12:15:36 -0600 Subject: [PATCH 07/11] code cleanup --- lib/semian.rb | 6 +-- lib/semian/error_rate_circuit_breaker.rb | 10 ++-- lib/semian/time_sliding_window.rb | 19 +++---- test/error_rate_circuit_breaker_test.rb | 68 +++++------------------- test/semian_test.rb | 2 +- test/time_sliding_window_test.rb | 7 +-- 6 files changed, 32 insertions(+), 80 deletions(-) diff --git a/lib/semian.rb b/lib/semian.rb index da5e5b45..686fbac8 100644 --- a/lib/semian.rb +++ b/lib/semian.rb @@ -161,7 +161,7 @@ def to_s # +error_percent_threshold+: The percentage of time spent making calls that ultimately ended in error # that will trigger the circuit opening (error_rate circuit breaker required) # - # +request_volume_threshold+: The number of calls that must happen within the time_window before the circuit + # +minimum_request_volume+: The number of calls that must happen within the time_window before the circuit # will consider opening based on error_percent_threshold. For example, if the value is 20, then if only 19 requests # are received in the rolling window the circuit will not trip open even if all 19 failed. # Without this the circuit would open if the first request was an error (100% failure rate). @@ -264,7 +264,7 @@ def thread_safe=(thread_safe) def create_error_rate_circuit_breaker(name, **options) require_keys!([:success_threshold, :error_percent_threshold, :error_timeout, - :request_volume_threshold, :time_window], options) + :minimum_request_volume, :time_window], options) exceptions = options[:exceptions] || [] ErrorRateCircuitBreaker.new(name, @@ -273,7 +273,7 @@ def create_error_rate_circuit_breaker(name, **options) error_timeout: options[:error_timeout], exceptions: Array(exceptions) + [::Semian::BaseError], half_open_resource_timeout: options[:half_open_resource_timeout], - request_volume_threshold: options[:request_volume_threshold], + minimum_request_volume: options[:minimum_request_volume], time_window: options[:time_window], implementation: implementation(**options)) end diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index b820ba25..d3438d2a 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -5,13 +5,13 @@ class ErrorRateCircuitBreaker #:nodoc: def_delegators :@state, :closed?, :open?, :half_open? attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, - :request_volume_threshold, :success_threshold, :time_window + :minimum_request_volume, :success_threshold, :time_window def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time_window:, - request_volume_threshold:, success_threshold:, implementation:, + minimum_request_volume:, success_threshold:, implementation:, half_open_resource_timeout: nil, time_source: nil) - raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless (0.0001...1.0).cover?(error_percent_threshold) + raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless 0 < error_percent_threshold && error_percent_threshold < 1 @name = name.to_sym @error_timeout = error_timeout * 1000 @@ -19,7 +19,7 @@ def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time @half_open_resource_timeout = half_open_resource_timeout @error_percent_threshold = error_percent_threshold @last_error_time = nil - @request_volume_threshold = request_volume_threshold + @minimum_request_volume = minimum_request_volume @success_threshold = success_threshold @time_source = time_source ? time_source : -> { Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) } @window = implementation::TimeSlidingWindow.new(time_window, @time_source) @@ -118,7 +118,7 @@ def success_threshold_reached? end def error_threshold_reached? - return false if @window.empty? || @window.length < @request_volume_threshold + return false if @window.empty? || @window.length < @minimum_request_volume success_time_spent, error_time_spent = calculate_time_spent total_time = error_time_spent + success_time_spent error_time_spent / total_time >= @error_percent_threshold diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb index f5b1c2d5..090b3dc0 100644 --- a/lib/semian/time_sliding_window.rb +++ b/lib/semian/time_sliding_window.rb @@ -18,19 +18,19 @@ def initialize(time_window, time_source = nil) end def count(&block) - _remove_old + remove_old vals = @window.map(&:tail) vals.count(&block) end def each_with_object(memo, &block) - _remove_old + remove_old vals = @window.map(&:tail) vals.each_with_object(memo, &block) end def push(value) - _remove_old # make room + remove_old # make room @window << Pair.new(current_time, value) self end @@ -46,18 +46,15 @@ def last @window.last&.tail end - def remove_old - _remove_old - end - alias_method :destroy, :clear private - def _remove_old + def remove_old + return if @window.empty? midtime = current_time - time_window_millis # special case, everything is too old - @window.clear if !@window.empty? && @window.last.head < midtime + @window.clear if @window.last.head < midtime # otherwise we find the index position where the cutoff is idx = (0...@window.size).bsearch { |n| @window[n].head >= midtime } @window.slice!(0, idx) if idx @@ -91,10 +88,6 @@ def each_with_object(*) @lock.synchronize { super } end - def remove_old - @lock.synchronize { super } - end - def push(*) @lock.synchronize { super } end diff --git a/test/error_rate_circuit_breaker_test.rb b/test/error_rate_circuit_breaker_test.rb index 44204529..0300756b 100644 --- a/test/error_rate_circuit_breaker_test.rb +++ b/test/error_rate_circuit_breaker_test.rb @@ -16,11 +16,16 @@ def setup error_percent_threshold: 0.5, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, half_open_resource_timeout: nil, time_source: -> { Time.now.to_f * 1000 }) + Timecop.return + end + + def teardown + Timecop.return end def half_open_circuit(resource = @resource) @@ -38,7 +43,7 @@ def test_error_threshold_must_be_between_0_and_1 error_percent_threshold: 1.0, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, half_open_resource_timeout: nil) @@ -50,7 +55,7 @@ def test_error_threshold_must_be_between_0_and_1 error_percent_threshold: 0.0, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, half_open_resource_timeout: nil) @@ -112,53 +117,6 @@ def test_errors_more_than_duration_apart_doesnt_open_circuit assert_circuit_closed end - def test_not_too_many_errors - skip 'Pending decision on if this warrants another threshold' - resource = ::Semian::ErrorRateCircuitBreaker.new(:testing, - exceptions: [SomeError], - error_percent_threshold: 0.90, - error_timeout: 1, - time_window: 100, - request_volume_threshold: 2, - implementation: ::Semian::ThreadSafe, - success_threshold: 1, - half_open_resource_timeout: nil, - time_source: -> { Time.now.to_f * 1000 } - ) - - success_cnt = 0 - error_cnt = 0 - # time window is 100 seconds, we spend the first half of that making successful requests: - Timecop.travel(-50) do - time_start = Time.now.to_f - - while Time.now.to_f - time_start < 50 - resource.acquire { Timecop.travel(0.05) } - success_cnt = success_cnt + 1 - end - end - - # resource goes completely down (not timing out, down) - Timecop.travel(-10) do - time_start = Time.now.to_f - - while Time.now.to_f - time_start < 10 - begin - resource.acquire { - # connection errors happen quickly, using up very little time - Timecop.travel(0.005) - raise SomeError - } - rescue SomeError - end - - error_cnt = error_cnt + 1 - end - end - - assert_operator error_cnt, :<, 10 # this would ideally be some percentage - end - def test_errors_under_threshold_doesnt_open_circuit # 60% success rate Timecop.travel(-2) do @@ -193,7 +151,7 @@ def test_open_close_open_cycle error_percent_threshold: 0.5, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, half_open_resource_timeout: nil, @@ -260,7 +218,7 @@ def test_changes_resource_timeout_when_configured error_percent_threshold: 0.5, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, half_open_resource_timeout: 0.123, @@ -288,7 +246,7 @@ def test_doesnt_change_resource_timeout_when_closed error_percent_threshold: 0.5, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, half_open_resource_timeout: 0.123, @@ -312,7 +270,7 @@ def test_doesnt_blow_up_when_configured_half_open_timeout_but_adapter_doesnt_sup error_percent_threshold: 0.5, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 2, half_open_resource_timeout: 0.123, @@ -366,7 +324,7 @@ def test_notify_state_transition error_percent_threshold: 0.6666, error_timeout: 1, time_window: 2, - request_volume_threshold: 2, + minimum_request_volume: 2, implementation: ::Semian::ThreadSafe, success_threshold: 1, half_open_resource_timeout: 0.123, diff --git a/test/semian_test.rb b/test/semian_test.rb index 50080912..58856b50 100644 --- a/test/semian_test.rb +++ b/test/semian_test.rb @@ -74,7 +74,7 @@ def test_register_with_error_rate_circuitbreaker circuit_breaker_type: :error_rate, success_threshold: 1, error_percent_threshold: 0.2, - request_volume_threshold: 1, + minimum_request_volume: 1, time_window: 10, error_timeout: 5, circuit_breaker: true, diff --git a/test/time_sliding_window_test.rb b/test/time_sliding_window_test.rb index d22f22cc..5e5c968e 100644 --- a/test/time_sliding_window_test.rb +++ b/test/time_sliding_window_test.rb @@ -4,10 +4,12 @@ class TestTimeSlidingWindow < Minitest::Test def setup @sliding_window = ::Semian::ThreadSafe::TimeSlidingWindow.new(0.5, -> { Time.now.to_f * 1000 }) # Timecop doesn't work with a monotonic clock @sliding_window.clear + Timecop.freeze end def teardown @sliding_window.destroy + Timecop.return end def test_sliding_window_push @@ -65,9 +67,8 @@ def test_each_with_object private def assert_sliding_window(sliding_window, array, time_window_millis) - # Get private member, the sliding_window doesn't expose the entire array - sliding_window.remove_old - data = sliding_window.instance_variable_get("@window").map(&:tail) + # each_with_object will remove old entries first + data = sliding_window.each_with_object([]) { |v, data| data.append(v) } assert_equal(array, data) assert_equal(time_window_millis, sliding_window.time_window_millis) end From 4cfcf5930df22611d4dfd201d8f3ac6e65b40feb Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Fri, 1 May 2020 17:53:26 -0600 Subject: [PATCH 08/11] code cleanup --- lib/semian/error_rate_circuit_breaker.rb | 9 ++++++--- lib/semian/time_sliding_window.rb | 6 +++--- test/error_rate_circuit_breaker_test.rb | 3 +++ test/time_sliding_window_test.rb | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index d3438d2a..7486f24d 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -5,7 +5,9 @@ class ErrorRateCircuitBreaker #:nodoc: def_delegators :@state, :closed?, :open?, :half_open? attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, - :minimum_request_volume, :success_threshold, :time_window + :minimum_request_volume, :success_threshold + + def_delegator :@window, :time_window_ms def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time_window:, minimum_request_volume:, success_threshold:, implementation:, @@ -14,7 +16,7 @@ def initialize(name, exceptions:, error_percent_threshold:, error_timeout:, time raise 'error_threshold_percent should be between 0.0 and 1.0 exclusive' unless 0 < error_percent_threshold && error_percent_threshold < 1 @name = name.to_sym - @error_timeout = error_timeout * 1000 + @error_timeout = error_timeout @exceptions = exceptions @half_open_resource_timeout = half_open_resource_timeout @error_percent_threshold = error_percent_threshold @@ -144,7 +146,7 @@ def success_count def error_timeout_expired? return false unless @last_error_time - current_time - @last_error_time >= @error_timeout + current_time - @last_error_time >= @error_timeout * 1000 end def push_error(error, time_spent) @@ -160,6 +162,7 @@ def log_state_transition(new_state) str << " success_count=#{success_count} error_count=#{error_count}" str << " success_count_threshold=#{@success_threshold} error_count_percent=#{@error_percent_threshold}" str << " error_timeout=#{@error_timeout} error_last_at=\"#{@last_error_time}\"" + str << " minimum_request_volume=#{@minimum_request_volume} time_window_ms=#{@window.time_window_ms}" str << " name=\"#{@name}\"" Semian.logger.info(str) end diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb index 090b3dc0..73bd2bd3 100644 --- a/lib/semian/time_sliding_window.rb +++ b/lib/semian/time_sliding_window.rb @@ -6,13 +6,13 @@ class TimeSlidingWindow #:nodoc: extend Forwardable def_delegators :@window, :size, :empty?, :length - attr_reader :time_window_millis + attr_reader :time_window_ms Pair = Struct.new(:head, :tail) # A sliding window is a structure that stores the most recent entries that were pushed within the last slice of time def initialize(time_window, time_source = nil) - @time_window_millis = time_window * 1000 + @time_window_ms = time_window * 1000 @time_source = time_source ? time_source : -> { Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) } @window = [] end @@ -52,7 +52,7 @@ def last def remove_old return if @window.empty? - midtime = current_time - time_window_millis + midtime = current_time - time_window_ms # special case, everything is too old @window.clear if @window.last.head < midtime # otherwise we find the index position where the cutoff is diff --git a/test/error_rate_circuit_breaker_test.rb b/test/error_rate_circuit_breaker_test.rb index 0300756b..77c9577d 100644 --- a/test/error_rate_circuit_breaker_test.rb +++ b/test/error_rate_circuit_breaker_test.rb @@ -74,6 +74,9 @@ def test_acquire_raises_circuit_open_error_when_the_circuit_is_open @resource.acquire { 1 + 1 } end assert_match(/State transition from closed to open/, @strio.string) + assert_match(/minimum_request_volume=2/, @strio.string) + assert_match(/time_window_ms=2000/, @strio.string) + assert_match(/error_count_percent=0\.5/, @strio.string) end def test_after_error_threshold_the_circuit_is_open diff --git a/test/time_sliding_window_test.rb b/test/time_sliding_window_test.rb index 5e5c968e..254c424c 100644 --- a/test/time_sliding_window_test.rb +++ b/test/time_sliding_window_test.rb @@ -70,6 +70,6 @@ def assert_sliding_window(sliding_window, array, time_window_millis) # each_with_object will remove old entries first data = sliding_window.each_with_object([]) { |v, data| data.append(v) } assert_equal(array, data) - assert_equal(time_window_millis, sliding_window.time_window_millis) + assert_equal(time_window_millis, sliding_window.time_window_ms) end end From 35b47d152bd3f1b4e99c38cdc5ebb4a40bea3fc5 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Wed, 6 May 2020 18:38:13 -0600 Subject: [PATCH 09/11] add accessor for exceptions --- lib/semian/error_rate_circuit_breaker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/semian/error_rate_circuit_breaker.rb b/lib/semian/error_rate_circuit_breaker.rb index 7486f24d..2b460935 100644 --- a/lib/semian/error_rate_circuit_breaker.rb +++ b/lib/semian/error_rate_circuit_breaker.rb @@ -5,7 +5,7 @@ class ErrorRateCircuitBreaker #:nodoc: def_delegators :@state, :closed?, :open?, :half_open? attr_reader :name, :half_open_resource_timeout, :error_timeout, :state, :last_error, :error_percent_threshold, - :minimum_request_volume, :success_threshold + :minimum_request_volume, :success_threshold, :exceptions def_delegator :@window, :time_window_ms From e2255d74263ed499ba93f5e6881eddbfb0b395c3 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Wed, 22 Jun 2022 16:12:09 -0600 Subject: [PATCH 10/11] code cleanup --- lib/semian.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/semian.rb b/lib/semian.rb index 686fbac8..883e1db6 100644 --- a/lib/semian.rb +++ b/lib/semian.rb @@ -264,7 +264,7 @@ def thread_safe=(thread_safe) def create_error_rate_circuit_breaker(name, **options) require_keys!([:success_threshold, :error_percent_threshold, :error_timeout, - :minimum_request_volume, :time_window], options) + :minimum_request_volume, :time_window], **options) exceptions = options[:exceptions] || [] ErrorRateCircuitBreaker.new(name, @@ -289,7 +289,7 @@ def create_circuit_breaker(name, **options) return create_error_rate_circuit_breaker(name, **options) if type == :error_rate - require_keys!([:success_threshold, :error_threshold, :error_timeout], options) + require_keys!([:success_threshold, :error_threshold, :error_timeout], **options) exceptions = options[:exceptions] || [] CircuitBreaker.new( From 1bdff257f726ffdbffddb9ac50f5c2c026f658d2 Mon Sep 17 00:00:00 2001 From: Ryan Dearing Date: Wed, 22 Jun 2022 16:12:20 -0600 Subject: [PATCH 11/11] TimeSlidingWindow#clear lock in mutex to prevent issues in #remove_old --- lib/semian/time_sliding_window.rb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/semian/time_sliding_window.rb b/lib/semian/time_sliding_window.rb index 73bd2bd3..e56c0eb2 100644 --- a/lib/semian/time_sliding_window.rb +++ b/lib/semian/time_sliding_window.rb @@ -73,12 +73,9 @@ def initialize(*) @lock = Mutex.new end - # #size, #last, and #clear are not wrapped in a mutex. For the first two, - # the worst-case is a thread-switch at a timing where they'd receive an + # #size, #last are not wrapped in a mutex. The worst-case is a + # thread-switch at a timing where they'd receive an # out-of-date value--which could happen with a mutex as well. - # - # As for clear, it's an all or nothing operation. Doesn't matter if we - # have the lock or not. def count(*) @lock.synchronize { super } @@ -91,6 +88,10 @@ def each_with_object(*) def push(*) @lock.synchronize { super } end + + def clear + @lock.synchronize { super } + end end end end